import { writeFileSync } from 'node:fs';
import { chromium } from 'playwright-extra';
import stealth from 'puppeteer-extra-plugin-stealth';
import { pool } from './src/db/migrate';

chromium.use(stealth());

/** Only JSON responses whose URL contains this host are captured. */
const AZDHS_HOST = 'azcarecheck.azdhs.gov';
/** Page that is loaded to trigger the site's API calls. */
const AZDHS_PAGE_URL = 'https://azcarecheck.azdhs.gov/s/?facilityId=001t000000L0TApAAN';
/** Navigation timeout for the initial page load. */
const PAGE_LOAD_TIMEOUT_MS = 120000;
/** Extra time spent on the page so late API calls are also captured. */
const CAPTURE_WAIT_MS = 60000;
/** An array must have MORE than this many items to be considered a dispensary list. */
const MIN_DISPENSARY_ARRAY_LENGTH = 50;
/** Where raw responses are dumped when no dispensary array is found. */
const RAW_DUMP_PATH = '/tmp/azdhs-api-responses.json';
/** Value written to / matched against `stores.data_source`. */
const DATA_SOURCE = 'azdhs';
/** Substrings (lower-case) that make an object's keys look like a facility record. */
const RECORD_KEY_HINTS = ['name', 'address', 'facility'] as const;

/**
 * Candidate property names for each stored column, in priority order.
 * The real payload shape is not known up front, so several common
 * spellings (including Salesforce-style `__c` suffixes) are tried.
 */
const FIELD_KEYS = {
  name: ['Name', 'name', 'FacilityName', 'facility_name', 'Name__c', 'dispensaryName', 'BusinessName'],
  address: ['Address', 'address', 'Street', 'street', 'Address__c', 'StreetAddress', 'street_address'],
  city: ['City', 'city', 'City__c'],
  state: ['State', 'state', 'State__c'],
  zip: ['Zip', 'zip', 'ZipCode', 'zip_code', 'PostalCode', 'Zip__c'],
  phone: ['Phone', 'phone', 'PhoneNumber', 'phone_number', 'Phone__c'],
  email: ['Email', 'email', 'Email__c'],
  lat: ['Latitude', 'latitude', 'lat', 'Latitude__c'],
  lng: ['Longitude', 'longitude', 'lng', 'lon', 'Longitude__c'],
} as const;

type RawRecord = Record<string, unknown>;

interface CapturedResponse {
  url: string;
  data: unknown;
  status: number;
}

interface SaveCounts {
  saved: number;
  updated: number;
  skipped: number;
}

/** Narrows a value to a plain (non-array, non-null) object. */
function isRecord(value: unknown): value is RawRecord {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Returns the first value among `keys` that is present on `item`.
 *
 * Only `undefined`, `null` and the empty string count as "missing", so a
 * legitimate `0` (e.g. a coordinate) is kept rather than discarded.
 *
 * @returns the first present value, or `null` when none of the keys is set.
 */
function pickField(item: RawRecord, keys: readonly string[]): unknown {
  for (const key of keys) {
    const value = item[key];
    if (value !== undefined && value !== null && value !== '') {
      return value;
    }
  }
  return null;
}

/**
 * Depth-first search of a parsed JSON value for the first array that has
 * more than MIN_DISPENSARY_ARRAY_LENGTH items and whose first element is an
 * object with a key containing "name", "address" or "facility".
 *
 * @param node  value to search
 * @param path  dotted path of `node` from the root, used only for logging
 * @returns the matching array, or an empty array when nothing matches
 */
function findDispensaryArray(node: unknown, path = ''): unknown[] {
  if (Array.isArray(node) && node.length > MIN_DISPENSARY_ARRAY_LENGTH) {
    const sample: unknown = node[0];
    if (sample && typeof sample === 'object') {
      const keys = Object.keys(sample);
      const looksLikeRecord = keys.some((key) => {
        const lowered = key.toLowerCase();
        return RECORD_KEY_HINTS.some((hint) => lowered.includes(hint));
      });
      if (looksLikeRecord) {
        console.log(` ✅ Found potential dispensary array at ${path} with ${node.length} items`);
        console.log(` Sample keys: ${keys.slice(0, 10).join(', ')}`);
        return node;
      }
    }
  }

  if (typeof node === 'object' && node !== null) {
    for (const [key, value] of Object.entries(node)) {
      const hit = findDispensaryArray(value, `${path}.${key}`);
      if (hit.length > 0) return hit;
    }
  }

  return [];
}

/**
 * Upserts each record into the `stores` table.
 *
 * A record is matched to an existing row by case-insensitive name, state and
 * data source. Existing rows only have non-null incoming values applied
 * (COALESCE); otherwise a new row is inserted. Records that are not objects,
 * lack a string name of at least 3 characters, or fail to save are counted
 * as skipped; a failing record does not abort the rest of the batch.
 */
async function saveDispensaries(records: readonly unknown[]): Promise<SaveCounts> {
  const counts: SaveCounts = { saved: 0, updated: 0, skipped: 0 };

  for (const item of records) {
    try {
      if (!isRecord(item)) {
        counts.skipped++;
        continue;
      }

      const name = pickField(item, FIELD_KEYS.name);
      if (typeof name !== 'string' || name.length < 3) {
        counts.skipped++;
        continue;
      }

      const address = pickField(item, FIELD_KEYS.address);
      const city = pickField(item, FIELD_KEYS.city);
      const state = pickField(item, FIELD_KEYS.state) ?? 'AZ';
      const zip = pickField(item, FIELD_KEYS.zip);
      const phone = pickField(item, FIELD_KEYS.phone);
      const email = pickField(item, FIELD_KEYS.email);
      const lat = pickField(item, FIELD_KEYS.lat);
      const lng = pickField(item, FIELD_KEYS.lng);

      const existing = await pool.query(
        'SELECT id FROM stores WHERE LOWER(name) = LOWER($1) AND state = $2 AND data_source = $3',
        [name, state, DATA_SOURCE]
      );

      if (existing.rows.length > 0) {
        await pool.query(
          `
          UPDATE stores SET
            address = COALESCE($1, address),
            city = COALESCE($2, city),
            zip = COALESCE($3, zip),
            phone = COALESCE($4, phone),
            email = COALESCE($5, email),
            latitude = COALESCE($6, latitude),
            longitude = COALESCE($7, longitude),
            updated_at = CURRENT_TIMESTAMP
          WHERE id = $8
          `,
          [address, city, zip, phone, email, lat, lng, existing.rows[0].id]
        );
        counts.updated++;
      } else {
        const slug = name.toLowerCase().replace(/[^a-z0-9]+/g, '-');
        // Stored in the `dutchie_url` column, but it is an AZDHS lookup URL.
        const sourceUrl = `https://azcarecheck.azdhs.gov/s/?name=${encodeURIComponent(name)}`;
        await pool.query(
          `
          INSERT INTO stores (
            name, slug, dutchie_url, address, city, state, zip, phone, email,
            latitude, longitude, data_source, active, created_at, updated_at
          )
          VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, 'azdhs', true, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
          `,
          [name, slug, sourceUrl, address, city, state, zip, phone, email, lat, lng]
        );
        counts.saved++;
      }
    } catch (error) {
      console.error(`Error saving: ${error}`);
      counts.skipped++;
    }
  }

  return counts;
}

/**
 * Loads the AZDHS care-check page in a (non-headless) browser, records every
 * JSON response served from the AZDHS host, looks for a large array of
 * facility-like objects in those responses and upserts it into `stores`.
 *
 * When no such array is found, the captured URLs are printed and the raw
 * responses are written to RAW_DUMP_PATH for manual inspection.
 *
 * The browser is closed and the DB pool is ended exactly once on every exit
 * path. Errors propagate to the caller.
 */
async function scrapeAZDHSAPI(): Promise<void> {
  console.log('🏛️ Scraping AZDHS via API interception...\n');

  try {
    const browser = await chromium.launch({ headless: false });

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
      });
      const page = await context.newPage();

      // Capture every JSON response coming from the AZDHS host.
      const captured: CapturedResponse[] = [];
      page.on('response', async (response) => {
        const url = response.url();
        const contentType = response.headers()['content-type'] || '';
        if (!url.includes(AZDHS_HOST) || !contentType.includes('json')) return;

        try {
          const data: unknown = await response.json();
          console.log(`📡 Captured JSON from: ${url.substring(0, 80)}...`);
          captured.push({ url, data, status: response.status() });
        } catch {
          // Best effort: the body was not valid JSON or could not be read.
        }
      });

      console.log('📄 Loading AZDHS page...');
      await page.goto(AZDHS_PAGE_URL, {
        waitUntil: 'networkidle',
        timeout: PAGE_LOAD_TIMEOUT_MS,
      });

      console.log(`⏳ Waiting ${CAPTURE_WAIT_MS / 1000} seconds to capture all API calls...\n`);
      await page.waitForTimeout(CAPTURE_WAIT_MS);

      console.log(`\n📊 Captured ${captured.length} JSON API responses\n`);

      // Use the first response that contains a dispensary-looking array.
      let dispensaryData: unknown[] = [];
      for (const resp of captured) {
        const found = findDispensaryArray(resp.data);
        if (found.length > 0) {
          dispensaryData = found;
          console.log(`\n🎯 Found dispensary data! ${found.length} entries`);
          console.log(` URL: ${resp.url}\n`);
          console.log('📋 Sample entry:');
          console.log(JSON.stringify(found[0], null, 2).substring(0, 500));
          break;
        }
      }

      if (dispensaryData.length === 0) {
        console.log('❌ Could not find dispensary data in API responses\n');
        console.log('🔍 All captured URLs:');
        captured.forEach((r, i) => {
          console.log(` ${i + 1}. ${r.url}`);
        });

        console.log(`\n💾 Saving raw API responses to ${RAW_DUMP_PATH} for inspection...`);
        writeFileSync(RAW_DUMP_PATH, JSON.stringify(captured, null, 2));
        // Cleanup is left to the finally blocks so that the browser and the
        // pool are not closed twice.
        return;
      }

      console.log('\n💾 Saving AZDHS dispensaries to database...\n');
      const counts = await saveDispensaries(dispensaryData);

      console.log(`\n✅ Saved ${counts.saved} new AZDHS dispensaries`);
      console.log(`✅ Updated ${counts.updated} existing AZDHS dispensaries`);
      if (counts.skipped > 0) console.log(`⚠️ Skipped ${counts.skipped} entries`);

      const totals = await pool.query(`
        SELECT data_source, COUNT(*) as count
        FROM stores
        WHERE state = 'AZ'
        GROUP BY data_source
        ORDER BY data_source
      `);
      console.log('\n📊 Arizona dispensaries by source:');
      console.table(totals.rows);
    } finally {
      await browser.close();
    }
  } finally {
    // Runs even when the browser failed to launch or to close.
    await pool.end();
  }
}

// Report failures here instead of leaving an unhandled promise rejection.
scrapeAZDHSAPI().catch((error: unknown) => {
  console.error(`❌ Error: ${error}`);
  process.exitCode = 1;
});