import { chromium } from 'playwright-extra';
import stealth from 'puppeteer-extra-plugin-stealth';
import { pool } from './src/db/migrate';

chromium.use(stealth());

/** Page that is loaded and inspected. */
const AZDHS_URL = 'https://azcarecheck.azdhs.gov/s/?facilityId=001t000000L0TApAAN';

/** A response is captured when its URL contains any of these substrings. */
const API_URL_HINTS = ['dispensar', 'facility', 'location'];

/** Selector for DOM elements that are inspected as possible dispensary entries. */
const CANDIDATE_SELECTOR =
  '[data-facility], [data-location], article, .facility, .location, .dispensary';

/** Selector for a control that expands the result list, if the page offers one. */
const VIEW_ALL_SELECTOR =
  'button:has-text("View All"), button:has-text("Show All"), a:has-text("View All")';

/** JSON body of a network response whose URL matched one of API_URL_HINTS. */
interface CapturedApiResponse {
  url: string;
  data: unknown;
}

/** DOM element whose text passed the length and capitalised-words heuristics. */
interface DispensaryCandidate {
  /** First 200 characters of the element's text content. */
  rawText: string;
  /** The element's `className`. */
  element: string;
}

/**
 * Opens the AZDHS page in a Chromium window and reports what dispensary data
 * can be found, both in JSON network responses and in the rendered DOM.
 *
 * Steps: register a response listener, navigate, wait a fixed 20 seconds,
 * click a "View All"/"Show All" control when one is visible, then scan the DOM
 * for candidate elements. Results are only logged to the console; nothing is
 * returned.
 *
 * The browser and the imported `pool` are always closed before the function
 * settles, including when launching the browser or opening the page fails.
 * NOTE(review): `pool` is not used for queries here; presumably importing
 * './src/db/migrate' opens it and it must be ended for the process to exit —
 * confirm.
 *
 * @throws Rethrows any error raised while launching, navigating or extracting,
 *         after logging it.
 */
async function scrapeAZDHSBetter(): Promise<void> {
  console.log('🏛️ Scraping AZDHS official map (improved approach)...\n');

  try {
    // Launched inside the outer try so that a launch failure still reaches
    // pool.end() in the outer finally.
    const browser = await chromium.launch({
      headless: false,
    });

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent:
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      });
      const page = await context.newPage();

      // Capture API requests
      const apiData: CapturedApiResponse[] = [];
      page.on('response', async (response) => {
        const url = response.url();
        if (!API_URL_HINTS.some((hint) => url.includes(hint))) {
          return;
        }

        let data: unknown;
        try {
          data = await response.json();
        } catch {
          // Not JSON (or the body could not be read): best-effort capture, skip it.
          return;
        }

        console.log(`📡 Captured API response from: ${url.substring(0, 100)}...`);
        apiData.push({ url, data });
      });

      console.log('📄 Loading AZDHS page (waiting up to 60s for JavaScript)...');
      await page.goto(AZDHS_URL, {
        waitUntil: 'domcontentloaded',
        timeout: 60000,
      });

      // Wait longer for JavaScript to execute
      console.log('⏳ Waiting 20 seconds for Salesforce to fully load...');
      await page.waitForTimeout(20000);

      // Try to find and click "View All" or expand the map
      console.log('🔍 Looking for buttons to expand results...');
      const viewAllButton = page.locator(VIEW_ALL_SELECTOR).first();
      // A failed visibility check is treated the same as "button not present".
      if (await viewAllButton.isVisible().catch(() => false)) {
        console.log(' ✅ Found View All button, clicking...');
        await viewAllButton.click();
        await page.waitForTimeout(5000);
      }

      // Try extracting data directly from page
      console.log('\n📦 Extracting dispensary data from page...');
      // The callback runs inside the browser page, so it must be self-contained:
      // it cannot reference helpers or constants from this module except through
      // the argument passed to evaluate().
      const dispensaries = await page.evaluate((selector) => {
        // Two to six consecutive capitalised words, e.g. "Green Leaf Dispensary".
        const namePattern = /([A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,5})/;
        const results: DispensaryCandidate[] = [];

        document.querySelectorAll(selector).forEach((el) => {
          const text = el.textContent ?? '';
          // Skip text that is too short to be a listing or too long to be a single one.
          if (text.length > 20 && text.length < 500 && namePattern.test(text)) {
            results.push({
              rawText: text.substring(0, 200),
              element: el.className,
            });
          }
        });

        return results;
      }, CANDIDATE_SELECTOR);

      console.log(`\n📊 Found ${dispensaries.length} potential dispensary elements`);
      console.log(`📊 Captured ${apiData.length} API responses`);

      if (apiData.length > 0) {
        console.log('\n🎯 Analyzing API data...');
        console.log(JSON.stringify(apiData[0], null, 2).substring(0, 1000));
      }

      if (dispensaries.length > 0) {
        console.log('\n📋 Sample dispensary elements:');
        console.log(dispensaries.slice(0, 3));
      }
    } finally {
      await browser.close();
    }
  } catch (error) {
    console.error(`❌ Error: ${error}`);
    throw error;
  } finally {
    // Runs even when browser.close() itself throws.
    await pool.end();
  }
}

// The function rethrows on failure; handle the rejection explicitly so the
// outcome does not depend on Node's unhandled-rejection policy, and so that
// cleanup failures (which bypass the in-function log) are still reported.
scrapeAZDHSBetter().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});