import { chromium } from 'playwright-extra';
import stealth from 'puppeteer-extra-plugin-stealth';
import { pool } from './src/db/migrate';

chromium.use(stealth());

/**
 * One dispensary record as heuristically extracted from a container element
 * on the AZDHS page. Everything except `name` and `rawText` is best-effort
 * and may be missing.
 */
interface ScrapedDispensary {
  name: string;
  /** First 500 characters of the container's text, kept for debugging. */
  rawText: string;
  /** Digits only, regardless of whether it came from a `tel:` link or text. */
  phone?: string;
  email?: string;
  address?: string;
  city?: string;
  zip?: string;
}

/**
 * Opens the AZDHS Care Check page in a visible browser, waits for it to
 * settle, heuristically extracts dispensary records from the DOM, and
 * inserts/updates them in the `stores` table with `data_source = 'azdhs'`.
 *
 * Per-row database errors are logged and counted as skipped; any other
 * failure is logged and rethrown. The browser and the database pool are
 * always released, even if the page has already been closed by the user.
 *
 * @throws Rethrows any error raised while loading, extracting, or reporting.
 */
async function scrapeAZDHSAuto(): Promise<void> {
  console.log('šŸ›ļø Scraping AZDHS - Automatic Mode\n');

  const browser = await chromium.launch({
    headless: false, // Visible so you can see it working
  });

  const context = await browser.newContext({
    viewport: { width: 1920, height: 1080 },
  });

  const page = await context.newPage();

  try {
    console.log('šŸ“„ Loading AZDHS page...');
    await page.goto('https://azcarecheck.azdhs.gov/s/?facilityId=001t000000L0TApAAN', {
      waitUntil: 'domcontentloaded',
      timeout: 60000,
    });

    console.log('ā³ Waiting 30 seconds for page to fully load and for you to scroll...\n');
    await page.waitForTimeout(30000);

    console.log('šŸ“¦ Extracting all dispensaries from the page...\n');

    // Runs inside the browser: everything in this callback must be
    // self-contained (no references to Node-side helpers or variables).
    const dispensaries = await page.evaluate(() => {
      const results: ScrapedDispensary[] = [];

      // Look for all possible dispensary container elements
      const containers = document.querySelectorAll(
        'article, [class*="facility"], [class*="dispensary"], [class*="location"], ' +
          '.slds-card, lightning-card, [data-id], [data-facility-id]'
      );

      // Candidate selectors for the dispensary name, most specific first
      const nameSelectors = ['h3', 'h2', 'h4', '[class*="title"]', '[class*="name"]', 'strong', 'b'];

      containers.forEach((card) => {
        // Get all text from the card
        const fullText = card.textContent?.trim() || '';
        const disp: ScrapedDispensary = {
          name: '',
          rawText: fullText.substring(0, 500),
        };

        // The first selector yielding more than 3 characters of text wins
        for (const selector of nameSelectors) {
          const candidate = card.querySelector(selector)?.textContent?.trim();
          if (candidate && candidate.length > 3) {
            disp.name = candidate;
            break;
          }
        }

        // Extract phone: prefer an explicit tel: link, fall back to a
        // phone-shaped pattern in the text. Both paths store digits only so
        // the same number is never saved in two different formats.
        const phoneLink = card.querySelector('a[href^="tel:"]');
        if (phoneLink) {
          disp.phone = phoneLink.getAttribute('href')?.replace('tel:', '').replace(/\D/g, '');
        } else {
          const phoneInText = fullText.match(/(\d{3}[-.]?\d{3}[-.]?\d{4})/)?.[1];
          if (phoneInText) {
            disp.phone = phoneInText.replace(/\D/g, '');
          }
        }

        // Extract email
        const emailLink = card.querySelector('a[href^="mailto:"]');
        if (emailLink) {
          disp.email = emailLink.getAttribute('href')?.replace('mailto:', '');
        }

        // Extract address - look for street address pattern
        const street = fullText.match(
          /(\d+\s+[A-Za-z0-9\s]+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Drive|Dr|Lane|Ln|Way|Circle|Court|Parkway)\.?(?:\s+(?:Suite|Ste|Unit|#)\s*[\w-]+)?)/i
        )?.[1];
        if (street) {
          disp.address = street.trim();
        }

        // Extract city: capitalized word(s) immediately before ", AZ"
        const city = fullText.match(/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*),\s*AZ/)?.[1];
        if (city) {
          disp.city = city;
        }

        // Extract ZIP: first standalone 5-digit group (ZIP+4 suffix dropped)
        const zip = fullText.match(/\b(\d{5})(?:-\d{4})?\b/)?.[1];
        if (zip) {
          disp.zip = zip;
        }

        // Only add if we found at least a name
        if (disp.name.length > 3) {
          results.push(disp);
        }
      });

      return results;
    });

    console.log(`āœ… Found ${dispensaries.length} dispensary entries!\n`);

    if (dispensaries.length > 0) {
      console.log('šŸ“‹ Sample of first 5:');
      console.table(
        dispensaries.slice(0, 5).map((d) => ({
          name: d.name.substring(0, 40),
          phone: d.phone,
          city: d.city,
        }))
      );
    }

    // Save to database
    console.log('\nšŸ’¾ Saving to database with data_source="azdhs"...\n');

    let savedCount = 0;
    let updatedCount = 0;
    let skippedCount = 0;

    for (const disp of dispensaries) {
      if (!disp.name) {
        skippedCount++;
        continue;
      }

      try {
        // Check if exists by name (case-insensitive) within the AZDHS source
        const existing = await pool.query(
          'SELECT id FROM stores WHERE LOWER(name) = LOWER($1) AND state = $2 AND data_source = $3',
          [disp.name, 'AZ', 'azdhs']
        );

        const slug = disp.name.toLowerCase().replace(/[^a-z0-9]+/g, '-');
        // Stored in the dutchie_url column, but points at the AZDHS listing.
        const sourceUrl = `https://azcarecheck.azdhs.gov/s/?name=${encodeURIComponent(disp.name)}`;

        if (existing.rows.length > 0) {
          // COALESCE keeps the stored value when this scrape found nothing.
          await pool.query(
            `
            UPDATE stores SET
              address = COALESCE($1, address),
              city = COALESCE($2, city),
              zip = COALESCE($3, zip),
              phone = COALESCE($4, phone),
              email = COALESCE($5, email),
              updated_at = CURRENT_TIMESTAMP
            WHERE id = $6
          `,
            [disp.address, disp.city, disp.zip, disp.phone, disp.email, existing.rows[0].id]
          );
          updatedCount++;
        } else {
          await pool.query(
            `
            INSERT INTO stores (
              name, slug, dutchie_url, address, city, state, zip, phone, email,
              data_source, active, created_at, updated_at
            )
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, 'azdhs', true, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
          `,
            [disp.name, slug, sourceUrl, disp.address, disp.city, 'AZ', disp.zip, disp.phone, disp.email]
          );
          savedCount++;
        }
      } catch (error: unknown) {
        // A single bad row must not abort the whole run.
        console.error(`Error saving ${disp.name}: ${error}`);
        skippedCount++;
      }
    }

    console.log(`\nāœ… Saved ${savedCount} new AZDHS dispensaries`);
    console.log(`āœ… Updated ${updatedCount} existing AZDHS dispensaries`);
    if (skippedCount > 0) console.log(`āš ļø Skipped ${skippedCount} entries`);

    // Show totals by source
    const totals = await pool.query(`
      SELECT data_source, COUNT(*) as count
      FROM stores
      WHERE state = 'AZ'
      GROUP BY data_source
      ORDER BY data_source
    `);

    console.log('\nšŸ“Š Arizona dispensaries by source:');
    console.table(totals.rows);

    console.log('\nāœ… AZDHS scraping complete!');
  } catch (error: unknown) {
    console.error(`āŒ Error: ${error}`);
    throw error;
  } finally {
    console.log('\nšŸ‘‰ Browser will close in 5 seconds...');

    // Each cleanup step is isolated: the browser is visible, so the user may
    // already have closed the page, and a failure in one step must neither
    // skip the remaining steps nor mask the original error.
    try {
      await page.waitForTimeout(5000);
    } catch {
      // Page or browser already gone; nothing to wait for.
    }

    try {
      await browser.close();
    } catch (closeError: unknown) {
      console.error(`Failed to close browser: ${closeError}`);
    }

    try {
      await pool.end();
    } catch (poolError: unknown) {
      console.error(`Failed to close database pool: ${poolError}`);
    }
  }
}

// Script entry point: surface failures explicitly instead of leaving an
// unhandled promise rejection.
scrapeAZDHSAuto().catch((error: unknown) => {
  console.error('AZDHS scrape failed:', error);
  process.exitCode = 1;
});