import { firefox } from 'playwright';
import { pool } from './src/db/migrate';
import { getRandomProxy } from './src/utils/proxyManager';

interface DispensaryEnrichment {
  id: number;
  azdhs_name: string;
  address: string;
  city: string;
  state: string;
  zip: string;
  dba_name?: string;
  website?: string;
  google_phone?: string;
  google_rating?: number;
  google_review_count?: number;
  confidence: 'high' | 'medium' | 'low';
  notes?: string;
}

/** Confidence label attached to every proposed change. */
type Confidence = DispensaryEnrichment['confidence'];

/**
 * Columns selected from `dispensaries` by the enrichment query.
 * NOTE(review): column types and nullability are assumed from how the values
 * are used in this script — confirm against the actual table schema.
 */
interface DispensaryRow {
  id: number;
  slug: string;
  name: string;
  address: string | null;
  city: string | null;
  state: string | null;
  zip: string | null;
  phone: string | null;
  website: string | null;
  dba_name: string | null;
}

/** Fields scraped from the Google Maps page; each is absent when not found. */
interface ScrapedBusinessData {
  name?: string;
  website?: string;
  phone?: string;
  rating?: number;
  reviewCount?: number;
}

/** One row to be inserted into `dispensary_changes`. */
interface ChangeRecord {
  dispensaryId: number;
  fieldName: string;
  oldValue: string | null;
  newValue: string;
  confidence: Confidence;
  notes: string;
  /** When true, the `requires_recrawl` column is written as `true`. */
  requiresRecrawl?: boolean;
}

/** A change record plus the label used in the progress log line. */
interface PendingChange extends ChangeRecord {
  label: string;
}

/**
 * Reads business details out of the currently loaded Google Maps page.
 *
 * This function is handed to `page.evaluate`, so it executes inside the
 * browser page rather than in Node. It must stay self-contained: it may not
 * reference any module-level variable or helper.
 *
 * @returns Whatever subset of name / website / phone / rating / review count
 *          could be located with the selectors below.
 */
function scrapeBusinessPanel(): ScrapedBusinessData {
  const data: ScrapedBusinessData = {};

  // Place name from the side panel.
  const nameSelectors = [
    'h1[class*="fontHeadline"]',
    'h1.DUwDvf',
    '[data-item-id*="name"] h1'
  ];
  for (const selector of nameSelectors) {
    const el = document.querySelector(selector);
    if (el?.textContent) {
      data.name = el.textContent.trim();
      break;
    }
  }

  // Website link; links pointing back at google.com are ignored.
  const websiteSelectors = [
    'a[data-item-id="authority"]',
    'a[data-tooltip="Open website"]',
    'a[aria-label*="Website"]'
  ];
  for (const selector of websiteSelectors) {
    const el = document.querySelector<HTMLAnchorElement>(selector);
    if (el?.href && !el.href.includes('google.com')) {
      data.website = el.href;
      break;
    }
  }

  // Phone number: first element whose text contains a 10-digit pattern.
  const phoneSelectors = [
    'button[data-item-id*="phone"]',
    'button[aria-label*="Phone"]',
    '[data-tooltip*="Copy phone number"]'
  ];
  for (const selector of phoneSelectors) {
    const el = document.querySelector(selector);
    if (el?.textContent) {
      const phoneMatch = el.textContent.match(/\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/);
      if (phoneMatch) {
        data.phone = phoneMatch[0];
        break;
      }
    }
  }

  // Star rating, parsed from the aria-label of the rating image.
  const ratingLabel = document
    .querySelector('[role="img"][aria-label*="stars"]')
    ?.getAttribute('aria-label');
  const ratingText = ratingLabel?.match(/(\d+\.?\d*)\s*stars?/)?.[1];
  if (ratingText !== undefined) {
    data.rating = parseFloat(ratingText);
  }

  // Review count, parsed from an aria-label; thousands separators are stripped.
  const reviewLabel = document
    .querySelector('[aria-label*="reviews"]')
    ?.getAttribute('aria-label');
  const reviewText = reviewLabel?.match(/([\d,]+)\s*reviews?/)?.[1];
  if (reviewText !== undefined) {
    // Explicit radix so the digits are always parsed as base 10.
    data.reviewCount = parseInt(reviewText.replace(/,/g, ''), 10);
  }

  return data;
}

/**
 * Grades a scrape result.
 *
 * @returns `'high'` when name, website and phone were all found, `'medium'`
 *          when the name plus one of website/phone was found, else `'low'`.
 */
function determineConfidence(data: ScrapedBusinessData): Confidence {
  if (data.name && data.website && data.phone) {
    return 'high';
  }
  if (data.name && (data.website || data.phone)) {
    return 'medium';
  }
  return 'low';
}

/**
 * Works out which change records should be created for one dispensary.
 *
 * Name, website and phone are proposed only when the scraped value differs
 * from the stored one; rating and review count are proposed whenever a truthy
 * value was scraped. The returned order is the order in which records are
 * inserted.
 */
function buildPendingChanges(
  disp: DispensaryRow,
  data: ScrapedBusinessData,
  confidence: Confidence
): PendingChange[] {
  const searchNote = `Found via Google Maps search for "${disp.name}"`;
  const pending: PendingChange[] = [];

  if (data.name && data.name !== disp.dba_name) {
    pending.push({
      label: 'DBA name',
      dispensaryId: disp.id,
      fieldName: 'dba_name',
      oldValue: disp.dba_name || null,
      newValue: data.name,
      confidence,
      notes: searchNote
    });
  }

  if (data.website && data.website !== disp.website) {
    pending.push({
      label: 'website (requires recrawl)',
      dispensaryId: disp.id,
      fieldName: 'website',
      oldValue: disp.website || null,
      newValue: data.website,
      confidence,
      notes: searchNote,
      requiresRecrawl: true
    });
  }

  if (data.phone && data.phone !== disp.phone) {
    pending.push({
      label: 'phone',
      dispensaryId: disp.id,
      fieldName: 'phone',
      oldValue: disp.phone || null,
      newValue: data.phone,
      confidence,
      notes: searchNote
    });
  }

  if (data.rating) {
    pending.push({
      label: 'Google rating',
      dispensaryId: disp.id,
      fieldName: 'google_rating',
      oldValue: null,
      newValue: data.rating.toString(),
      confidence,
      notes: `Google rating from Maps search`
    });
  }

  if (data.reviewCount) {
    pending.push({
      label: 'Google review count',
      dispensaryId: disp.id,
      fieldName: 'google_review_count',
      oldValue: null,
      newValue: data.reviewCount.toString(),
      confidence,
      notes: `Google review count from Maps search`
    });
  }

  return pending;
}

/**
 * Inserts a single row into `dispensary_changes` with source `'google_maps'`.
 * The `requires_recrawl` column is only named in the statement when
 * `change.requiresRecrawl` is set, so other rows keep the column's default.
 */
async function insertChangeRecord(change: ChangeRecord): Promise<void> {
  const baseValues = [
    change.dispensaryId,
    change.fieldName,
    change.oldValue,
    change.newValue,
    change.confidence,
    'google_maps',
    change.notes
  ];

  if (change.requiresRecrawl) {
    await pool.query(
      `
      INSERT INTO dispensary_changes (
        dispensary_id, field_name, old_value, new_value,
        confidence_score, source, change_notes, requires_recrawl
      ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
      `,
      [...baseValues, true]
    );
    return;
  }

  await pool.query(
    `
    INSERT INTO dispensary_changes (
      dispensary_id, field_name, old_value, new_value,
      confidence_score, source, change_notes
    ) VALUES ($1, $2, $3, $4, $5, $6, $7)
    `,
    baseValues
  );
}

/**
 * Looks up dispensaries that have no website on Google Maps and records the
 * findings as rows in `dispensary_changes`.
 *
 * Flow: obtain a proxy, launch headless Firefox, select up to 50 dispensaries
 * whose `website` is NULL or empty, search Maps for each one, scrape the
 * page, and insert one change record per new field value. A failure on one
 * dispensary is logged and counted; processing continues with the next.
 *
 * The connection pool is always ended before this function settles, and the
 * browser is always closed once it has been launched, including when context
 * or page creation fails.
 */
async function enrichFromGoogleMaps() {
  console.log('🦊 Enriching AZDHS dispensaries from Google Maps using Firefox\n');

  try {
    // Get a proxy
    const proxy = await getRandomProxy();
    if (!proxy) {
      console.log('āŒ No proxies available');
      return;
    }

    console.log(`šŸ”Œ Using proxy: ${proxy.server}\n`);

    const browser = await firefox.launch({
      headless: true,
      firefoxUserPrefs: {
        'geo.enabled': true,
        'geo.provider.use_corelocation': true,
        'geo.prompt.testing': true,
        'geo.prompt.testing.allow': true,
      }
    });

    try {
      // NOTE(review): left as `any` because the proxy object's field types are
      // declared outside this file; tighten once they are confirmed to match
      // Playwright's context options.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const contextOptions: any = {
        viewport: { width: 1920, height: 1080 },
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
        geolocation: { latitude: 33.4484, longitude: -112.0740 }, // Phoenix, AZ
        permissions: ['geolocation'],
        proxy: {
          server: proxy.server,
          username: proxy.username,
          password: proxy.password
        }
      };

      // Created inside the try so a failure here still closes the browser.
      const context = await browser.newContext(contextOptions);
      const page = await context.newPage();

      // Get all dispensaries that don't have website yet
      const result = await pool.query(`
        SELECT id, slug, name, address, city, state, zip, phone, website, dba_name
        FROM dispensaries
        WHERE website IS NULL OR website = ''
        ORDER BY id
        LIMIT 50
      `);

      const dispensaries: DispensaryRow[] = result.rows;
      console.log(`šŸ“‹ Found ${dispensaries.length} dispensaries to enrich\n`);

      let changesCreated = 0;
      let failed = 0;
      let skipped = 0;
      let remaining = dispensaries.length;

      for (const disp of dispensaries) {
        remaining--;

        console.log(`\nšŸ” Processing: ${disp.name}`);
        console.log(` Address: ${disp.address}, ${disp.city}, ${disp.state} ${disp.zip}`);

        try {
          // Search Google Maps with dispensary name + address for better results
          const searchQuery = `${disp.name} ${disp.address}, ${disp.city}, ${disp.state} ${disp.zip}`;
          const encodedQuery = encodeURIComponent(searchQuery);
          const url = `https://www.google.com/maps/search/${encodedQuery}`;

          console.log(` šŸ“ Searching Maps: ${searchQuery}`);

          await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });

          // Wait for results
          await page.waitForTimeout(3000);

          // Extract business data from the first result
          const businessData = await page.evaluate(scrapeBusinessPanel);
          console.log(` Found data:`, businessData);

          const confidence = determineConfidence(businessData);
          const pendingChanges = buildPendingChanges(disp, businessData, confidence);

          // Inserted one at a time, in order; a failure stops this dispensary
          // but leaves the records already written in place.
          for (const { label, ...change } of pendingChanges) {
            await insertChangeRecord(change);
            console.log(` šŸ“ Created change record for ${label}`);
          }

          const changesMadeForDispensary = pendingChanges.length;
          if (changesMadeForDispensary > 0) {
            console.log(` āœ… Created ${changesMadeForDispensary} change record(s) for review (${confidence} confidence)`);
            changesCreated += changesMadeForDispensary;
          } else {
            console.log(` ā­ļø No new data found`);
            skipped++;
          }
        } catch (error) {
          console.log(` āŒ Error: ${error}`);
          failed++;
        }

        // Rate limiting - wait between requests (nothing to wait for after the last one)
        if (remaining > 0) {
          await page.waitForTimeout(3000 + Math.random() * 2000);
        }
      }

      console.log('\n' + '='.repeat(80));
      console.log(`\nšŸ“Š Summary:`);
      console.log(` šŸ“ Change records created: ${changesCreated}`);
      console.log(` ā­ļø Skipped (no new data): ${skipped}`);
      console.log(` āŒ Failed: ${failed}`);
      console.log(`\nšŸ’” Visit the Change Approval page to review and approve these changes.`);
    } finally {
      await browser.close();
    }
  } finally {
    // Runs on every exit path, including when browser.close() itself throws.
    await pool.end();
  }
}

/**
 * Script entry point: runs the enrichment and exits with status 1 if it
 * rejects.
 */
async function main() {
  try {
    await enrichFromGoogleMaps();
  } catch (error) {
    console.error('Fatal error:', error);
    process.exit(1);
  }
}

// `main` handles its own rejections, so the returned promise is intentionally not awaited.
void main();