import { chromium } from 'playwright';
import type { Browser, Page } from 'playwright';
import { pool } from './src/db/migrate';
import { getStateProxy, getRandomProxy } from './src/utils/proxyManager';

// NOTE(review): the emoji prefixes in the log messages below appear to be
// mis-encoded (mojibake). They are runtime strings and are kept exactly as
// found; re-save the file as UTF-8 with the intended emoji if desired.

/** One dispensary row combined with whatever was scraped from Google for it. */
interface DispensaryEnrichment {
  id: number;
  azdhs_name: string;
  address: string;
  city: string;
  state: string;
  zip: string;
  dba_name?: string;
  website?: string;
  google_phone?: string;
  google_rating?: number;
  google_review_count?: number;
  confidence: 'high' | 'medium' | 'low';
  notes?: string;
}

/** Fields scraped from a Google result page; every field is best-effort. */
interface GoogleBusinessData {
  name?: string;
  website?: string;
  phone?: string;
  rating?: number;
  reviewCount?: number;
}

/**
 * Grades how complete a scraped result is.
 *
 * @returns `'high'` when name, website and phone were all found, `'medium'`
 *   when the name plus either website or phone was found, otherwise `'low'`.
 */
function classifyConfidence(data: GoogleBusinessData): DispensaryEnrichment['confidence'] {
  if (data.name && data.website && data.phone) {
    return 'high';
  }
  if (data.name && (data.website || data.phone)) {
    return 'medium';
  }
  return 'low';
}

/**
 * Runs a Google search for `searchQuery` in `page` and extracts business
 * details from the result DOM using a list of candidate CSS selectors.
 *
 * @param page - Playwright page to navigate; it is reused between calls.
 * @param searchQuery - Raw (unencoded) search text.
 * @returns Whatever fields could be found; missing fields are left undefined.
 * @throws Propagates Playwright navigation/evaluation errors (e.g. timeout).
 */
async function scrapeBusinessData(page: Page, searchQuery: string): Promise<GoogleBusinessData> {
  const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(searchQuery)}`;

  await page.goto(searchUrl, { waitUntil: 'domcontentloaded', timeout: 15000 });
  // Fixed pause after DOMContentLoaded before the DOM is queried.
  await page.waitForTimeout(2000);

  // The callback is serialized and executed inside the browser, so it must be
  // self-contained: no references to Node-side variables or helpers.
  return page.evaluate(() => {
    const data: GoogleBusinessData = {};

    // Business name: first selector with non-empty text wins.
    const nameSelectors = [
      '[data-attrid="title"]',
      'h2[data-attrid="title"]',
      '.SPZz6b h2',
      'h3.LC20lb',
      '.kp-header .SPZz6b'
    ];
    for (const selector of nameSelectors) {
      const el = document.querySelector(selector);
      if (el?.textContent) {
        data.name = el.textContent.trim();
        break;
      }
    }

    // Website: first matching link that does not point back at Google.
    const websiteSelectors = [
      'a[data-dtype="d3ph"]',
      '.yuRUbf a',
      'a.ab_button[href^="http"]'
    ];
    for (const selector of websiteSelectors) {
      const el = document.querySelector<HTMLAnchorElement>(selector);
      if (el?.href && !el.href.includes('google.com')) {
        data.website = el.href;
        break;
      }
    }

    // Phone: accept only text that looks like a 3-3-4 digit phone number.
    const phoneSelectors = [
      '[data-dtype="d3ph"]',
      'span[data-dtype="d3ph"]',
      '.LrzXr.zdqRlf'
    ];
    for (const selector of phoneSelectors) {
      const el = document.querySelector(selector);
      if (el?.textContent && /\d{3}.*\d{3}.*\d{4}/.test(el.textContent)) {
        data.phone = el.textContent.trim();
        break;
      }
    }

    // Rating: first decimal number in the rating element.
    const ratingText = document
      .querySelector('.Aq14fc')
      ?.textContent?.match(/(\d+\.?\d*)/)?.[1];
    if (ratingText) {
      data.rating = parseFloat(ratingText);
    }

    // Review count: allow thousands separators ("1,234") and strip them, so
    // the value is not truncated at the first comma.
    const reviewText = document
      .querySelector('.hqzQac span')
      ?.textContent?.match(/(\d[\d,]*)/)?.[1];
    if (reviewText) {
      data.reviewCount = parseInt(reviewText.replace(/,/g, ''), 10);
    }

    return data;
  });
}

/**
 * Looks up `azdhs_list` rows that have no website, searches Google for each
 * address through a proxy, and either writes the result back (high
 * confidence) or collects the row for manual review.
 *
 * The browser and the database pool are always released before this function
 * settles, including when proxy lookup or browser setup fails.
 *
 * @param limit - Maximum number of rows to process in one run. Defaults to 2,
 *   which is the value that was previously hard-coded in the query.
 */
async function enrichDispensariesFromGoogle(limit: number = 2): Promise<void> {
  console.log('šŸ” Starting Google enrichment for AZDHS dispensaries\n');

  let browser: Browser | undefined;

  try {
    // Get an Arizona proxy if available, otherwise any proxy
    let proxy = await getStateProxy('Arizona');
    if (!proxy) {
      console.log('āš ļø No Arizona proxy available, trying any US proxy...');
      proxy = await getRandomProxy();
    }

    if (!proxy) {
      console.log('āŒ No proxies available. Please add proxies to the database.');
      return; // the finally block below ends the pool
    }

    console.log(`šŸ”Œ Using proxy: ${proxy.server}\n`);

    browser = await chromium.launch({
      headless: true,
      args: [
        '--disable-blink-features=AutomationControlled',
      ]
    });

    // Kept loosely typed because the proxy record's exact field types come
    // from the project's proxy manager and are not visible in this file.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const contextOptions: any = {
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      viewport: { width: 1920, height: 1080 },
      locale: 'en-US',
      timezoneId: 'America/Phoenix',
      geolocation: { latitude: 33.4484, longitude: -112.0740 }, // Phoenix, AZ
      permissions: ['geolocation'],
      proxy: {
        server: proxy.server,
        username: proxy.username,
        password: proxy.password
      }
    };

    const context = await browser.newContext(contextOptions);

    // Add stealth techniques (runs in every page before its own scripts).
    await context.addInitScript(() => {
      // Remove webdriver flag
      Object.defineProperty(navigator, 'webdriver', {
        get: () => false
      });

      // Chrome runtime
      (window as any).chrome = {
        runtime: {}
      };

      // Permissions: the original method must stay bound to the Permissions
      // object, otherwise calling it throws "Illegal invocation".
      const originalQuery = window.navigator.permissions.query.bind(
        window.navigator.permissions
      );
      window.navigator.permissions.query = (parameters: any) => (
        parameters.name === 'notifications'
          ? Promise.resolve({ state: Notification.permission } as PermissionStatus)
          : originalQuery(parameters)
      );
    });

    const page = await context.newPage();

    // Get dispensaries that don't have a website yet, capped at `limit`.
    const result = await pool.query(
      `
      SELECT id, name, address, city, state, zip, phone
      FROM azdhs_list
      WHERE website IS NULL OR website = ''
      ORDER BY id
      LIMIT $1
      `,
      [limit]
    );

    const dispensaries = result.rows;
    console.log(`šŸ“‹ Found ${dispensaries.length} dispensaries to enrich\n`);

    let enriched = 0;
    let failed = 0;
    const needsReview: DispensaryEnrichment[] = [];

    for (const disp of dispensaries) {
      console.log(`\nšŸ” Processing: ${disp.name}`);
      console.log(` Address: ${disp.address}, ${disp.city}, ${disp.state} ${disp.zip}`);

      try {
        // Search Google for the address + dispensary
        const searchQuery = `${disp.address}, ${disp.city}, ${disp.state} ${disp.zip} dispensary`;
        console.log(` Searching: ${searchQuery}`);

        const businessData = await scrapeBusinessData(page, searchQuery);
        console.log(` Found data:`, businessData);

        const confidence = classifyConfidence(businessData);

        if (confidence === 'high') {
          // Auto-update high confidence matches
          await pool.query(
            `
            UPDATE azdhs_list
            SET
              dba_name = $1,
              website = $2,
              google_rating = $3,
              google_review_count = $4,
              updated_at = CURRENT_TIMESTAMP
            WHERE id = $5
            `,
            [
              businessData.name,
              businessData.website,
              businessData.rating,
              businessData.reviewCount,
              disp.id
            ]
          );
          console.log(` āœ… Updated (high confidence)`);
          enriched++;
        } else {
          // Flag for manual review
          needsReview.push({
            id: disp.id,
            azdhs_name: disp.name,
            address: disp.address,
            city: disp.city,
            state: disp.state,
            zip: disp.zip,
            dba_name: businessData.name,
            website: businessData.website,
            google_phone: businessData.phone,
            google_rating: businessData.rating,
            google_review_count: businessData.reviewCount,
            confidence
          });
          console.log(` āš ļø Needs review (${confidence} confidence)`);
        }
      } catch (error) {
        // A failure on one dispensary is counted and must not stop the run.
        console.log(` āŒ Error: ${error}`);
        failed++;
      }

      // Rate limiting - wait 3-5 seconds between requests
      await page.waitForTimeout(3000 + Math.random() * 2000);
    }

    console.log('\n' + '='.repeat(80));
    console.log(`\nšŸ“Š Summary:`);
    console.log(` āœ… Enriched: ${enriched}`);
    console.log(` āš ļø Needs review: ${needsReview.length}`);
    console.log(` āŒ Failed: ${failed}`);

    if (needsReview.length > 0) {
      console.log('\nšŸ“‹ Dispensaries needing manual review:\n');
      console.table(needsReview.map(d => ({
        ID: d.id,
        'AZDHS Name': d.azdhs_name.substring(0, 30),
        'Google Name': d.dba_name?.substring(0, 30) || '-',
        Website: d.website ? 'Yes' : 'No',
        Phone: d.google_phone ? 'Yes' : 'No',
        Confidence: d.confidence
      })));
    }
  } finally {
    // Always release the browser first, then the pool, even if closing the
    // browser itself fails.
    try {
      await browser?.close();
    } finally {
      await pool.end();
    }
  }
}

/**
 * Adds the enrichment columns to `azdhs_list` if they do not exist yet.
 * Safe to run repeatedly because every column uses `IF NOT EXISTS`.
 */
async function setupDatabase(): Promise<void> {
  await pool.query(`
    ALTER TABLE azdhs_list
    ADD COLUMN IF NOT EXISTS dba_name VARCHAR(255),
    ADD COLUMN IF NOT EXISTS google_rating DECIMAL(2,1),
    ADD COLUMN IF NOT EXISTS google_review_count INTEGER
  `);
}

/** Script entry point: prepares the schema, runs the enrichment, exits 1 on failure. */
async function main(): Promise<void> {
  try {
    await setupDatabase();
    await enrichDispensariesFromGoogle();
  } catch (error) {
    console.error('Fatal error:', error);
    process.exit(1);
  }
}

void main();