import { chromium } from 'playwright-extra';
import stealth from 'puppeteer-extra-plugin-stealth';
import { pool } from './src/db/migrate';

chromium.use(stealth());

/** A dispensary listing assembled from one Google Maps result card. */
interface GoogleMapsPlace {
  name: string;
  address?: string;
  city?: string;
  state?: string;
  zip?: string;
  latitude?: number;
  longitude?: number;
  phone?: string;
  website?: string;
  rating?: number;
  review_count?: number;
  place_id?: string;
  google_maps_url?: string;
}

/**
 * Unparsed strings pulled from a single result card inside the browser page.
 * Only plain, serializable values are collected there; all interpretation
 * happens afterwards in Node (see parsePlaceCard).
 */
interface RawPlaceCard {
  /** Trimmed text of the headline element, or null when none was found. */
  name: string | null;
  /** `aria-label` of the star-rating element, or null when absent. */
  ratingLabel: string | null;
  /** Trimmed text of every `fontBody` element inside the card. */
  bodyTexts: string[];
  /** Raw `href` attribute of the card anchor. */
  href: string | null;
}

/** Upper bound on scroll iterations so the loading loop always terminates. */
const MAX_SCROLLS = 50;

const RATING_PATTERN = /(\d+\.?\d*)\s*stars?/i;
// Accepts thousands separators ("1,234 reviews"); a bare \d+ would capture only "234".
const REVIEW_COUNT_PATTERN = /(\d[\d,]*)\s*reviews?/i;
const STREET_ADDRESS_PATTERN = /\d+\s+[\w\s]+,/;
const ZIP_PATTERN = /\b(\d{5})\b/;
const PLACE_SEGMENT_PATTERN = /place\/(.*?)(?:\/|$)/;
const COORDINATES_PATTERN = /@(-?\d+\.\d+),(-?\d+\.\d+)/;

/**
 * Converts the raw strings of one result card into a GoogleMapsPlace.
 *
 * @param card Raw strings collected from the page.
 * @returns The parsed place, or null when the card has no (non-empty) name.
 */
function parsePlaceCard(card: RawPlaceCard): GoogleMapsPlace | null {
  if (!card.name) {
    return null;
  }

  const place: GoogleMapsPlace = { name: card.name, state: 'AZ' };

  if (card.ratingLabel !== null) {
    const ratingText = card.ratingLabel.match(RATING_PATTERN)?.[1];
    if (ratingText !== undefined) {
      place.rating = parseFloat(ratingText);
    }

    const countText = card.ratingLabel.match(REVIEW_COUNT_PATTERN)?.[1];
    if (countText !== undefined) {
      place.review_count = parseInt(countText.replace(/,/g, ''), 10);
    }
  }

  // Every body text that looks like a street address overwrites the previous
  // candidate, so the last matching element wins. zip/city are only replaced
  // when the later candidate actually provides them.
  for (const text of card.bodyTexts) {
    if (!STREET_ADDRESS_PATTERN.test(text)) {
      continue;
    }
    place.address = text;

    const parts = text.split(',').map((part) => part.trim());
    if (parts.length < 2) {
      continue;
    }

    const zip = parts[parts.length - 1]?.match(ZIP_PATTERN)?.[1];
    if (zip !== undefined) {
      place.zip = zip;
    }

    // City is taken to be the second-to-last comma-separated part.
    if (parts.length >= 3) {
      place.city = parts[parts.length - 2];
    }
  }

  if (card.href) {
    place.google_maps_url = card.href.startsWith('http')
      ? card.href
      : `https://www.google.com${card.href}`;

    // NOTE(review): this captures the path segment following "place/", which
    // looks like the URL name slug rather than a Google place ID — confirm
    // before relying on place_id as an identifier.
    const placeSegment = card.href.match(PLACE_SEGMENT_PATTERN)?.[1];
    if (placeSegment !== undefined) {
      place.place_id = placeSegment;
    }

    const coords = card.href.match(COORDINATES_PATTERN);
    if (coords?.[1] !== undefined && coords[2] !== undefined) {
      place.latitude = parseFloat(coords[1]);
      place.longitude = parseFloat(coords[2]);
    }
  }

  return place;
}

/**
 * Writes the scraped places to the `stores` table: a row whose name matches
 * case-insensitively within state 'AZ' is updated (existing column values are
 * kept where the scrape has none), otherwise a new active row is inserted.
 * A failure on one place is logged and does not stop the remaining ones.
 *
 * @param places Parsed places to persist.
 * @returns How many rows were inserted and how many were updated.
 */
async function savePlaces(
  places: readonly GoogleMapsPlace[]
): Promise<{ savedCount: number; updatedCount: number }> {
  let savedCount = 0;
  let updatedCount = 0;

  for (const place of places) {
    try {
      // Check if exists by name and state
      const existing = await pool.query(
        'SELECT id FROM stores WHERE LOWER(name) = LOWER($1) AND state = $2',
        [place.name, 'AZ']
      );

      if (existing.rows.length > 0) {
        await pool.query(
          `
          UPDATE stores
          SET address = COALESCE($1, address),
              city = COALESCE($2, city),
              zip = COALESCE($3, zip),
              latitude = COALESCE($4, latitude),
              longitude = COALESCE($5, longitude),
              updated_at = CURRENT_TIMESTAMP
          WHERE id = $6
          `,
          [
            place.address,
            place.city,
            place.zip,
            place.latitude,
            place.longitude,
            existing.rows[0].id,
          ]
        );
        updatedCount++;
      } else {
        await pool.query(
          `
          INSERT INTO stores (
            name, address, city, state, zip, latitude, longitude,
            active, created_at, updated_at
          )
          VALUES ($1, $2, $3, $4, $5, $6, $7, true, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
          `,
          [
            place.name,
            place.address,
            place.city,
            'AZ',
            place.zip,
            place.latitude,
            place.longitude,
          ]
        );
        savedCount++;
      }
    } catch (error) {
      console.error(`Error saving ${place.name}: ${error}`);
    }
  }

  return { savedCount, updatedCount };
}

/**
 * Opens Google Maps in a (non-headless) Chromium window, searches for
 * "dispensaries in Arizona", scrolls the results feed until its height stops
 * growing or MAX_SCROLLS is reached, parses the visible result cards and
 * upserts them into the `stores` table.
 *
 * Errors from the scrape are logged and re-thrown. The browser and the
 * database pool are always closed before the returned promise settles.
 *
 * NOTE(review): several log prefixes below look mis-encoded (mojibake of
 * emoji); they are reproduced as found — confirm the file encoding.
 */
async function scrapeGoogleMaps(): Promise<void> {
  console.log('πŸ—ΊοΈ Scraping Arizona dispensaries from Google Maps...\n');

  const browser = await chromium.launch({
    headless: false,
  });

  const context = await browser.newContext({
    viewport: { width: 1920, height: 1080 },
    userAgent:
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    geolocation: { latitude: 33.4484, longitude: -112.0740 }, // Phoenix, AZ
    permissions: ['geolocation'],
  });

  const page = await context.newPage();

  try {
    const searchQuery = 'dispensaries in Arizona';
    const encodedQuery = encodeURIComponent(searchQuery);
    const url = `https://www.google.com/maps/search/${encodedQuery}`;

    console.log(`πŸ” Searching: ${searchQuery}`);
    console.log(`πŸ“„ Loading: ${url}\n`);

    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });

    // Wait for results to load
    await page.waitForTimeout(5000);

    console.log('πŸ“œ Scrolling to load all results...');

    // Scroll the results panel to load more listings
    const scrollPanel = page.locator('[role="feed"]').first();
    let previousHeight = 0;
    let scrollAttempts = 0;

    while (scrollAttempts < MAX_SCROLLS) {
      let currentHeight: number;
      try {
        // Jump to the bottom of the feed, give it time to load, then re-measure.
        await scrollPanel.evaluate((el) => {
          el.scrollTop = el.scrollHeight;
        });
        await page.waitForTimeout(2000);
        currentHeight = await scrollPanel.evaluate((el) => el.scrollHeight);
      } catch {
        // A failure used to be coerced to height 0, which equals the initial
        // previousHeight and was therefore reported as "end of results".
        console.log('Could not scroll panel');
        break;
      }

      if (currentHeight === previousHeight) {
        console.log(' βœ… Reached end of results');
        break;
      }

      previousHeight = currentHeight;
      scrollAttempts++;
      console.log(` Scroll ${scrollAttempts}/${MAX_SCROLLS}`);
    }

    console.log('\nπŸ“¦ Extracting dispensary data...');

    // The callback runs inside the page, so it cannot reference anything from
    // this module's scope; selectors are therefore inlined and only raw
    // strings are returned for parsing on the Node side.
    const rawCards = await page.evaluate((): RawPlaceCard[] =>
      Array.from(document.querySelectorAll('a[href*="/maps/place/"]'), (card) => {
        const nameEl = card.querySelector(
          '[class*="fontHeadline"], [class*="fontBody"] div[class*="fontHeadline"]'
        );
        const ratingEl = card.querySelector('[role="img"][aria-label*="stars"]');

        return {
          name: nameEl?.textContent?.trim() ?? null,
          ratingLabel: ratingEl?.getAttribute('aria-label') ?? null,
          bodyTexts: Array.from(
            card.querySelectorAll('[class*="fontBody"]'),
            (el) => el.textContent?.trim() ?? ''
          ),
          href: card.getAttribute('href'),
        };
      })
    );

    const places: GoogleMapsPlace[] = [];
    for (const rawCard of rawCards) {
      const place = parsePlaceCard(rawCard);
      if (place !== null) {
        places.push(place);
      }
    }

    console.log(`βœ… Found ${places.length} dispensaries on map\n`);

    // Show sample
    if (places.length > 0) {
      console.log('πŸ“‹ Sample of first 5:');
      console.table(
        places.slice(0, 5).map((p) => ({
          name: p.name,
          city: p.city,
          rating: p.rating,
          reviews: p.review_count,
        }))
      );
    }

    // Save to database
    console.log('\nπŸ’Ύ Saving to database...');
    const { savedCount, updatedCount } = await savePlaces(places);

    console.log(`\nβœ… Saved ${savedCount} new dispensaries`);
    console.log(`βœ… Updated ${updatedCount} existing dispensaries`);

    // Show total count
    const total = await pool.query('SELECT COUNT(*) FROM stores WHERE state = $1', ['AZ']);
    console.log(`\nπŸ“Š Total Arizona dispensaries in database: ${total.rows[0].count}`);
  } catch (error) {
    console.error(`❌ Error: ${error}`);
    throw error;
  } finally {
    // Nested so the pool is released even when closing the browser throws.
    try {
      await browser.close();
    } finally {
      await pool.end();
    }
  }
}

// Handle the rejection explicitly instead of leaving a floating promise:
// print the error (this also covers cleanup failures, which are not logged
// inside scrapeGoogleMaps) and report failure through the exit code.
scrapeGoogleMaps().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});