import { chromium } from 'playwright';
import { pool } from './src/db/migrate';
import { getRandomProxy } from './src/utils/proxyManager';
import * as fs from 'fs';

/** One text node whose content mentions the business name, plus details of its parent element. */
interface TextMatch {
  text: string;
  tagName: string | undefined;
  className: string | undefined;
  id: string | undefined;
  dataAttrs: string[];
}

/**
 * Manual debugging aid for the Google scraper.
 *
 * Picks a proxy via `getRandomProxy()`, opens a visible Chromium window routed
 * through it, runs one hard-coded Google search and then:
 *   - writes a full-page screenshot and the page HTML under /tmp,
 *   - reports whether (and where in the DOM) the text "all greens" appears,
 *   - probes a fixed list of candidate name selectors,
 *   - prints the first 10 links whose URL does not mention
 *     google.com / youtube.com / facebook.com,
 *   - prints anything in the page text that matches a US-style phone pattern,
 *   - keeps the browser open for 30 seconds for manual inspection.
 *
 * The browser and the database pool are released on every exit path,
 * including failures during proxy lookup or browser/context setup.
 *
 * @returns A promise that settles once the browser and pool have been closed.
 *          It rejects if any step (navigation, screenshot, file write, ...) throws.
 */
async function debugGoogleScraper(): Promise<void> {
  console.log('πŸ” Debugging Google scraper with proxy\n');

  try {
    // Get a proxy
    const proxy = await getRandomProxy();
    if (!proxy) {
      console.log('❌ No proxies available');
      return; // the outer `finally` still closes the pool
    }

    console.log(`πŸ”Œ Using proxy: ${proxy.server}\n`);

    const browser = await chromium.launch({
      headless: false, // Run in visible mode
      args: ['--disable-blink-features=AutomationControlled']
    });

    // Everything after launch lives inside this try so the browser is closed
    // even when context/page creation fails.
    try {
      const context = await browser.newContext({
        userAgent:
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        viewport: { width: 1920, height: 1080 },
        locale: 'en-US',
        timezoneId: 'America/Phoenix',
        geolocation: { latitude: 33.4484, longitude: -112.0740 },
        permissions: ['geolocation'],
        proxy: {
          server: proxy.server,
          // Normalise a missing credential to `undefined`.
          username: proxy.username ?? undefined,
          password: proxy.password ?? undefined
        }
      });

      // Add stealth: report `navigator.webdriver` as false and define a
      // `window.chrome.runtime` object in every page of this context.
      await context.addInitScript(() => {
        Object.defineProperty(navigator, 'webdriver', { get: () => false });
        (window as any).chrome = { runtime: {} };
      });

      const page = await context.newPage();

      // Test with the "All Greens Dispensary" example
      const testAddress = '1035 W Main St, Quartzsite, AZ 85346';
      const searchQuery = `${testAddress} dispensary`;
      const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(searchQuery)}`;

      console.log(`πŸ” Testing search: ${searchQuery}`);
      console.log(`πŸ“ URL: ${searchUrl}\n`);

      await page.goto(searchUrl, { waitUntil: 'networkidle', timeout: 30000 });
      // Extra settle time after the load before capturing anything.
      await page.waitForTimeout(3000);

      // Take screenshot
      await page.screenshot({ path: '/tmp/google-search-debug.png', fullPage: true });
      console.log('πŸ“Έ Screenshot saved to /tmp/google-search-debug.png\n');

      // Get the full HTML
      const html = await page.content();
      fs.writeFileSync('/tmp/google-search-debug.html', html);
      console.log('πŸ’Ύ HTML saved to /tmp/google-search-debug.html\n');

      // Try to find any text that looks like "All Greens"
      const pageText = await page.evaluate(() => document.body.innerText);
      const hasAllGreens = pageText.toLowerCase().includes('all greens');
      console.log(`πŸ” Page contains "All Greens": ${hasAllGreens}\n`);

      if (hasAllGreens) {
        console.log('βœ… Google found the business!\n');

        // Walk every text node under <body> and record the parent element of
        // each one that mentions the name, to help pick a stable selector.
        const nameInfo = await page.evaluate(() => {
          const results: TextMatch[] = [];
          const walker = document.createTreeWalker(
            document.body,
            NodeFilter.SHOW_TEXT,
            null
          );

          let node: Node | null;
          while ((node = walker.nextNode()) !== null) {
            const text = node.textContent?.trim() || '';
            if (text.toLowerCase().includes('all greens')) {
              const element = node.parentElement;
              results.push({
                text,
                tagName: element?.tagName,
                className: element?.className,
                id: element?.id,
                dataAttrs: Array.from(element?.attributes ?? [])
                  .filter(attr => attr.name.startsWith('data-'))
                  .map(attr => `${attr.name}="${attr.value}"`)
              });
            }
          }

          return results;
        });

        console.log('πŸ“ Found "All Greens" in these elements:');
        console.log(JSON.stringify(nameInfo, null, 2));
      }

      // Try current selectors
      console.log('\nπŸ§ͺ Testing current selectors:\n');

      const nameSelectors = [
        '[data-attrid="title"]',
        'h2[data-attrid="title"]',
        '.SPZz6b h2',
        'h3.LC20lb',
        '.kp-header .SPZz6b'
      ];

      // Only the first element matching each selector is inspected.
      for (const selector of nameSelectors) {
        const element = await page.$(selector);
        if (element) {
          const text = await element.textContent();
          console.log(`βœ… ${selector}: "${text?.trim()}"`);
        } else {
          console.log(`❌ ${selector}: not found`);
        }
      }

      // Look for website links
      console.log('\nπŸ”— Looking for website links:\n');
      const links = await page.evaluate(() => {
        const allLinks = Array.from(document.querySelectorAll('a[href]'));
        return allLinks
          .filter(a => {
            // Substring match on the full URL, not just the hostname.
            const href = (a as HTMLAnchorElement).href;
            return href &&
              !href.includes('google.com') &&
              !href.includes('youtube.com') &&
              !href.includes('facebook.com');
          })
          .slice(0, 10)
          .map(a => ({
            href: (a as HTMLAnchorElement).href,
            text: a.textContent?.trim().substring(0, 50),
            className: a.className
          }));
      });

      console.log('First 10 non-Google links:');
      console.log(JSON.stringify(links, null, 2));

      // Look for phone numbers
      console.log('\nπŸ“ž Looking for phone numbers:\n');
      // Matches 10-digit numbers such as (928) 555-1234, 928-555-1234, 928.555.1234.
      const phoneMatches = pageText.match(/\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g);
      if (phoneMatches) {
        console.log('Found phone numbers:', phoneMatches);
      } else {
        console.log('No phone numbers found in page text');
      }

      console.log('\n⏸️ Browser will stay open for 30 seconds for manual inspection...');
      await page.waitForTimeout(30000);
    } finally {
      await browser.close();
    }
  } finally {
    // Runs even if proxy lookup, browser setup, or browser.close() failed.
    await pool.end();
  }
}

debugGoogleScraper().catch((error: unknown) => {
  console.error(error);
  // Signal failure to the calling shell instead of exiting with status 0.
  process.exitCode = 1;
});