import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { Pool } from 'pg';

puppeteer.use(StealthPlugin());

// NOTE(review): the connection string (including credentials) is hard-coded;
// consider sourcing it from configuration before using this outside local dev.
const pool = new Pool({
  connectionString: 'postgresql://sail:password@localhost:5432/dutchie_menus'
});

/**
 * Diagnostic scrape of the Curaleaf "brands" page using a mobile Chrome
 * user-agent routed through a proxy picked at random from the `proxies` table.
 *
 * Steps, in order:
 *  1. Select one random row (host, port, protocol) from `proxies`.
 *  2. Launch headless Chromium with `--proxy-server` pointing at that proxy.
 *  3. Set an Android Chrome user-agent and log every request/response.
 *  4. Navigate to the brands page, wait for the network to settle plus a fixed
 *     5 second grace period, then print a summary of the rendered page.
 *  5. Collect the text of elements matching a list of brand-like selectors
 *     and print the de-duplicated results.
 *
 * Errors raised while scraping are logged, not rethrown. The browser (if it
 * was launched) and the Postgres pool are always closed before returning, so
 * the function can only be run once per process.
 *
 * @returns A promise that resolves once scraping and cleanup have finished.
 */
async function scrapeMobileChrome(): Promise<void> {
  let browser: Awaited<ReturnType<typeof puppeteer.launch>> | undefined;

  try {
    // Get random proxy
    const proxyResult = await pool.query(`
      SELECT host, port, protocol
      FROM proxies
      ORDER BY RANDOM()
      LIMIT 1
    `);

    const proxy = proxyResult.rows[0];
    // An empty table yields no row; fail with a clear message instead of a
    // "cannot read properties of undefined" TypeError.
    if (!proxy) {
      throw new Error('No proxies available in the proxies table');
    }

    const proxyUrl = `${proxy.protocol}://${proxy.host}:${proxy.port}`;
    console.log('🔌 Proxy:', `${proxy.host}:${proxy.port}`);

    // Launch browser with proxy
    browser = await puppeteer.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        `--proxy-server=${proxyUrl}`
      ]
    });

    const page = await browser.newPage();

    // Standard mobile Chrome user-agent (Android)
    const mobileUA =
      'Mozilla/5.0 (Linux; Android 10; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Mobile Safari/537.36';
    await page.setUserAgent(mobileUA);

    console.log('📱 User-Agent:', mobileUA);
    console.log('');

    // Log requests and responses
    page.on('request', request => {
      console.log('📤 REQUEST:', request.method(), request.url());
    });

    page.on('response', response => {
      const status = response.status();
      const url = response.url();
      const emoji = status === 200 ? '✅' : status === 403 ? '🚫' : '📥';
      console.log(`${emoji} RESPONSE: ${status} ${url}`);
    });

    const url = 'https://curaleaf.com/stores/curaleaf-dispensary-phoenix-airport/brands';
    console.log('🌐 Going to:', url);
    console.log('');

    await page.goto(url, {
      waitUntil: 'networkidle2',
      timeout: 60000
    });

    // Fixed grace period after network idle. `page.waitForTimeout` is
    // deprecated/removed in current Puppeteer releases, so use a plain timer.
    await new Promise<void>(resolve => setTimeout(resolve, 5000));

    // Check what we got
    const pageData = await page.evaluate(() => ({
      title: document.title,
      url: window.location.href,
      bodyLength: document.body.innerHTML.length,
      bodyText: document.body.innerText.substring(0, 500),
      hasReactRoot: document.getElementById('__next') !== null,
      scriptTags: document.querySelectorAll('script').length
    }));

    console.log('');
    console.log('📄 PAGE DATA:');
    console.log('─'.repeat(60));
    console.log('Title:', pageData.title);
    console.log('URL:', pageData.url);
    console.log('Body HTML size:', pageData.bodyLength, 'chars');
    console.log('React root exists:', pageData.hasReactRoot ? '✅' : '❌');
    console.log('Script tags:', pageData.scriptTags);
    console.log('');
    console.log('First 500 chars of text:');
    console.log(pageData.bodyText);
    console.log('');

    // Try to scrape brands
    const brands = await page.evaluate(() => {
      const selectors = [
        '[data-testid*="brand"]',
        '[class*="Brand"]',
        '[class*="brand"]',
        'a[href*="/brand/"]',
        '.brand-card',
        '.brand-item'
      ];

      // A Set de-duplicates text matched by more than one selector.
      const found = new Set<string>();
      for (const selector of selectors) {
        for (const el of Array.from(document.querySelectorAll(selector))) {
          const text = el.textContent?.trim();
          // Keep only short, non-empty strings (under 50 characters).
          if (text && text.length > 0 && text.length < 50) {
            found.add(text);
          }
        }
      }

      return Array.from(found);
    });

    console.log(`📦 Found ${brands.length} brands`);
    if (brands.length > 0) {
      console.log('─'.repeat(60));
      brands.forEach((b, i) => console.log(` ${i + 1}. ${b}`));
      console.log('─'.repeat(60));
    }
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('❌ Error:', message);
  } finally {
    // Nested try/finally so the pool is released even if closing the
    // browser throws.
    try {
      if (browser) await browser.close();
    } finally {
      await pool.end();
    }
  }
}

// Scrape errors are handled inside the function; this only catches failures
// raised during cleanup so they do not become unhandled rejections.
scrapeMobileChrome().catch((error: unknown) => {
  console.error('❌ Error:', error instanceof Error ? error.message : String(error));
});