import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { Pool } from 'pg';

puppeteer.use(StealthPlugin());

// NOTE(review): the connection string (including its password) is hard-coded.
// Consider loading it from configuration before this is used beyond a local setup.
const pool = new Pool({
  connectionString: 'postgresql://sail:password@localhost:5432/dutchie_menus'
});

/** Brands page that the script navigates to and scrapes. */
const BRANDS_URL = 'https://curaleaf.com/stores/curaleaf-dispensary-phoenix-airport/brands';

/** Mobile Chrome user agent applied to the page before navigating. */
const MOBILE_USER_AGENT =
  'Mozilla/5.0 (Linux; Android 10; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Mobile Safari/537.36';

/** URL fragment that identifies the age-verification page. */
const AGE_GATE_PATH = '/age-gate';

/** Selector of the state dropdown button on the age gate. */
const STATE_DROPDOWN_SELECTOR = 'button[role="combobox"]';

// Derived from the already-imported launcher so no extra type import is needed.
type LaunchedBrowser = Awaited<ReturnType<typeof puppeteer.launch>>;
type BrowserPage = Awaited<ReturnType<LaunchedBrowser['newPage']>>;

/** Columns selected from the `proxies` table. */
interface ProxyRow {
  host: string;
  // NOTE(review): the column type is not visible from this file — confirm.
  port: number | string;
  protocol: string;
}

/**
 * Resolves after `ms` milliseconds.
 *
 * Timer-based stand-in for `page.waitForTimeout`, which newer Puppeteer
 * releases no longer provide.
 */
function sleep(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Selects one random row from the `proxies` table.
 *
 * @returns The selected proxy row.
 * @throws Error when the query returns no rows.
 */
async function pickRandomProxy(): Promise<ProxyRow> {
  const result = await pool.query(`
    SELECT host, port, protocol
    FROM proxies
    ORDER BY RANDOM()
    LIMIT 1
  `);

  const rows: ProxyRow[] = result.rows;
  const proxy = rows[0];
  if (!proxy) {
    // Fail with a clear message instead of a TypeError on `proxy.protocol`.
    throw new Error('No proxies available: the proxies query returned no rows');
  }
  return proxy;
}

/**
 * Works through the age gate: opens the state dropdown, clicks the option
 * whose text contains "arizona", logs the visible buttons/links, and waits
 * up to 10 seconds for a navigation if the URL still contains the age-gate path.
 *
 * Best effort: a missing Arizona option or a missing redirect is logged, not thrown.
 * A missing dropdown still rejects (from `waitForSelector`).
 */
async function passAgeGate(page: BrowserPage): Promise<void> {
  console.log('🔒 Handling age gate...');

  await page.waitForSelector(STATE_DROPDOWN_SELECTOR, { timeout: 10000 });
  console.log(' ✅ Found state dropdown');

  // Real browser click so the page's own handlers open the option list.
  await page.click(STATE_DROPDOWN_SELECTOR);
  console.log(' ✅ Clicked dropdown');

  await sleep(2000);

  // Runs inside the page, so the callback must not reference outer variables.
  const arizonaClicked: boolean = await page.evaluate(() => {
    const options = Array.from(document.querySelectorAll<HTMLElement>('[role="option"]'));
    const arizona = options.find(option =>
      option.textContent?.toLowerCase().includes('arizona')
    );
    if (!arizona) {
      return false;
    }
    arizona.click();
    return true;
  });

  if (!arizonaClicked) {
    console.log(' ❌ Could not find Arizona option');
    return;
  }

  console.log(' ✅ Selected Arizona');

  // Give the page time to react to the selection.
  await sleep(3000);
  console.log(' 📍 URL after selection:', page.url());

  // Debug aid: list every visible button/link in case a confirm step appeared.
  const buttons = await page.evaluate(() =>
    Array.from(document.querySelectorAll<HTMLElement>('button, a'))
      .map(el => ({
        text: el.textContent?.trim() ?? '',
        visible: el.offsetParent !== null,
        ariaLabel: el.getAttribute('aria-label') ?? ''
      }))
      .filter(entry => entry.visible)
  );
  console.log(' 📋 Visible buttons:', buttons);

  if (!page.url().includes(AGE_GATE_PATH)) {
    return;
  }

  console.log(' ⏳ Still on age gate, waiting for redirect...');
  try {
    await page.waitForNavigation({ timeout: 10000 });
    console.log(' ✅ Redirected to:', page.url());
  } catch {
    // Deliberately non-fatal: scraping is still attempted on whatever page is loaded.
    console.log(' ⚠️ No redirect happened');
  }
}

/**
 * Collects the distinct, trimmed text of every element matching one of the
 * brand-like selectors, keeping only texts of 1 to 49 characters.
 *
 * @returns Unique texts in first-seen order.
 */
async function collectBrandNames(page: BrowserPage): Promise<string[]> {
  // Runs inside the page, so the callback must not reference outer variables.
  return page.evaluate(() => {
    const selectors = [
      '[data-testid*="brand"]',
      '[class*="Brand"]',
      '[class*="brand"]',
      'a[href*="/brand/"]'
    ];

    const found = new Set<string>();
    for (const selector of selectors) {
      document.querySelectorAll(selector).forEach(el => {
        const text = el.textContent?.trim();
        if (text && text.length < 50) {
          found.add(text);
        }
      });
    }
    return Array.from(found);
  });
}

/**
 * Entry point: launches a headless browser through a random proxy from the
 * database, opens the brands page with a mobile user agent, handles the age
 * gate when redirected to it, and prints the brand names found (or the page
 * title and the first 800 characters of body text when none are found).
 *
 * Errors are logged rather than rethrown. The browser (if launched) and the
 * connection pool are always closed before the function settles.
 */
async function scrapeRealClicks(): Promise<void> {
  let browser: LaunchedBrowser | undefined;

  try {
    const proxy = await pickRandomProxy();
    const proxyUrl = `${proxy.protocol}://${proxy.host}:${proxy.port}`;
    console.log('🔌 Proxy:', `${proxy.host}:${proxy.port}`);

    browser = await puppeteer.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        `--proxy-server=${proxyUrl}`
      ]
    });

    const page = await browser.newPage();

    await page.setUserAgent(MOBILE_USER_AGENT);
    console.log('📱 User-Agent:', MOBILE_USER_AGENT);
    console.log('');

    console.log('🌐 Going to:', BRANDS_URL);
    await page.goto(BRANDS_URL, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);

    console.log('📍 Current URL:', page.url());

    if (page.url().includes(AGE_GATE_PATH)) {
      await passAgeGate(page);
    }

    console.log('');
    console.log('📦 Attempting to scrape brands...');
    console.log('📍 Final URL:', page.url());
    await sleep(3000);

    const brands = await collectBrandNames(page);
    console.log(`Found ${brands.length} brands`);

    if (brands.length > 0) {
      const rule = '─'.repeat(60);
      console.log(rule);
      brands.forEach((brand, index) => console.log(` ${index + 1}. ${brand}`));
      console.log(rule);
    } else {
      // Nothing matched: dump a slice of the page to help diagnose why.
      const pageData = await page.evaluate(() => ({
        title: document.title,
        bodyText: document.body.innerText.substring(0, 800)
      }));

      console.log('');
      console.log('📄 PAGE CONTENT:');
      console.log('Title:', pageData.title);
      console.log('Text:', pageData.bodyText);
    }
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('❌ Error:', message);
  } finally {
    if (browser) {
      // A failed close must not prevent the pool from being shut down.
      await browser.close().catch((closeError: unknown) => {
        console.error('⚠️ Failed to close browser:', closeError);
      });
    }
    await pool.end();
  }
}

// Only a failure while ending the pool can reach this handler.
scrapeRealClicks().catch((error: unknown) => {
  console.error('❌ Unhandled error:', error);
});