import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { Pool } from 'pg';

puppeteer.use(StealthPlugin());

// The connection string can be overridden with DATABASE_URL so credentials do
// not have to live in source; the original local default is kept as a fallback.
const pool = new Pool({
  connectionString:
    process.env.DATABASE_URL ?? 'postgresql://sail:password@localhost:5432/dutchie_menus'
});

// Derived from the puppeteer-extra API so no additional package import is needed.
type Browser = Awaited<ReturnType<typeof puppeteer.launch>>;
type Page = Awaited<ReturnType<Browser['newPage']>>;

/** Columns selected from the `proxies` table. NOTE(review): column types are assumed — confirm against the schema. */
type ProxyRow = { host: string; port: number | string; protocol: string };

const STATE_DROPDOWN_SELECTOR = 'button[role="combobox"]';

/** Resolves after `ms` milliseconds. Stands in for `page.waitForTimeout`, which newer Puppeteer releases no longer provide. */
const sleep = (ms: number): Promise<void> => new Promise(resolve => setTimeout(resolve, ms));

/** Turns an unknown thrown value into a printable message. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Picks one random row from the `proxies` table.
 *
 * @throws Error when the table returns no rows, so the caller gets a clear
 *         message instead of a property access on `undefined`.
 */
async function pickRandomProxy(): Promise<ProxyRow> {
  const proxyResult = await pool.query(`
    SELECT host, port, protocol
    FROM proxies
    ORDER BY RANDOM()
    LIMIT 1
  `);
  const proxy: ProxyRow | undefined = proxyResult.rows[0];
  if (!proxy) {
    throw new Error('No proxies available in the proxies table');
  }
  return proxy;
}

/**
 * Works through the age gate: opens the state dropdown, picks Arizona, then
 * clicks the first button/link that looks like a submit control and waits for
 * the resulting navigation. Every outcome is logged; a missing option or
 * button is not treated as an error.
 */
async function passAgeGate(page: Page): Promise<void> {
  // Wait for state selector to appear, then open it
  await page.waitForSelector(STATE_DROPDOWN_SELECTOR, { timeout: 10000 });
  await page.click(STATE_DROPDOWN_SELECTOR);
  console.log(' ✅ Clicked state dropdown');
  await sleep(1000);

  const arizonaClicked = await page.evaluate(() => {
    // Word-boundary match so "az" only matches as a standalone token,
    // not as a fragment of some longer word.
    const azOption = Array.from(document.querySelectorAll<HTMLElement>('[role="option"]')).find(
      opt => /\b(arizona|az)\b/i.test(opt.textContent ?? '')
    );
    if (!azOption) {
      return false;
    }
    azOption.click();
    return true;
  });

  if (!arizonaClicked) {
    console.log(' ❌ Could not find Arizona option');
    // Debug: show what options are available
    const availableOptions = await page.evaluate(() =>
      Array.from(document.querySelectorAll('[role="option"]'))
        .map(opt => opt.textContent?.trim())
        .filter(Boolean)
    );
    console.log(' Available options:', availableOptions);
    return;
  }

  console.log(' ✅ Selected Arizona');
  await sleep(2000);

  // Arm the navigation wait BEFORE clicking: if the redirect completes before
  // waitForNavigation is called, the wait would otherwise run into its timeout.
  // Rejections (timeout, browser closed) are mapped to `false` so the promise
  // can never surface as an unhandled rejection.
  const navigation = page
    .waitForNavigation({ waitUntil: 'networkidle2', timeout: 30000 })
    .then(
      () => true,
      () => false
    );

  const submitButtonClicked = await page.evaluate(() => {
    // First button or link whose text or aria-label suggests "let me in".
    const submitBtn = Array.from(document.querySelectorAll<HTMLElement>('button, a')).find(btn => {
      const text = btn.textContent?.toLowerCase() ?? '';
      const ariaLabel = btn.getAttribute('aria-label')?.toLowerCase() ?? '';
      return (
        text.includes('enter') ||
        text.includes('submit') ||
        text.includes('continue') ||
        ariaLabel.includes('age')
      );
    });
    if (!submitBtn) {
      return false;
    }
    submitBtn.click();
    return true;
  });

  if (!submitButtonClicked) {
    console.log(' ⚠️ Could not find submit button, checking if redirect happened anyway...');
    await sleep(3000);
    console.log(' 📍 Current URL:', page.url());
    return;
  }

  console.log(' ✅ Clicked submit button');
  if (await navigation) {
    console.log(' ✅ Age gate passed!');
    console.log(' 📍 New URL:', page.url());
  } else {
    console.log(' ⚠️ Navigation timeout - checking current page...');
    console.log(' 📍 Current URL:', page.url());
  }
}

/**
 * Collects the distinct, trimmed text of every element matching one of several
 * brand-like selectors. Only non-empty text shorter than 50 characters is kept.
 */
async function collectBrandNames(page: Page): Promise<string[]> {
  return page.evaluate(() => {
    const selectors = [
      '[data-testid*="brand"]',
      '[class*="Brand"]',
      '[class*="brand"]',
      'a[href*="/brand/"]',
      '.brand-card',
      '.brand-item'
    ];
    const found = new Set<string>();
    for (const selector of selectors) {
      for (const el of Array.from(document.querySelectorAll(selector))) {
        const text = el.textContent?.trim();
        if (text && text.length < 50) {
          found.add(text);
        }
      }
    }
    return Array.from(found);
  });
}

/** Logs the page title, URL and the first 500 characters of body text for debugging. */
async function logPageDebug(page: Page): Promise<void> {
  const pageData = await page.evaluate(() => ({
    title: document.title,
    url: window.location.href,
    bodyText: document.body.innerText.substring(0, 500)
  }));
  console.log('');
  console.log('📄 PAGE DEBUG:');
  console.log('Title:', pageData.title);
  console.log('URL:', pageData.url);
  console.log('Text preview:', pageData.bodyText);
}

/**
 * Launches a headless browser through a random proxy from the database, opens
 * the Curaleaf Phoenix Airport brands page with a mobile user agent, handles
 * the age gate if the browser was redirected to it, and prints the brand names
 * found on the page (or a debug dump when none were found).
 *
 * Errors are logged rather than rethrown; the browser and the connection pool
 * are always released.
 */
async function scrapeWithAgeGate(): Promise<void> {
  let browser: Browser | undefined;

  try {
    // Get random proxy
    const proxy = await pickRandomProxy();
    const proxyUrl = `${proxy.protocol}://${proxy.host}:${proxy.port}`;
    console.log('🔌 Proxy:', `${proxy.host}:${proxy.port}`);

    // Launch browser with proxy
    browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox', `--proxy-server=${proxyUrl}`]
    });

    const page = await browser.newPage();

    // Mobile Chrome UA
    const mobileUA =
      'Mozilla/5.0 (Linux; Android 10; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Mobile Safari/537.36';
    await page.setUserAgent(mobileUA);
    console.log('📱 User-Agent:', mobileUA);
    console.log('');

    const url = 'https://curaleaf.com/stores/curaleaf-dispensary-phoenix-airport/brands';
    console.log('🌐 Going to:', url);
    console.log('');

    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });

    // Check if we're on age gate
    const currentUrl = page.url();
    console.log('📍 Current URL:', currentUrl);

    if (currentUrl.includes('/age-gate')) {
      console.log('🔒 Age gate detected, handling...');
      await passAgeGate(page);
    }

    // Now try to scrape brands
    console.log('');
    console.log('📦 Scraping brands...');
    await sleep(3000);

    const brands = await collectBrandNames(page);
    console.log(`Found ${brands.length} brands`);

    if (brands.length > 0) {
      console.log('─'.repeat(60));
      brands.forEach((b, i) => console.log(` ${i + 1}. ${b}`));
      console.log('─'.repeat(60));
    } else {
      // Debug: show page content
      await logPageDebug(page);
    }
  } catch (error: unknown) {
    console.error('❌ Error:', describeError(error));
  } finally {
    if (browser) {
      // A failing close must not prevent the pool from being released.
      await browser.close().catch((closeError: unknown) => {
        console.error('❌ Error:', describeError(closeError));
      });
    }
    await pool.end();
  }
}

// Cleanup in `finally` (pool.end) can still reject; report it instead of
// leaving a floating promise.
scrapeWithAgeGate().catch((error: unknown) => {
  console.error('❌ Error:', describeError(error));
  process.exitCode = 1;
});