import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { Pool } from 'pg';

puppeteer.use(StealthPlugin());

// Connection string may be overridden with the optional DATABASE_URL
// environment variable; the fallback is the original local-development value.
const pool = new Pool({
  connectionString:
    process.env.DATABASE_URL ?? 'postgresql://sail:password@localhost:5432/dutchie_menus',
});

/** Columns selected from the `proxies` table by the random-proxy query. */
type ProxyRow = {
  host: string;
  // NOTE(review): column type is not visible here; it is only interpolated into a URL.
  port: number | string;
  protocol: string;
};

/** Resolves after `ms` milliseconds; stands in for the removed `page.waitForTimeout`. */
function delay(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * One-shot diagnostic script: tries to get past the curaleaf.com age gate by
 * pre-seeding cookies and localStorage, then scrapes brand names from the
 * Phoenix Airport store's brands page and prints them.
 *
 * Steps:
 *  1. Pick a random row from the `proxies` table and launch a headless browser
 *     routed through it.
 *  2. Visit the root domain, register a localStorage-seeding script for
 *     subsequent documents, and set age-verification cookies.
 *  3. Navigate to the brands page, report whether the URL still contains
 *     `/age-gate`, and collect short text snippets from brand-like elements.
 *
 * All failures are caught and logged; the browser and the Postgres pool are
 * always released before the returned promise settles.
 */
async function scrapeBypassAgeGate(): Promise<void> {
  let browser: Awaited<ReturnType<typeof puppeteer.launch>> | undefined;

  try {
    // Get random proxy
    const proxyResult = await pool.query<ProxyRow>(`
      SELECT host, port, protocol
      FROM proxies
      ORDER BY RANDOM()
      LIMIT 1
    `);

    const proxy = proxyResult.rows[0];
    if (!proxy) {
      // Fail with a clear message instead of a TypeError on `proxy.protocol`.
      throw new Error('No proxies available in the proxies table');
    }

    const proxyUrl = `${proxy.protocol}://${proxy.host}:${proxy.port}`;
    console.log('🔌 Proxy:', `${proxy.host}:${proxy.port}`);

    // Launch browser with proxy
    browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox', `--proxy-server=${proxyUrl}`],
    });

    const page = await browser.newPage();

    // Mobile Chrome UA
    const mobileUA =
      'Mozilla/5.0 (Linux; Android 10; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Mobile Safari/537.36';
    await page.setUserAgent(mobileUA);
    console.log('📱 User-Agent:', mobileUA);
    console.log('');

    // Go to the main domain first so cookies can be set for it.
    console.log('🍪 Setting age verification cookies...');
    await page.goto('https://curaleaf.com', { waitUntil: 'domcontentloaded' });

    // Registered after the first navigation, so this runs in every document
    // loaded from here on (i.e. the brands page below), before its own scripts.
    await page.evaluateOnNewDocument(() => {
      try {
        localStorage.setItem('age-verified', 'true');
        localStorage.setItem('curaleaf-age-verified', 'true');
        localStorage.setItem('state', 'arizona');
        localStorage.setItem('selectedState', 'arizona');
      } catch {
        // Best effort: storage access can throw in some documents; the
        // cookies set below are the other half of the bypass attempt.
      }
    });

    // Set cookies manually
    await page.setCookie(
      { name: 'age-verified', value: 'true', domain: '.curaleaf.com', path: '/' },
      { name: 'curaleaf-age-gate', value: 'passed', domain: '.curaleaf.com', path: '/' },
      { name: 'state', value: 'arizona', domain: '.curaleaf.com', path: '/' },
    );
    console.log(' ✅ Cookies set');
    console.log('');

    const url = 'https://curaleaf.com/stores/curaleaf-dispensary-phoenix-airport/brands';
    console.log('🌐 Going to:', url);
    console.log('');

    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    // Extra settle time after network idle; `page.waitForTimeout` no longer
    // exists in current Puppeteer, so use a plain timer.
    await delay(5000);

    // Check current URL
    const currentUrl = page.url();
    console.log('📍 Current URL:', currentUrl);

    if (currentUrl.includes('/age-gate')) {
      console.log('⚠️ Still on age gate - cookies didn\'t work');
      console.log('');

      // Debug: dump the cookies/localStorage the page actually sees.
      const storageData = await page.evaluate(() => {
        const localStorageItems: Record<string, string | null> = {};
        for (let i = 0; i < localStorage.length; i++) {
          const key = localStorage.key(i);
          if (key) {
            localStorageItems[key] = localStorage.getItem(key);
          }
        }
        return { cookies: document.cookie, localStorage: localStorageItems };
      });

      console.log('Current cookies:', storageData.cookies);
      console.log('Current localStorage:', storageData.localStorage);
    } else {
      console.log('✅ Age gate bypassed!');
    }

    // Try to scrape brands
    console.log('');
    console.log('📦 Scraping brands...');

    const brands = await page.evaluate(() => {
      // Broad, speculative selectors; results are de-duplicated by text.
      const selectors = [
        '[data-testid*="brand"]',
        '[class*="Brand"]',
        '[class*="brand"]',
        'a[href*="/brand/"]',
        '.brand-card',
        '.brand-item',
      ];

      const found = new Set<string>();
      for (const selector of selectors) {
        document.querySelectorAll(selector).forEach((el) => {
          const text = el.textContent?.trim();
          // Keep only short, non-empty snippets (under 50 characters).
          if (text && text.length < 50) {
            found.add(text);
          }
        });
      }
      return Array.from(found);
    });

    console.log(`Found ${brands.length} brands`);

    if (brands.length > 0) {
      console.log('─'.repeat(60));
      brands.forEach((b, i) => console.log(` ${i + 1}. ${b}`));
      console.log('─'.repeat(60));
    } else {
      // Debug: nothing matched, so show what the page looks like.
      const pageData = await page.evaluate(() => ({
        title: document.title,
        url: window.location.href,
        bodyText: document.body.innerText.substring(0, 500),
        hasNextRoot: document.getElementById('__next') !== null,
      }));

      console.log('');
      console.log('📄 PAGE DEBUG:');
      console.log('Title:', pageData.title);
      console.log('URL:', pageData.url);
      console.log('Has __next:', pageData.hasNextRoot);
      console.log('Text preview:', pageData.bodyText);
    }
  } catch (error: unknown) {
    console.error('❌ Error:', error instanceof Error ? error.message : String(error));
  } finally {
    // Release the pool even when closing the browser fails.
    try {
      if (browser) await browser.close();
    } finally {
      await pool.end();
    }
  }
}

// Cleanup in `finally` can still reject; report it rather than leaving an
// unhandled rejection.
scrapeBypassAgeGate().catch((error: unknown) => {
  console.error('❌ Error:', error instanceof Error ? error.message : String(error));
});