import puppeteer from 'puppeteer';
import type { Page } from 'puppeteer';
import fs from 'fs';

/** Shape summary recorded for each `window` global whose name looks data-related. */
interface GlobalSummary {
  type: 'array' | 'object';
  /** Up to the first 10 own enumerable keys of the global. */
  keys: string[];
  /** Element count; only set when the global is an array. */
  length?: number;
}

/** Product fields scraped heuristically from a product link's DOM subtree. */
interface DomProduct {
  href: string | null;
  name?: string;
  thc?: string;
  price?: string;
  weight?: string;
  brand?: string;
  strainType?: string;
  image?: string | null;
}

/**
 * Resolves after `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>(resolve => setTimeout(resolve, ms));
}

/** Clicks through the age-gate modal if one is present, then waits 3 s. */
async function dismissAgeGate(page: Page): Promise<void> {
  const ageGate = await page.$('[data-testid="age-gate-modal"]');
  if (!ageGate) return;

  console.log('Bypassing age gate...');
  const btn = await page.$('[data-testid="age-gate-submit-button"]');
  if (btn) await btn.click();
  await sleep(3000);
}

/** Parses the `__NEXT_DATA__` script tag and logs which product-like keys it exposes. */
async function reportNextData(page: Page): Promise<void> {
  console.log('\n=== NEXT.JS DATA ===\n');

  // The callback runs inside the page; the result is untyped JSON, or null
  // when the tag is missing or its content is not valid JSON.
  const nextData = await page.evaluate(() => {
    const script = document.getElementById('__NEXT_DATA__');
    if (!script) return null;
    try {
      return JSON.parse(script.textContent ?? '');
    } catch {
      return null;
    }
  });
  if (!nextData) return;

  console.log('Top keys: ' + Object.keys(nextData).join(', '));

  const pp = nextData.props?.pageProps;
  if (!pp) return;

  console.log('pageProps keys: ' + Object.keys(pp).join(', '));

  // Look for products
  if (pp.products) {
    console.log('\nFound products: ' + pp.products.length);
    if (pp.products[0]) {
      console.log('Product fields: ' + Object.keys(pp.products[0]).join(', '));
      console.log('\nSample:\n' + JSON.stringify(pp.products[0], null, 2));
    }
  }
  if (pp.initialProducts) {
    console.log('\nFound initialProducts: ' + pp.initialProducts.length);
  }
  if (pp.data) {
    console.log('\nFound data: ' + (Array.isArray(pp.data) ? pp.data.length + ' items' : typeof pp.data));
  }
}

/** Logs a shape summary of every object-valued `window` global with a data-like name. */
async function reportWindowGlobals(page: Page): Promise<void> {
  console.log('\n=== WINDOW GLOBALS ===\n');

  const windowData = await page.evaluate(() => {
    const win = window as any;
    const result: Record<string, GlobalSummary> = {};
    // Common patterns for storing product data
    const patterns = ['products', 'items', 'data', 'state', 'store', 'redux', 'apollo'];

    Object.keys(win).forEach(key => {
      const lowerKey = key.toLowerCase();
      if (!patterns.some(p => lowerKey.includes(p))) return;
      try {
        const val = win[key];
        if (typeof val === 'object' && val !== null) {
          result[key] = {
            type: Array.isArray(val) ? 'array' : 'object',
            keys: Object.keys(val).slice(0, 10),
            length: Array.isArray(val) ? val.length : undefined,
          };
        }
      } catch {
        // Best-effort scan: a global that throws on access is skipped on purpose.
      }
    });
    return result;
  });

  console.log('Window globals with data-like names:');
  for (const [name, summary] of Object.entries(windowData)) {
    // Compare against undefined (not truthiness) so empty arrays still show "(0)".
    const lengthPart = summary.length !== undefined ? ' (' + summary.length + ')' : '';
    console.log('  ' + name + ': ' + summary.type + lengthPart + ' - keys: ' + summary.keys.join(', '));
  }
}

/**
 * Scrapes every `a[href*="/product/"]` element into a {@link DomProduct}.
 *
 * All fields are text heuristics over the link's subtree: the first `N%` is
 * taken as THC, the first `$N` as price, and the first `N g` / `N G` as weight.
 */
async function extractDomProducts(page: Page): Promise<DomProduct[]> {
  return page.evaluate(() => {
    const products: DomProduct[] = [];

    document.querySelectorAll('a[href*="/product/"]').forEach((card: Element) => {
      const product: DomProduct = {
        href: card.getAttribute('href'),
        name: card.querySelector('h3, h4, h5')?.textContent?.trim(),
      };

      // Get all text
      const allText = card.textContent || '';

      // Extract THC %
      const thcMatch = allText.match(/(\d+(?:\.\d+)?)\s*%/);
      if (thcMatch) product.thc = thcMatch[1];

      // Extract price
      const priceMatch = allText.match(/\$(\d+(?:\.\d+)?)/);
      if (priceMatch) product.price = priceMatch[1];

      // Extract weight
      const weightMatch = allText.match(/(\d+(?:\.\d+)?)\s*[gG]/);
      if (weightMatch) product.weight = weightMatch[1] + 'g';

      // Get brand from card
      product.brand = card.querySelector('[class*="brand"]')?.textContent?.trim();

      // Get strain type. Every label is tested, so when several appear in the
      // text the one listed last here wins.
      const strainTypes = ['Indica', 'Sativa', 'Hybrid', 'I/S', 'S/I', 'CBD'];
      for (const st of strainTypes) {
        if (allText.includes(st)) product.strainType = st;
      }

      // Get image
      product.image = card.querySelector('img')?.getAttribute('src');

      products.push(product);
    });

    return products;
  });
}

/** Scrapes products from the DOM and logs a sample plus per-field coverage counts. */
async function reportDomProducts(page: Page): Promise<void> {
  console.log('\n=== EXTRACTING FROM DOM ===\n');

  const domProducts = await extractDomProducts(page);
  console.log('Products from DOM: ' + domProducts.length);
  if (domProducts.length === 0) return;

  console.log('\nSample:\n' + JSON.stringify(domProducts[0], null, 2));

  // Show variety
  console.log('\n=== DATA QUALITY ===');
  const total = domProducts.length;
  const withThc = domProducts.filter(p => p.thc).length;
  const withPrice = domProducts.filter(p => p.price).length;
  const withBrand = domProducts.filter(p => p.brand).length;
  const withStrain = domProducts.filter(p => p.strainType).length;
  console.log('With THC%: ' + withThc + '/' + total);
  console.log('With Price: ' + withPrice + '/' + total);
  console.log('With Brand: ' + withBrand + '/' + total);
  console.log('With Strain: ' + withStrain + '/' + total);
}

/**
 * Loads the shop page in headless Chromium, dismisses the age gate, and logs
 * what product data is reachable via `__NEXT_DATA__`, `window` globals, and
 * the rendered DOM.
 *
 * The browser is closed even when a step throws, so a failed run does not
 * leave a Chromium process behind.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    console.log('Loading page...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    // Fixed 3 s pause after navigation before querying the page.
    await sleep(3000);

    // Bypass age gate
    await dismissAgeGate(page);

    // Extract __NEXT_DATA__
    await reportNextData(page);

    // Also check window object
    await reportWindowGlobals(page);

    // Fall back to scraping the rendered product cards
    await reportDomProducts(page);
  } finally {
    await browser.close();
  }
}

main().catch((err: unknown) => {
  // Surface the failure and exit non-zero instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});