import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';

puppeteer.use(StealthPlugin());

/** A JSON response captured from the page's network traffic. */
interface CapturedResponse {
  url: string;
  // The payload shape is not known ahead of time; this script exists to discover it.
  data: any;
}

/**
 * Resolves after `ms` milliseconds.
 *
 * Used instead of `page.waitForTimeout`, which newer Puppeteer releases no longer provide.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>(resolve => setTimeout(resolve, ms));
}

/**
 * Collects the distinct brand names found on a list of product records.
 *
 * Reads each product's `brand` and `brandName`. Either field may be a plain
 * string or an object carrying a `name`; anything else is ignored so the
 * summary never contains "[object Object]".
 *
 * @param products Product records taken from a captured response.
 * @returns Unique, non-empty brand names in first-seen order.
 */
function collectBrands(products: readonly any[]): string[] {
  const brands = new Set<string>();
  for (const product of products) {
    for (const candidate of [product?.brand, product?.brandName]) {
      const name =
        typeof candidate === 'object' && candidate !== null ? candidate.name : candidate;
      if (typeof name === 'string' && name !== '') {
        brands.add(name);
      }
    }
  }
  return Array.from(brands);
}

/**
 * Logs the structure of one captured JSON response and, when it contains
 * `data.filteredProducts`, the product count and the unique brands.
 *
 * @param resp A response stored by the `response` listener in `testScrape`.
 */
function analyzeResponse(resp: CapturedResponse): void {
  console.log(`\nAnalyzing: ${resp.url.substring(0, 80)}`);
  console.log(`Top-level keys: ${Object.keys(resp.data).join(', ')}`);

  const payload = resp.data.data;
  if (!payload) {
    return;
  }
  console.log(` data keys: ${Object.keys(payload).join(', ')}`);

  const filtered = payload.filteredProducts;
  if (!filtered) {
    return;
  }
  console.log(' ✅ FOUND filteredProducts!');

  // Treat a missing or non-array `products` as "no products" instead of throwing.
  const products: any[] = Array.isArray(filtered.products) ? filtered.products : [];
  console.log(` Products count: ${products.length}`);
  if (products.length > 0) {
    console.log(` Unique brands: ${collectBrands(products).join(', ')}`);
  }
}

/**
 * Diagnostic scrape of a single store page.
 *
 * Launches a headless browser, records every JSON response the page receives,
 * checks whether the page exposes `window.reactEnv`, and if so logs the
 * `reactEnv` ids, any `__NEXT_DATA__` / product-related window keys, and an
 * analysis of the captured JSON responses.
 *
 * Errors are logged rather than rethrown, and the browser is always closed.
 */
async function testScrape(): Promise<void> {
  let browser: Awaited<ReturnType<typeof puppeteer.launch>> | undefined;

  try {
    console.log('Launching browser...\n');
    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
    });

    const page = await browser.newPage();
    await page.setUserAgent(
      'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
    );

    // Track all network requests - MUST be set up BEFORE navigation
    const apiResponses: CapturedResponse[] = [];
    const allRequests: string[] = [];

    page.on('response', async response => {
      const url = response.url();
      allRequests.push(url);

      // Log ALL JSON responses to see what we're missing
      try {
        const contentType = response.headers()['content-type'] || '';
        if (contentType.includes('application/json')) {
          console.log(`📡 JSON response: ${url.substring(0, 100)}...`);
          const data = await response.json();
          console.log(` Status: ${response.status()}`);
          console.log(` Keys: ${Object.keys(data).join(', ')}`);

          // Store all JSON responses
          apiResponses.push({ url, data });
        }
      } catch (e) {
        // Deliberately best-effort: the body was not JSON, failed to parse,
        // or was no longer available. Skip this response.
      }
    });

    const testUrl = 'https://curaleaf.com/stores/curaleaf-dispensary-phoenix-airport';
    console.log(`Navigating to: ${testUrl}`);
    console.log('(API calls will be logged as they happen)\n');

    await page.goto(testUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });

    // Check for Dutchie: the script treats a defined window.reactEnv as the marker.
    const isDutchie = await page.evaluate(() => {
      return typeof (window as any).reactEnv !== 'undefined';
    });
    console.log(`\nIs Dutchie menu: ${isDutchie}`);

    if (isDutchie) {
      // Get reactEnv
      const reactEnv = await page.evaluate(() => {
        return (window as any).reactEnv;
      });
      console.log('\nreactEnv keys:', Object.keys(reactEnv).join(', '));
      console.log('dispensaryId:', reactEnv.dispensaryId);
      console.log('retailerId:', reactEnv.retailerId);

      // Check if there's any product data in window or __NEXT_DATA__.
      // This callback runs inside the page, so it must stay self-contained.
      const pageData = await page.evaluate(() => {
        return {
          hasWindow: typeof window !== 'undefined',
          hasNextData: typeof (window as any).__NEXT_DATA__ !== 'undefined',
          nextDataKeys: (window as any).__NEXT_DATA__
            ? Object.keys((window as any).__NEXT_DATA__)
            : [],
          windowKeys: Object.keys(window)
            .filter(
              k =>
                k.includes('product') ||
                k.includes('Product') ||
                k.includes('dutchie') ||
                k.includes('Dutchie')
            )
            .slice(0, 20)
        };
      });

      console.log('\nPage data analysis:');
      console.log('Has __NEXT_DATA__:', pageData.hasNextData);
      if (pageData.hasNextData) {
        console.log('__NEXT_DATA__ keys:', pageData.nextDataKeys.join(', '));
      }
      console.log('Product-related window keys:', pageData.windowKeys.join(', '));

      // Scroll and wait
      console.log('\nScrolling page...');
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight / 2));
      // Fixed pause so requests triggered by the scroll have time to complete.
      await delay(5000);

      console.log('\n📊 API Responses collected:', apiResponses.length);
      console.log('Total network requests made:', allRequests.length);

      // Analyze responses for product data
      for (const resp of apiResponses) {
        analyzeResponse(resp);
      }
    }
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances are known to carry a message.
    console.error('Error:', error instanceof Error ? error.message : error);
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}

// testScrape handles its own errors, so the returned promise is intentionally not awaited.
void testScrape();