Files
cannaiq/backend/test-dutchie-simple.ts
2025-11-28 19:45:44 -07:00

135 lines
4.5 KiB
TypeScript

import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
// Register the stealth plugin on the shared puppeteer-extra instance.
// This runs at module load, i.e. before testScrape() launches a browser.
puppeteer.use(StealthPlugin());
/**
 * Diagnostic scrape of one hard-coded Curaleaf store page.
 *
 * Steps:
 *  1. Launch a headless browser and open a page with a Googlebot user agent.
 *  2. Record every response URL and log/store every JSON response body.
 *  3. Navigate to the store page and check whether `window.reactEnv` exists
 *     (the script treats its presence as "this is a Dutchie menu").
 *  4. If so, print the `reactEnv` ids, look for `__NEXT_DATA__` and
 *     product/dutchie-named globals, scroll half-way down, wait five seconds,
 *     then scan the captured JSON for `data.filteredProducts.products`.
 *
 * Errors raised inside the `try` block are logged, not rethrown. The browser
 * is closed in `finally` whenever it was successfully launched.
 *
 * @returns A promise that resolves once the browser has been closed.
 */
async function testScrape(): Promise<void> {
  let browser;
  try {
    console.log('Launching browser...\n');
    browser = await puppeteer.launch({
      headless: 'new',
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage'
      ]
    });
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)');

    // Track all network requests - MUST be set up BEFORE navigation
    const apiResponses: any[] = [];
    const allRequests: string[] = [];
    page.on('response', async response => {
      const url = response.url();
      allRequests.push(url);
      // Log ALL JSON responses to see what we're missing
      try {
        const contentType = response.headers()['content-type'] || '';
        if (!contentType.includes('application/json')) {
          return;
        }
        console.log(`📡 JSON response: ${url.substring(0, 100)}...`);
        const data = await response.json();
        console.log(` Status: ${response.status()}`);
        if (data === null) {
          // A literal JSON `null` body has no keys to inspect; skip it
          // explicitly instead of relying on Object.keys(null) throwing.
          console.log(' Keys: (null body)');
          return;
        }
        console.log(` Keys: ${Object.keys(data).join(', ')}`);
        // Store all JSON responses
        apiResponses.push({ url, data });
      } catch (e) {
        // Deliberate best-effort: the body was not valid JSON or could not be
        // read. One bad response must not stop the capture of the others.
      }
    });

    const testUrl = 'https://curaleaf.com/stores/curaleaf-dispensary-phoenix-airport';
    console.log(`Navigating to: ${testUrl}`);
    console.log('(API calls will be logged as they happen)\n');
    await page.goto(testUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });

    // Check for Dutchie
    const isDutchie = await page.evaluate(() => {
      return typeof (window as any).reactEnv !== 'undefined';
    });
    console.log(`\nIs Dutchie menu: ${isDutchie}`);
    if (!isDutchie) {
      return;
    }

    // Get reactEnv. The value can come back as null/undefined (e.g. when it is
    // not serializable), so fall back to an empty object rather than letting
    // Object.keys throw and abort the rest of the analysis.
    const rawReactEnv = await page.evaluate(() => {
      return (window as any).reactEnv;
    });
    const reactEnv: any =
      rawReactEnv !== null && typeof rawReactEnv === 'object' ? rawReactEnv : {};
    console.log('\nreactEnv keys:', Object.keys(reactEnv).join(', '));
    console.log('dispensaryId:', reactEnv.dispensaryId);
    console.log('retailerId:', reactEnv.retailerId);

    // Check if there's any product data in window or __NEXT_DATA__
    const pageData = await page.evaluate(() => {
      return {
        hasWindow: typeof window !== 'undefined',
        hasNextData: typeof (window as any).__NEXT_DATA__ !== 'undefined',
        nextDataKeys: (window as any).__NEXT_DATA__ ? Object.keys((window as any).__NEXT_DATA__) : [],
        windowKeys: Object.keys(window).filter(k => k.includes('product') || k.includes('Product') || k.includes('dutchie') || k.includes('Dutchie')).slice(0, 20)
      };
    });
    console.log('\nPage data analysis:');
    console.log('Has __NEXT_DATA__:', pageData.hasNextData);
    if (pageData.hasNextData) {
      console.log('__NEXT_DATA__ keys:', pageData.nextDataKeys.join(', '));
    }
    console.log('Product-related window keys:', pageData.windowKeys.join(', '));

    // Scroll and wait
    console.log('\nScrolling page...');
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight / 2));
    // page.waitForTimeout() was removed from Puppeteer; a plain timer promise
    // gives the same five-second pause on every Puppeteer version.
    await new Promise<void>(resolve => setTimeout(resolve, 5000));

    console.log('\n📊 API Responses collected:', apiResponses.length);
    console.log('Total network requests made:', allRequests.length);

    // Analyze responses for product data
    for (const resp of apiResponses) {
      console.log(`\nAnalyzing: ${resp.url.substring(0, 80)}`);
      console.log(`Top-level keys: ${Object.keys(resp.data).join(', ')}`);

      // Check for products
      const payload = resp.data.data;
      if (!payload) {
        continue;
      }
      console.log(` data keys: ${Object.keys(payload).join(', ')}`);
      if (!payload.filteredProducts) {
        continue;
      }
      console.log(' ✅ FOUND filteredProducts!');
      // Only iterate when `products` really is an array.
      const rawProducts = payload.filteredProducts.products;
      const products: any[] = Array.isArray(rawProducts) ? rawProducts : [];
      console.log(` Products count: ${products.length}`);
      if (products.length > 0) {
        const brands = new Set();
        for (const p of products) {
          if (p.brand) brands.add(p.brand);
          if (p.brandName) brands.add(p.brandName);
        }
        console.log(` Unique brands: ${Array.from(brands).join(', ')}`);
      }
    }
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances are guaranteed a message.
    const message = error instanceof Error ? error.message : String(error);
    console.error('Error:', message);
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}
// Script entry point: start the scrape as soon as this file is executed.
// The returned promise is not awaited here.
testScrape();