// File listing metadata (viewer residue): 135 lines, 4.5 KiB, TypeScript
import puppeteer from 'puppeteer-extra';
|
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
|
|
// Register the stealth plugin on the shared puppeteer-extra instance. This runs at module
// load, ahead of the puppeteer.launch() call made inside testScrape().
puppeteer.use(StealthPlugin());
|
|
|
|
/** A JSON response body captured from the page's network traffic. */
interface CapturedJsonResponse {
  url: string;
  data: Record<string, unknown>;
}

/** Narrows an unknown value to a non-null object so its keys can be inspected safely. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/**
 * Turns a product's brand field into a printable label.
 * Strings (and other truthy primitives) are used as-is; an object is reduced to its
 * string `name` property, because joining raw objects would print "[object Object]".
 * NOTE(review): the `{ name }` object shape is an assumption — confirm against real payloads.
 */
function toBrandLabel(brand: unknown): string | undefined {
  if (!brand) return undefined;
  if (typeof brand === 'object') {
    const name = (brand as { name?: unknown }).name;
    return typeof name === 'string' && name.length > 0 ? name : undefined;
  }
  return String(brand);
}

/**
 * Logs the shape of one captured JSON response and, when it contains
 * `data.filteredProducts`, the product count and the distinct brand names.
 */
function logProductSummary(resp: CapturedJsonResponse): void {
  console.log(`\nAnalyzing: ${resp.url.substring(0, 80)}`);
  console.log(`Top-level keys: ${Object.keys(resp.data).join(', ')}`);

  const payload = resp.data.data;
  if (!isRecord(payload)) return;
  console.log(` data keys: ${Object.keys(payload).join(', ')}`);

  const filtered = payload.filteredProducts;
  if (!filtered) return;
  console.log(' ✅ FOUND filteredProducts!');

  // Only trust `products` when it really is an array; anything else counts as empty.
  const products: unknown[] =
    isRecord(filtered) && Array.isArray(filtered.products) ? filtered.products : [];
  console.log(` Products count: ${products.length}`);
  if (products.length === 0) return;

  const brands = new Set<string>();
  for (const product of products) {
    if (!isRecord(product)) continue;
    for (const candidate of [product.brand, product.brandName]) {
      const label = toBrandLabel(candidate);
      if (label !== undefined) brands.add(label);
    }
  }
  console.log(` Unique brands: ${Array.from(brands).join(', ')}`);
}

/**
 * Diagnostic probe: launches a headless browser, loads a single hard-coded store page,
 * records every JSON network response, and — if the page exposes `window.reactEnv` —
 * prints the reactEnv identifiers, `__NEXT_DATA__` keys, and a summary of any
 * `filteredProducts` payloads that were captured.
 *
 * Errors are logged rather than thrown, and the browser is always closed.
 *
 * @returns A promise that resolves once the probe has finished and the browser is closed.
 */
async function testScrape(): Promise<void> {
  let browser;

  try {
    console.log('Launching browser...\n');

    browser = await puppeteer.launch({
      headless: 'new',
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage'
      ]
    });

    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)');

    // Track all network requests - MUST be set up BEFORE navigation
    const apiResponses: CapturedJsonResponse[] = [];
    const allRequests: string[] = [];

    page.on('response', async response => {
      const url = response.url();
      allRequests.push(url);

      // Log ALL JSON responses to see what we're missing
      try {
        const contentType = response.headers()['content-type'] || '';
        if (!contentType.includes('application/json')) return;

        console.log(`📡 JSON response: ${url.substring(0, 100)}...`);

        const data: unknown = await response.json();
        console.log(` Status: ${response.status()}`);

        // A JSON body can legally be null or a primitive; there are no keys to inspect then.
        if (!isRecord(data)) {
          console.log(' Body is not a JSON object; skipping');
          return;
        }
        console.log(` Keys: ${Object.keys(data).join(', ')}`);

        // Store all JSON responses
        apiResponses.push({ url, data });
      } catch (e: unknown) {
        // Body unavailable or not valid JSON. Best-effort capture: report it and move on.
        const reason = e instanceof Error ? e.message : String(e);
        console.warn(` Could not read JSON body (${url.substring(0, 100)}): ${reason}`);
      }
    });

    const testUrl = 'https://curaleaf.com/stores/curaleaf-dispensary-phoenix-airport';
    console.log(`Navigating to: ${testUrl}`);
    console.log('(API calls will be logged as they happen)\n');

    await page.goto(testUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });

    // Check for Dutchie
    const isDutchie = await page.evaluate(() => {
      return typeof (window as any).reactEnv !== 'undefined';
    });

    console.log(`\nIs Dutchie menu: ${isDutchie}`);

    if (isDutchie) {
      // Get reactEnv. It may be null or non-serializable, so fall back to an empty object
      // instead of letting Object.keys() throw.
      const rawReactEnv: unknown = await page.evaluate(() => {
        return (window as any).reactEnv;
      });
      const reactEnv = isRecord(rawReactEnv) ? rawReactEnv : {};

      console.log('\nreactEnv keys:', Object.keys(reactEnv).join(', '));
      console.log('dispensaryId:', reactEnv.dispensaryId);
      console.log('retailerId:', reactEnv.retailerId);

      // Check if there's any product data in window or __NEXT_DATA__
      const pageData = await page.evaluate(() => {
        // Declared inside the callback because this function body runs in the page context.
        const needles = ['product', 'Product', 'dutchie', 'Dutchie'];
        const nextData = (window as any).__NEXT_DATA__;
        return {
          hasNextData: typeof nextData !== 'undefined',
          nextDataKeys: nextData ? Object.keys(nextData) : [],
          windowKeys: Object.keys(window)
            .filter(k => needles.some(needle => k.includes(needle)))
            .slice(0, 20)
        };
      });

      console.log('\nPage data analysis:');
      console.log('Has __NEXT_DATA__:', pageData.hasNextData);
      if (pageData.hasNextData) {
        console.log('__NEXT_DATA__ keys:', pageData.nextDataKeys.join(', '));
      }
      console.log('Product-related window keys:', pageData.windowKeys.join(', '));

      // Scroll and wait
      console.log('\nScrolling page...');
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight / 2));
      // page.waitForTimeout() is deprecated and removed in current Puppeteer releases;
      // a plain timer gives the same 5s pause for responses to arrive on any version.
      await new Promise<void>(resolve => setTimeout(resolve, 5000));

      console.log('\n📊 API Responses collected:', apiResponses.length);
      console.log('Total network requests made:', allRequests.length);

      // Analyze responses for product data
      for (const resp of apiResponses) {
        logProductSummary(resp);
      }
    }
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('Error:', message);
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (closeError: unknown) {
        // A failed shutdown must not surface as an unhandled rejection for the caller.
        const message = closeError instanceof Error ? closeError.message : String(closeError);
        console.error('Failed to close browser:', message);
      }
    }
  }
}
|
|
|
|
// Run the probe when this file is executed. testScrape() returns a promise, so attach a
// rejection handler that reports anything escaping it instead of leaving the promise floating.
testScrape().catch((err: unknown) => {
  console.error('Unhandled error in testScrape:', err);
});
|