/**
 * One-off script to test iHeartJane scraping
 * Mimics remote worker: Puppeteer + stealth (the proxy is omitted for local runs)
 *
 * Usage: npx ts-node scripts/test-iheartjane.ts
 */
import { writeFileSync } from 'fs';
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';

puppeteer.use(StealthPlugin());

const TARGET_URL = 'https://theflowershopusa.com/mesa/menu/';
const STORE_ID = 2788;

/** Selector awaited before scraping the DOM; also reused by the DOM extraction step. */
const PRODUCT_CARD_SELECTOR =
  '[data-testid="product-card"], .product-card, [class*="ProductCard"]';

/** Label attached to every captured response. */
type ResponseKind = 'STORE' | 'PRODUCT' | 'ALGOLIA' | 'API' | 'STORE_DIRECT';

/** One JSON payload captured from the network (or fetched directly). */
interface CapturedResponse {
  type: ResponseKind;
  url: string;
  data: unknown;
}

/** One item found by scanning the rendered page. */
type PageFinding =
  | { source: 'script'; data: string }
  | { source: 'dom'; name: string; price: string | null };

/** Extracts a printable message from an arbitrary thrown value. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Labels a captured response by substring match on its URL.
 * The checks are ordered: 'store' wins over 'product', which wins over 'algolia'.
 */
function classifyResponse(url: string): ResponseKind {
  if (url.includes('store')) return 'STORE';
  if (url.includes('product')) return 'PRODUCT';
  if (url.includes('algolia')) return 'ALGOLIA';
  return 'API';
}

/**
 * Returns true when the in-page store fetch produced nothing usable:
 * a falsy value, or an object carrying a truthy `error` field.
 */
function isFailedFetch(result: unknown): boolean {
  if (!result) return true;
  return typeof result === 'object' && Boolean((result as { error?: unknown }).error);
}

/**
 * Builds the log lines describing the structure of one captured payload.
 * Never throws: `null` and primitive JSON bodies are printed as-is instead of
 * being dereferenced.
 */
function summarizePayload(data: unknown): string[] {
  if (typeof data !== 'object' || data === null) {
    return [`  Value: ${JSON.stringify(data)}`];
  }

  const payload = data as Record<string, unknown>;
  const { hits, store } = payload;

  if (Array.isArray(hits)) {
    const lines = [`  Products: ${hits.length} hits`];
    const firstHit: unknown = hits[0];
    if (typeof firstHit === 'object' && firstHit !== null) {
      lines.push(`  Fields: ${Object.keys(firstHit).join(', ')}`);
    }
    return lines;
  }

  if (store) {
    return [`  Store: ${JSON.stringify(store, null, 2).substring(0, 1000)}`];
  }

  return [`  Keys: ${Object.keys(payload).join(', ')}`];
}

/**
 * Loads the target menu page in headless Chromium, records every JSON response
 * coming from iheartjane.com / algolia URLs, fetches the store record directly,
 * scans the rendered page for product data, prints a summary, and writes the
 * captured responses plus a screenshot under /tmp.
 *
 * Errors during navigation/scraping are logged (with an error screenshot) and
 * reported through a non-zero exit code; the browser is always closed.
 */
async function main(): Promise<void> {
  console.log('[iHeartJane Test] Starting...');

  // No proxy for local testing
  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Intercept network requests to capture API calls
    const apiResponses: CapturedResponse[] = [];
    await page.setRequestInterception(true);

    page.on('request', (req) => {
      // Block heavy resources
      const blocked = ['image', 'font', 'media', 'stylesheet'].includes(req.resourceType());
      const resolution = blocked ? req.abort() : req.continue();
      // The request may already be resolved or the page closed; a rejection
      // here must not surface as an unhandled promise rejection.
      resolution.catch(() => undefined);
    });

    page.on('response', async (response) => {
      const url = response.url();
      const contentType = response.headers()['content-type'] ?? '';

      // Capture any JSON response from iheartjane domains
      const isTracked = url.includes('iheartjane.com') || url.includes('algolia');
      if (!isTracked || !contentType.includes('json')) return;

      try {
        const data: unknown = await response.json();
        const type = classifyResponse(url);
        apiResponses.push({ type, url, data });
        console.log(`[${type}] ${url.substring(0, 120)}...`);
      } catch {
        // Not JSON (or the body is no longer available) - skip this response.
      }
    });

    console.log(`[iHeartJane Test] Navigating to ${TARGET_URL}`);

    try {
      await page.goto(TARGET_URL, {
        waitUntil: 'networkidle2',
        timeout: 60000,
      });

      console.log('[iHeartJane Test] Menu page loaded, waiting for data...');

      // Wait a bit for all API calls to complete
      await new Promise((resolve) => setTimeout(resolve, 3000));

      // Also try to get store info by querying the store endpoint
      console.log('[iHeartJane Test] Fetching store info...');
      const storeInfoUrl = `https://api.iheartjane.com/v1/stores/${STORE_ID}`;

      // Fetch via page.evaluate so the request is made from the browser context.
      // The callback is serialized into the page, so it must be self-contained.
      const storeInfo: unknown = await page.evaluate(async (endpoint: string) => {
        try {
          const resp = await fetch(endpoint);
          if (resp.ok) return (await resp.json()) as unknown;
          return { error: resp.status };
        } catch (e: unknown) {
          return { error: e instanceof Error ? e.message : String(e) };
        }
      }, storeInfoUrl);

      if (isFailedFetch(storeInfo)) {
        console.log(`[STORE_DIRECT] Failed: ${JSON.stringify(storeInfo)}`);
      } else {
        apiResponses.push({ type: 'STORE_DIRECT', url: storeInfoUrl, data: storeInfo });
        console.log('[STORE_DIRECT] Got store info via fetch');
      }

      console.log('[iHeartJane Test] Processing results...');

      // Wait for products to load; a timeout is reported but is not fatal.
      await page
        .waitForSelector(PRODUCT_CARD_SELECTOR, { timeout: 30000 })
        .catch(() => console.log('[iHeartJane Test] No product cards found via selector'));

      // Try to extract product data from the page
      const products = await page.evaluate((cardSelector: string) => {
        const results: PageFinding[] = [];

        // Method 1: Look for __INITIAL_STATE__ or similar inline JSON
        for (const script of Array.from(document.querySelectorAll('script'))) {
          const text = script.textContent || '';
          if (!text.includes('products') || !text.includes('price')) continue;

          // Greedy match from the first '{' to the last '}' around "products"
          const match = text.match(/\{[\s\S]*"products"[\s\S]*\}/);
          if (match) {
            results.push({ source: 'script', data: match[0].substring(0, 500) });
          }
        }

        // Method 2: Look for product elements in DOM. The awaited card selector
        // is included so elements that satisfied the wait are also scanned.
        const productElements = document.querySelectorAll(
          `${cardSelector}, [class*="product"]`,
        );
        for (const el of Array.from(productElements).slice(0, 5)) {
          const name = el.querySelector('[class*="name"], h3, h4')?.textContent;
          const price = el.querySelector('[class*="price"]')?.textContent ?? null;
          if (name) {
            results.push({ source: 'dom', name, price });
          }
        }

        return results;
      }, PRODUCT_CARD_SELECTOR);

      console.log('\n[iHeartJane Test] === RESULTS ===');
      console.log(`Total API responses captured: ${apiResponses.length}`);

      // Group by type (Map keeps first-seen order of the types)
      const byType = new Map<ResponseKind, CapturedResponse[]>();
      for (const captured of apiResponses) {
        const bucket = byType.get(captured.type);
        if (bucket) {
          bucket.push(captured);
        } else {
          byType.set(captured.type, [captured]);
        }
      }

      for (const [type, items] of byType) {
        console.log(`\n--- ${type} (${items.length} responses) ---`);
        for (const item of items) {
          console.log(`URL: ${item.url}`);
          // Show structure
          for (const line of summarizePayload(item.data)) {
            console.log(line);
          }
        }
      }

      // Report what the in-page scan found
      console.log(`\n--- PAGE (${products.length} findings) ---`);
      for (const finding of products) {
        console.log(`  ${JSON.stringify(finding)}`);
      }

      // Write full data to file
      writeFileSync('/tmp/iheartjane-data.json', JSON.stringify(apiResponses, null, 2));
      console.log('\n[iHeartJane Test] Full data saved to /tmp/iheartjane-data.json');

      // Take screenshot
      await page.screenshot({ path: '/tmp/iheartjane-test.png', fullPage: false });
      console.log('[iHeartJane Test] Screenshot saved to /tmp/iheartjane-test.png');
    } catch (error: unknown) {
      console.error('[iHeartJane Test] Error:', errorMessage(error));
      process.exitCode = 1;

      // Best-effort diagnostic; a failing screenshot must not abort the error handling.
      await page
        .screenshot({ path: '/tmp/iheartjane-error.png' })
        .catch((shotError: unknown) =>
          console.error(
            '[iHeartJane Test] Could not capture error screenshot:',
            errorMessage(shotError),
          ),
        );
    }
  } finally {
    await browser.close();
  }

  console.log('[iHeartJane Test] Done');
}

main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});