Files
cannaiq/backend/scripts/test-iheartjane.ts
Kelly 023cfc127f fix(preflight): Apply stored fingerprint to task browser
- Add WorkerFingerprint interface with timezone, city, state, ip, locale
- Store fingerprint in TaskWorker after preflight passes
- Pass fingerprint through TaskContext to handlers
- Apply timezone via CDP and locale via Accept-Language header
- Ensures browser fingerprint matches proxy IP location

This fixes anti-detect detection, where a timezone/locale mismatch
with the proxy IP was causing requests to be blocked by Cloudflare.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 16:40:52 -07:00

189 lines
6.3 KiB
TypeScript

/**
* One-off script to test iHeartJane scraping
* Mimics remote worker: Puppeteer + stealth (no proxy is configured; runs direct for local testing)
*
* Usage: npx ts-node scripts/test-iheartjane.ts
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
// Register the stealth plugin on the puppeteer-extra instance before main() launches a browser.
const stealthPlugin = StealthPlugin();
puppeteer.use(stealthPlugin);

// Menu page that main() navigates to.
const TARGET_URL = 'https://theflowershopusa.com/mesa/menu/';

// Store id interpolated into the iHeartJane stores API URL by main().
// NOTE(review): presumably the store behind TARGET_URL's menu — confirm.
const STORE_ID = 2788;
/** One JSON payload captured from the page's network traffic or fetched directly. */
interface CapturedResponse {
  /** Coarse label derived from the URL, or 'STORE_DIRECT' for the in-page fetch. */
  type: string;
  url: string;
  data: unknown;
}

/** Entry produced by the in-page product extraction. */
type ExtractedEntry =
  | { source: 'script'; data: string }
  | { source: 'dom'; name: string; price: string | null | undefined };

/** Resource types aborted during interception to keep the page load light. */
const BLOCKED_RESOURCE_TYPES: readonly string[] = ['image', 'font', 'media', 'stylesheet'];

/** Turns any thrown value into a printable message. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Labels a captured URL; the checks run in this order, so 'store' wins over 'product' and 'algolia'. */
function classifyApiUrl(url: string): string {
  if (url.includes('store')) return 'STORE';
  if (url.includes('product')) return 'PRODUCT';
  if (url.includes('algolia')) return 'ALGOLIA';
  return 'API';
}

/** Builds the summary lines for one payload without assuming it is a non-null object. */
function describePayload(data: unknown): string[] {
  // A JSON body may legitimately be `null`, a number, a string or a boolean.
  if (data === null || typeof data !== 'object') {
    return [`  Value: ${String(JSON.stringify(data)).substring(0, 200)}`];
  }

  const payload = data as Record<string, unknown>;
  const hits = payload.hits;
  if (Array.isArray(hits)) {
    const lines = [`  Products: ${hits.length} hits`];
    const first: unknown = hits[0];
    if (first !== null && typeof first === 'object') {
      lines.push(`  Fields: ${Object.keys(first).join(', ')}`);
    }
    return lines;
  }

  if (payload.store) {
    return [`  Store: ${JSON.stringify(payload.store, null, 2).substring(0, 1000)}`];
  }

  return [`  Keys: ${Object.keys(payload).join(', ')}`];
}

/** Prints the captured responses grouped by their type label. */
function printCapturedResponses(responses: readonly CapturedResponse[]): void {
  const byType: Record<string, CapturedResponse[]> = {};
  for (const captured of responses) {
    const bucket = byType[captured.type] ?? [];
    bucket.push(captured);
    byType[captured.type] = bucket;
  }

  for (const [type, items] of Object.entries(byType)) {
    console.log(`\n--- ${type} (${items.length} responses) ---`);
    for (const item of items) {
      console.log(`URL: ${item.url}`);
      for (const line of describePayload(item.data)) {
        console.log(line);
      }
    }
  }
}

/**
 * Loads TARGET_URL in a stealth Puppeteer browser, records JSON responses from
 * iheartjane.com / algolia URLs, fetches the store record from inside the page,
 * samples product data from the DOM, prints a summary, and writes the captured
 * responses plus a screenshot under /tmp.
 *
 * Errors raised while navigating or processing are logged (with a best-effort
 * error screenshot) rather than rethrown. Errors raised while launching or
 * configuring the page propagate to the caller. The browser is closed in both cases.
 */
async function main(): Promise<void> {
  console.log('[iHeartJane Test] Starting...');

  // No proxy for local testing
  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });

  // Everything after launch sits inside try/finally so a failure while creating
  // or configuring the page cannot leave the browser process running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Intercept network requests to capture API calls
    const apiResponses: CapturedResponse[] = [];
    await page.setRequestInterception(true);

    page.on('request', (req) => {
      // Block heavy resources; let everything else through.
      const settled = BLOCKED_RESOURCE_TYPES.includes(req.resourceType())
        ? req.abort()
        : req.continue();
      // abort()/continue() return promises; handle rejection here so a failed
      // interception does not surface as an unhandled rejection.
      settled.catch((interceptError: unknown) => {
        console.warn(`[iHeartJane Test] Request interception failed: ${errorMessage(interceptError)}`);
      });
    });

    page.on('response', async (response) => {
      const url = response.url();
      const contentType = response.headers()['content-type'] || '';
      // Capture any JSON response from iheartjane domains
      const isTargetHost = url.includes('iheartjane.com') || url.includes('algolia');
      if (!isTargetHost || !contentType.includes('json')) return;

      try {
        const json: unknown = await response.json();
        const type = classifyApiUrl(url);
        apiResponses.push({ type, url, data: json });
        console.log(`[${type}] ${url.substring(0, 120)}...`);
      } catch {
        // Body was not parseable JSON despite the content type; skip it.
      }
    });

    console.log(`[iHeartJane Test] Navigating to ${TARGET_URL}`);

    try {
      await page.goto(TARGET_URL, {
        waitUntil: 'networkidle2',
        timeout: 60000,
      });
      console.log('[iHeartJane Test] Menu page loaded, waiting for data...');

      // Wait a bit for all API calls to complete
      await new Promise((resolve) => setTimeout(resolve, 3000));

      // Fetch store info from inside the page so the request uses the browser context.
      console.log('[iHeartJane Test] Fetching store info...');
      const storeInfoUrl = `https://api.iheartjane.com/v1/stores/${STORE_ID}`;
      // The callback is serialized into the page, so the URL is passed as an
      // argument instead of being captured from the outer scope.
      const storeInfo = await page.evaluate(async (targetUrl: string) => {
        try {
          const resp = await fetch(targetUrl);
          if (resp.ok) return await resp.json();
          return { error: resp.status };
        } catch (e: unknown) {
          // Always return a truthy error so a non-Error throw is not read as success.
          return { error: e instanceof Error ? e.message : String(e) };
        }
      }, storeInfoUrl);

      if (storeInfo && !storeInfo.error) {
        apiResponses.push({ type: 'STORE_DIRECT', url: storeInfoUrl, data: storeInfo });
        console.log('[STORE_DIRECT] Got store info via fetch');
      } else {
        console.log(`[STORE_DIRECT] Failed: ${JSON.stringify(storeInfo)}`);
      }

      console.log('[iHeartJane Test] Processing results...');

      // Wait for products to load; a missing selector is reported, not fatal.
      await page
        .waitForSelector('[data-testid="product-card"], .product-card, [class*="ProductCard"]', {
          timeout: 30000,
        })
        .catch(() => console.log('[iHeartJane Test] No product cards found via selector'));

      // Try to extract product data from the page
      const products = await page.evaluate(() => {
        const results: ExtractedEntry[] = [];

        // Method 1: inline scripts that mention both "products" and "price".
        for (const script of Array.from(document.querySelectorAll('script'))) {
          const text = script.textContent || '';
          if (!text.includes('products') || !text.includes('price')) continue;
          const match = text.match(/\{[\s\S]*"products"[\s\S]*\}/);
          if (match) {
            results.push({ source: 'script', data: match[0].substring(0, 500) });
          }
        }

        // Method 2: the first five product-like elements in the DOM.
        const productElements = document.querySelectorAll(
          '[data-testid="product-card"], .product-card, [class*="product"]',
        );
        for (const el of Array.from(productElements).slice(0, 5)) {
          const name = el.querySelector('[class*="name"], h3, h4')?.textContent;
          const price = el.querySelector('[class*="price"]')?.textContent;
          if (name) {
            results.push({ source: 'dom', name, price });
          }
        }

        return results;
      });

      console.log('\n[iHeartJane Test] === RESULTS ===');
      console.log(`Total API responses captured: ${apiResponses.length}`);
      printCapturedResponses(apiResponses);

      // Report the in-page extraction, which was previously computed and discarded.
      console.log(`\n--- PAGE EXTRACTION (${products.length} entries) ---`);
      for (const entry of products) {
        console.log(`  ${JSON.stringify(entry)}`);
      }

      // Write full data to file
      const fs = await import('fs');
      fs.writeFileSync('/tmp/iheartjane-data.json', JSON.stringify(apiResponses, null, 2));
      console.log('\n[iHeartJane Test] Full data saved to /tmp/iheartjane-data.json');

      // Take screenshot
      await page.screenshot({ path: '/tmp/iheartjane-test.png', fullPage: false });
      console.log('[iHeartJane Test] Screenshot saved to /tmp/iheartjane-test.png');
    } catch (error: unknown) {
      console.error('[iHeartJane Test] Error:', errorMessage(error));
      // The page may already be unusable; a failed error screenshot must not mask the real error.
      try {
        await page.screenshot({ path: '/tmp/iheartjane-error.png' });
      } catch (screenshotError: unknown) {
        console.error(
          '[iHeartJane Test] Could not capture error screenshot:',
          errorMessage(screenshotError),
        );
      }
    }
  } finally {
    await browser.close();
  }

  console.log('[iHeartJane Test] Done');
}
// Run the script; print any rejection that escapes main() to stderr.
main().catch((reason) => {
  console.error(reason);
});