- Add WorkerFingerprint interface with timezone, city, state, ip, locale - Store fingerprint in TaskWorker after preflight passes - Pass fingerprint through TaskContext to handlers - Apply timezone via CDP and locale via Accept-Language header - Ensures browser fingerprint matches proxy IP location This fixes anti-detect detection where timezone/locale mismatch with proxy IP was getting blocked by Cloudflare. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
189 lines
6.3 KiB
TypeScript
189 lines
6.3 KiB
TypeScript
/**
|
|
* One-off script to test iHeartJane scraping
|
|
* Mimics remote worker: Puppeteer + stealth (no proxy is configured for this local test)
|
|
*
|
|
* Usage: npx ts-node scripts/test-iheartjane.ts
|
|
*/
|
|
|
|
import puppeteer from 'puppeteer-extra';
|
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
|
|
// Register the stealth plugin on the puppeteer-extra instance; this runs at
// module load, before main() launches the browser.
puppeteer.use(StealthPlugin());
|
|
|
|
// Menu page that main() navigates to.
const TARGET_URL = 'https://theflowershopusa.com/mesa/menu/';
|
|
// Store id interpolated into the https://api.iheartjane.com/v1/stores/<id> URL
// that main() fetches from inside the page.
const STORE_ID = 2788;
|
|
|
|
/** One JSON payload captured during the run, tagged with a coarse category. */
interface CapturedResponse {
  type: string;
  url: string;
  data: unknown;
}

/** One piece of product evidence scraped directly from the rendered page. */
type PageFinding =
  | { source: 'script'; data: string }
  | { source: 'dom'; name: string; price: string | null | undefined };

/** Resource types that are aborted to keep page loads light. */
const BLOCKED_RESOURCE_TYPES: readonly string[] = ['image', 'font', 'media', 'stylesheet'];

/** Maps a response URL to the label used when logging and grouping captures. */
function classifyResponseUrl(url: string): string {
  if (url.includes('store')) return 'STORE';
  if (url.includes('product')) return 'PRODUCT';
  if (url.includes('algolia')) return 'ALGOLIA';
  return 'API';
}

/**
 * Builds the one-line structural summary printed for a captured payload.
 * Non-object payloads (e.g. a JSON `null`) are reported instead of throwing.
 */
function describePayload(data: unknown): string[] {
  if (typeof data !== 'object' || data === null) {
    return [`  Value: ${JSON.stringify(data)}`];
  }

  const record = data as Record<string, unknown>;

  if (Array.isArray(record.hits)) {
    const lines = [`  Products: ${record.hits.length} hits`];
    const first: unknown = record.hits[0];
    if (typeof first === 'object' && first !== null) {
      lines.push(`  Fields: ${Object.keys(first).join(', ')}`);
    }
    return lines;
  }

  if (record.store) {
    return [`  Store: ${JSON.stringify(record.store, null, 2).substring(0, 1000)}`];
  }

  return [`  Keys: ${Object.keys(record).join(', ')}`];
}

/**
 * Runs the scrape test end to end: launches headless Chromium, loads
 * TARGET_URL while recording JSON responses from iheartjane.com / algolia
 * URLs, fetches the store record for STORE_ID from inside the page, scrapes
 * product hints from the DOM, prints a summary, and writes the captured
 * responses plus a screenshot under /tmp.
 *
 * Failures during navigation or processing are logged (with a best-effort
 * error screenshot) and reported through a non-zero process exit code rather
 * than being rethrown. Failures while launching the browser or configuring
 * the page reject the returned promise. The browser is always closed.
 */
async function main(): Promise<void> {
  console.log('[iHeartJane Test] Starting...');

  // No proxy for local testing
  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });

  // Everything after launch sits inside try/finally so the browser process
  // is released even when page setup itself fails.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Intercept network requests to capture API calls
    const apiResponses: CapturedResponse[] = [];

    await page.setRequestInterception(true);
    page.on('request', (req) => {
      // Block heavy resources, let everything else through.
      const blocked = BLOCKED_RESOURCE_TYPES.includes(req.resourceType());
      const settled = blocked ? req.abort() : req.continue();
      // abort()/continue() reject if the request can no longer be resolved
      // (e.g. the page is already closing). That is harmless here, but it
      // must not surface as an unhandled rejection.
      settled.catch(() => undefined);
    });

    page.on('response', async (response) => {
      const url = response.url();
      const contentType = response.headers()['content-type'] ?? '';

      // Capture any JSON response from iheartjane domains
      const isTargetHost = url.includes('iheartjane.com') || url.includes('algolia');
      if (!isTargetHost || !contentType.includes('json')) return;

      try {
        const data: unknown = await response.json();
        const type = classifyResponseUrl(url);
        apiResponses.push({ type, url, data });
        console.log(`[${type}] ${url.substring(0, 120)}...`);
      } catch {
        // Body was unavailable or not valid JSON; skip this response.
      }
    });

    console.log(`[iHeartJane Test] Navigating to ${TARGET_URL}`);

    try {
      await page.goto(TARGET_URL, {
        waitUntil: 'networkidle2',
        timeout: 60000,
      });

      console.log('[iHeartJane Test] Menu page loaded, waiting for data...');

      // Wait a bit for all API calls to complete
      await new Promise((resolve) => setTimeout(resolve, 3000));

      // Also try to get store info by hitting the store endpoint directly
      console.log('[iHeartJane Test] Fetching store info...');
      const storeInfoUrl = `https://api.iheartjane.com/v1/stores/${STORE_ID}`;

      // Fetch from inside the page so the request uses the browser context.
      // The URL is passed in so the logged URL and the fetched URL cannot drift.
      const storeInfo = await page.evaluate(async (endpoint: string) => {
        try {
          const resp = await fetch(endpoint);
          if (resp.ok) return await resp.json();
          return { error: resp.status };
        } catch (e: unknown) {
          return { error: e instanceof Error ? e.message : String(e) };
        }
      }, storeInfoUrl);

      if (storeInfo && !storeInfo.error) {
        apiResponses.push({ type: 'STORE_DIRECT', url: storeInfoUrl, data: storeInfo });
        console.log('[STORE_DIRECT] Got store info via fetch');
      } else {
        console.log(`[STORE_DIRECT] Failed: ${JSON.stringify(storeInfo)}`);
      }

      console.log('[iHeartJane Test] Processing results...');

      const cardSelector = '[data-testid="product-card"], .product-card, [class*="ProductCard"]';
      // Extraction also accepts the broader lowercase "product" class match,
      // and includes everything the wait selector matched.
      const extractionSelector = `${cardSelector}, [class*="product"]`;

      // Wait for products to load; a timeout is reported but not fatal.
      await page
        .waitForSelector(cardSelector, { timeout: 30000 })
        .catch(() => console.log('[iHeartJane Test] No product cards found via selector'));

      // Try to extract product data from the page
      const products = await page.evaluate((selector: string) => {
        const findings: PageFinding[] = [];

        // Method 1: inline scripts that appear to embed product JSON
        for (const script of Array.from(document.querySelectorAll('script'))) {
          const text = script.textContent || '';
          if (!text.includes('products') || !text.includes('price')) continue;
          const match = text.match(/\{[\s\S]*"products"[\s\S]*\}/);
          if (match) {
            findings.push({ source: 'script', data: match[0].substring(0, 500) });
          }
        }

        // Method 2: the first few product-looking elements in the DOM
        const cards = Array.from(document.querySelectorAll(selector)).slice(0, 5);
        for (const card of cards) {
          const name = card.querySelector('[class*="name"], h3, h4')?.textContent;
          const price = card.querySelector('[class*="price"]')?.textContent;
          if (name) {
            findings.push({ source: 'dom', name, price });
          }
        }

        return findings;
      }, extractionSelector);

      console.log('\n[iHeartJane Test] === RESULTS ===');
      console.log(`Total API responses captured: ${apiResponses.length}`);

      // Group by type
      const byType: Record<string, CapturedResponse[]> = {};
      for (const captured of apiResponses) {
        const bucket = byType[captured.type] ?? [];
        bucket.push(captured);
        byType[captured.type] = bucket;
      }

      for (const [type, items] of Object.entries(byType)) {
        console.log(`\n--- ${type} (${items.length} responses) ---`);
        for (const item of items) {
          console.log(`URL: ${item.url}`);
          // Show structure
          for (const line of describePayload(item.data)) {
            console.log(line);
          }
        }
      }

      // Report what was scraped from the page itself; previously this result
      // was computed and then discarded.
      console.log(`\n--- PAGE (${products.length} findings) ---`);
      for (const finding of products) {
        console.log(`  ${JSON.stringify(finding)}`);
      }

      // Write full data to file
      const fs = await import('fs');
      fs.writeFileSync('/tmp/iheartjane-data.json', JSON.stringify(apiResponses, null, 2));
      console.log('\n[iHeartJane Test] Full data saved to /tmp/iheartjane-data.json');

      // Take screenshot
      await page.screenshot({ path: '/tmp/iheartjane-test.png', fullPage: false });
      console.log('[iHeartJane Test] Screenshot saved to /tmp/iheartjane-test.png');
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.error('[iHeartJane Test] Error:', message);
      // Surface the failure to the shell; the error itself is not rethrown.
      process.exitCode = 1;
      // The screenshot is best-effort: if the page is gone it must not
      // replace the error that was just reported.
      await page.screenshot({ path: '/tmp/iheartjane-error.png' }).catch((shotError: unknown) => {
        const reason = shotError instanceof Error ? shotError.message : String(shotError);
        console.error('[iHeartJane Test] Could not save error screenshot:', reason);
      });
    }
  } finally {
    await browser.close();
  }

  console.log('[iHeartJane Test] Done');
}
|
|
|
|
// Script entry point. A rejection from main() (for example, the browser
// failing to launch) is logged and reported through a non-zero exit code so
// callers of the script can detect the failure.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|