From 33feca3138b7a2909b56189df4cf823367832465 Mon Sep 17 00:00:00 2001
From: Kelly
Date: Fri, 12 Dec 2025 00:25:39 -0700
Subject: [PATCH] fix(antidetect): Match browser timezone to proxy IP location
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add IP geolocation lookup via ip-api.com to get timezone from proxy IP
- Use ipify.org API for reliable proxy IP detection (replaces unreliable fingerprint.com scraping)
- Set browser timezone via CDP Emulation.setTimezoneOverride to match proxy location
- Add detectedTimezone and detectedLocation to preflight result
- Add /api/worker-registry/preflight-test endpoint for smoke testing

Fixes timezone mismatch where browser showed America/Phoenix while proxy
was in America/New_York

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
Review notes (not part of the commit message):
- preflight-test response now includes detected_timezone / detected_location
- getTimezoneFromIp rejects non-IP input before building the lookup URL
- hunk counts and diffstat updated; blob hashes on the index lines are from
  the original revision

 backend/src/routes/worker-registry.ts       | 61 +++++++++++++++
 backend/src/services/puppeteer-preflight.ts | 98 ++++++++++++++++++++++-
 2 files changed, 158 insertions(+), 1 deletion(-)

diff --git a/backend/src/routes/worker-registry.ts b/backend/src/routes/worker-registry.ts
index 0de75579..2b949cad 100644
--- a/backend/src/routes/worker-registry.ts
+++ b/backend/src/routes/worker-registry.ts
@@ -23,6 +23,8 @@
 import { Router, Request, Response } from 'express';
 import { pool } from '../db/pool';
 import os from 'os';
+import { runPuppeteerPreflightWithRetry } from '../services/puppeteer-preflight';
+import { CrawlRotator } from '../services/crawl-rotator';
 
 const router = Router();
 
@@ -864,4 +866,63 @@ router.get('/pods', async (_req: Request, res: Response) => {
   }
 });
 
+// ============================================================
+// PREFLIGHT SMOKE TEST
+// ============================================================
+
+/**
+ * POST /api/worker-registry/preflight-test
+ * Run an HTTP (Puppeteer) preflight test and return results
+ *
+ * This is a smoke test endpoint to verify the preflight system works.
+ * Returns IP, detected timezone/location, fingerprint data, bot detection
+ * results, and products fetched. Responds with HTTP 500 and the error
+ * message if the preflight run throws.
+ */
+router.post('/preflight-test', async (_req: Request, res: Response) => {
+  try {
+    console.log('[PreflightTest] Starting HTTP preflight smoke test...');
+
+    // Create a temporary CrawlRotator for the test
+    const crawlRotator = new CrawlRotator();
+
+    // Run the Puppeteer preflight (with 1 retry)
+    const startTime = Date.now();
+    const result = await runPuppeteerPreflightWithRetry(crawlRotator, 1);
+    const duration = Date.now() - startTime;
+
+    console.log(`[PreflightTest] Completed in ${duration}ms - passed: ${result.passed}`);
+
+    res.json({
+      success: true,
+      test: 'http_preflight',
+      duration_ms: duration,
+      result: {
+        passed: result.passed,
+        proxy_ip: result.proxyIp,
+        // Expose the geolocation result so the timezone match can be smoke tested
+        detected_timezone: result.detectedTimezone,
+        detected_location: result.detectedLocation,
+        fingerprint: result.fingerprint,
+        bot_detection: result.botDetection,
+        products_returned: result.productsReturned,
+        browser_user_agent: result.browserUserAgent,
+        ip_verified: result.ipVerified,
+        proxy_available: result.proxyAvailable,
+        proxy_connected: result.proxyConnected,
+        antidetect_ready: result.antidetectReady,
+        response_time_ms: result.responseTimeMs,
+        error: result.error
+      }
+    });
+  } catch (error: any) {
+    console.error('[PreflightTest] Error:', error.message);
+    res.status(500).json({
+      success: false,
+      test: 'http_preflight',
+      error: error.message
+    });
+  }
+});
+
 export default router;
diff --git a/backend/src/services/puppeteer-preflight.ts b/backend/src/services/puppeteer-preflight.ts
index 56538c20..5de42699 100644
--- a/backend/src/services/puppeteer-preflight.ts
+++ b/backend/src/services/puppeteer-preflight.ts
@@ -26,6 +26,48 @@ const TEST_PLATFORM_ID = '6405ef617056e8014d79101b';
 const FINGERPRINT_DEMO_URL = 'https://demo.fingerprint.com/';
 const AMIUNIQUE_URL = 'https://amiunique.org/fingerprint';
 
+// IP geolocation API for timezone lookup (free, no key required)
+// NOTE(review): plain HTTP, so the lookup is unencrypted and unauthenticated -
+// confirm this is acceptable, since the result drives the browser timezone.
+const IP_API_URL = 'http://ip-api.com/json';
+
+/**
+ * Look up timezone from IP address using ip-api.com
+ *
+ * @param ip - Literal IPv4/IPv6 address to geolocate
+ * @returns IANA timezone (e.g., 'America/New_York') plus city/region when
+ *   present in the response, or null if the input is not an IP address, the
+ *   API does not report success, or the request fails (logged, never thrown)
+ */
+async function getTimezoneFromIp(ip: string): Promise<{ timezone: string; city?: string; region?: string } | null> {
+  // Per the ip-api.com docs, an empty query geolocates the requester itself,
+  // which would yield this host's timezone instead of the proxy's.
+  const { isIP } = require('net');
+  if (isIP(ip) === 0) {
+    console.log(`[PuppeteerPreflight] IP geolocation lookup skipped: invalid IP address`);
+    return null;
+  }
+
+  try {
+    const axios = require('axios');
+    const response = await axios.get(`${IP_API_URL}/${ip}?fields=status,timezone,city,regionName`, {
+      timeout: 5000,
+    });
+
+    if (response.data?.status === 'success' && response.data?.timezone) {
+      return {
+        timezone: response.data.timezone,
+        city: response.data.city,
+        region: response.data.regionName,
+      };
+    }
+    return null;
+  } catch (err: any) {
+    console.log(`[PuppeteerPreflight] IP geolocation lookup failed: ${err.message}`);
+    return null;
+  }
+}
+
 export interface PuppeteerPreflightResult extends PreflightResult {
   method: 'http';
   /** Number of products returned (proves API access) */
@@ -42,6 +84,13 @@ export interface PuppeteerPreflightResult extends PreflightResult {
   expectedProxyIp?: string;
   /** Whether IP verification passed (detected IP matches proxy) */
   ipVerified?: boolean;
+  /** Detected timezone from IP geolocation */
+  detectedTimezone?: string;
+  /** Detected location from IP geolocation */
+  detectedLocation?: {
+    city?: string;
+    region?: string;
+  };
 }
 
 /**
@@ -136,7 +185,52 @@ export async function runPuppeteerPreflight(
     };
 
     // =========================================================================
-    // STEP 1: Visit fingerprint.com demo to verify anti-detect and get IP
+    // STEP 1a: Get IP address directly via simple API (more reliable than scraping)
+    // =========================================================================
+    console.log(`[PuppeteerPreflight] Getting proxy IP address...`);
+    try {
+      const ipApiResponse = await page.evaluate(async () => {
+        try {
+          const response = await fetch('https://api.ipify.org?format=json');
+          const data = await response.json();
+          return { ip: data.ip, error: null };
+        } catch (err: any) {
+          return { ip: null, error: err.message };
+        }
+      });
+
+      if (ipApiResponse.ip) {
+        result.proxyIp = ipApiResponse.ip;
+        result.proxyConnected = true;
+        console.log(`[PuppeteerPreflight] Detected proxy IP: ${ipApiResponse.ip}`);
+
+        // Look up timezone from IP
+        const geoData = await getTimezoneFromIp(ipApiResponse.ip);
+        if (geoData) {
+          result.detectedTimezone = geoData.timezone;
+          result.detectedLocation = { city: geoData.city, region: geoData.region };
+          console.log(`[PuppeteerPreflight] IP Geolocation: ${geoData.city}, ${geoData.region} (${geoData.timezone})`);
+
+          // Set browser timezone to match proxy location via CDP
+          try {
+            const client = await page.target().createCDPSession();
+            await client.send('Emulation.setTimezoneOverride', { timezoneId: geoData.timezone });
+            console.log(`[PuppeteerPreflight] Browser timezone set to: ${geoData.timezone}`);
+          } catch (tzErr: any) {
+            console.log(`[PuppeteerPreflight] Failed to set browser timezone: ${tzErr.message}`);
+          }
+        } else {
+          console.log(`[PuppeteerPreflight] WARNING: Could not determine timezone from IP - timezone mismatch possible`);
+        }
+      } else {
+        console.log(`[PuppeteerPreflight] IP lookup failed: ${ipApiResponse.error || 'unknown error'}`);
+      }
+    } catch (ipErr: any) {
+      console.log(`[PuppeteerPreflight] IP API error: ${ipErr.message}`);
+    }
+
+    // =========================================================================
+    // STEP 1b: Visit fingerprint.com demo to verify anti-detect
     // =========================================================================
     console.log(`[PuppeteerPreflight] Testing anti-detect at ${FINGERPRINT_DEMO_URL}...`);
 
@@ -199,6 +293,8 @@ export async function runPuppeteerPreflight(
             // Don't fail - residential proxies often show different egress IPs
           }
         }
+
+        // Note: Timezone already set in STEP 1a (ipify.org IP -> ip-api.com geolocation)
       }
 
       if (fingerprintData.visitorId) {