feat(antidetect): Use actual proxy IP location for browser fingerprint

- Replace hardcoded state coords with IP geolocation lookup via ip-api.com
- Browser timezone and geolocation now match actual proxy IP location
- City-level proxy targeting already in place via Evomi _city- parameter
- Add browser-factory.ts shared utility for antidetect setup

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-13 23:49:25 -07:00
parent aac1181f3d
commit 38d7678a2e
2 changed files with 315 additions and 75 deletions

View File

@@ -0,0 +1,241 @@
/**
* Browser Factory - Centralized browser setup with antidetect features
*
* Creates Puppeteer browser instances with:
* - Stealth plugin for antidetect
* - Proxy configuration
* - Timezone spoofing (matches proxy geo)
* - Geolocation spoofing (matches proxy geo)
* - Resource blocking for bandwidth optimization
*
* Use this factory for all browser-based scraping to ensure consistent antidetect.
*/
/**
 * Look up the geolocation of the browser's public IP via ip-api.com.
 *
 * The lookup runs inside the page (`page.evaluate` + `fetch`), so the request
 * is issued by the browser itself and uses the browser's network settings,
 * including any `--proxy-server` it was launched with.
 *
 * @param page - Puppeteer page used to issue the lookup
 * @returns IP address, timezone, city, region and coordinates reported by
 *   ip-api.com, or `null` when the request fails, the API does not report
 *   `status: 'success'`, or the response carries no timezone
 */
async function getProxyGeoData(page: any): Promise<GeoData | null> {
  try {
    const geoData = await page.evaluate(async () => {
      const response = await fetch('http://ip-api.com/json?fields=status,query,timezone,city,regionName,lat,lon');
      return response.json();
    });

    if (geoData?.status !== 'success') {
      return null;
    }

    // `timezone` is required by GeoData and is passed straight to
    // Emulation.setTimezoneOverride by callers, so a response without it is
    // treated as a failed lookup rather than returned half-populated.
    if (typeof geoData.timezone !== 'string' || geoData.timezone.length === 0) {
      return null;
    }

    return {
      // ip-api.com reports the requesting IP in the `query` field.
      ip: typeof geoData.query === 'string' ? geoData.query : undefined,
      timezone: geoData.timezone,
      city: geoData.city,
      region: geoData.regionName,
      latitude: geoData.lat,
      longitude: geoData.lon,
    };
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    console.log(`[BrowserFactory] IP geolocation lookup failed: ${message}`);
    return null;
  }
}

// Domains to block - analytics, tracking, feature flags.
// Matched as plain substrings of the full request URL.
const BLOCKED_DOMAINS = [
  'googletagmanager.com',
  'google-analytics.com',
  'launchdarkly.com',
  'assets2.dutchie.com',
  'sentry.io',
  'segment.io',
  'segment.com',
  'amplitude.com',
  'mixpanel.com',
  'hotjar.com',
  'fullstory.com',
];

/** Options accepted by createBrowserSession. */
export interface BrowserConfig {
  /** Proxy URL; its host is passed to Chromium and its credentials (if any) are used for proxy auth */
  proxyUrl?: string;
  /** Target domains for geolocation permission (default: dutchie.com) */
  targetDomains?: string[];
  /** Block images/fonts/media for bandwidth (default: true) */
  blockResources?: boolean;
  /** Headless mode (default: 'new') */
  headless?: boolean | 'new';
}

/** Location details for the browser's public IP, as reported by ip-api.com. */
export interface GeoData {
  /** Public IP address the lookup was made from (ip-api.com `query` field) */
  ip?: string;
  /** Timezone identifier used for the browser timezone override */
  timezone: string;
  city?: string;
  region?: string;
  latitude?: number;
  longitude?: number;
}

/** A launched browser plus the page and location data it was configured with. */
export interface BrowserSession {
  browser: any;
  page: any;
  /** Public IP detected during setup, or null when the lookup did not report one */
  proxyIp: string | null;
  geoData: GeoData | null;
  /** Closes the browser; errors raised while closing are ignored */
  close: () => Promise<void>;
}

/**
 * Create a browser session with full antidetect setup
 *
 * Launches Puppeteer with the stealth plugin, optionally behind a proxy, then
 * looks up the location of the browser's public IP and overrides the browser
 * timezone and geolocation to match it.
 *
 * @param config - Browser configuration options
 * @returns Browser session with page, proxy IP, and geo data
 * @throws Error when the IP/location lookup fails, or when any setup step
 *   after launch throws; in both cases the browser is closed first
 */
export async function createBrowserSession(config: BrowserConfig = {}): Promise<BrowserSession> {
  const {
    proxyUrl,
    targetDomains = ['https://dutchie.com', 'https://api.dutchie.com'],
    blockResources = true,
    headless = 'new',
  } = config;

  const puppeteer = require('puppeteer-extra');
  const StealthPlugin = require('puppeteer-extra-plugin-stealth');
  puppeteer.use(StealthPlugin());

  // Parse the proxy URL once; an invalid URL throws here, before any browser is launched.
  const proxy = proxyUrl ? new URL(proxyUrl) : null;

  // Build browser args
  const browserArgs = ['--no-sandbox', '--disable-setuid-sandbox'];
  if (proxy) {
    browserArgs.push(`--proxy-server=${proxy.host}`);
  }

  // Launch browser with stealth + proxy
  const browser = await puppeteer.launch({
    headless,
    args: browserArgs,
  });

  // Everything after launch runs under one try/catch so a failure in any setup
  // step closes the browser instead of leaking the process.
  try {
    const page = await browser.newPage();

    // Grant geolocation permission for target domains
    const context = browser.defaultBrowserContext();
    for (const domain of targetDomains) {
      try {
        await context.overridePermissions(domain, ['geolocation']);
      } catch {
        // Ignore permission errors for domains we might not visit
      }
    }

    // Set up request interception for resource blocking
    if (blockResources) {
      await page.setRequestInterception(true);
      page.on('request', (request: any) => {
        const url = request.url();
        const resourceType = request.resourceType();

        // Block by domain
        if (BLOCKED_DOMAINS.some(domain => url.includes(domain))) {
          request.abort();
          return;
        }

        // Block by resource type (optional - saves bandwidth)
        if (['image', 'font', 'media', 'stylesheet'].includes(resourceType)) {
          request.abort();
          return;
        }

        request.continue();
      });
    }

    // Set up proxy authentication if needed
    if (proxy && proxy.username && proxy.password) {
      await page.authenticate({
        username: decodeURIComponent(proxy.username),
        password: decodeURIComponent(proxy.password),
      });
    }

    // Detect proxy IP and get actual geolocation
    console.log(`[BrowserFactory] Detecting proxy IP and location...`);
    const geoData = await getProxyGeoData(page);
    if (!geoData) {
      console.log(`[BrowserFactory] Failed to detect proxy location - closing browser`);
      throw new Error('Failed to detect proxy IP/location');
    }
    console.log(`[BrowserFactory] Proxy location: ${geoData.city}, ${geoData.region} (${geoData.timezone})`);

    // Overrides are best-effort: a CDP failure is logged and the session is still returned.
    try {
      const client = await page.target().createCDPSession();

      // Spoof timezone to match proxy IP
      await client.send('Emulation.setTimezoneOverride', { timezoneId: geoData.timezone });

      // Spoof geolocation to match proxy IP
      await client.send('Emulation.setGeolocationOverride', {
        latitude: geoData.latitude,
        longitude: geoData.longitude,
        accuracy: 100,
      });

      console.log(`[BrowserFactory] Antidetect configured: TZ=${geoData.timezone}, Geo=${geoData.latitude},${geoData.longitude}`);
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      console.log(`[BrowserFactory] Failed to set antidetect: ${message}`);
    }

    return {
      browser,
      page,
      // Surface the IP reported by the geo lookup instead of always returning null.
      proxyIp: geoData.ip ?? null,
      geoData,
      close: async () => {
        try {
          await browser.close();
        } catch {
          // Ignore close errors
        }
      },
    };
  } catch (err) {
    await browser.close().catch(() => {});
    throw err;
  }
}
/**
 * Apply antidetect settings to an existing page
 * Use this when you can't use createBrowserSession (e.g., existing browser)
 *
 * Looks up the location of the page's public IP, then overrides the browser
 * timezone and, when both coordinates are available, the geolocation.
 *
 * @param page - Puppeteer page instance
 * @returns The geo data used for the overrides, or `null` when the lookup
 *   failed. A failure while applying the overrides is logged and the geo data
 *   is still returned.
 */
export async function applyAntidetectSettings(page: any): Promise<GeoData | null> {
  // Get IP and geo data in one call
  const geoData = await getProxyGeoData(page);
  if (!geoData) {
    console.log(`[BrowserFactory] Could not detect IP for antidetect settings`);
    return null;
  }

  try {
    const client = await page.target().createCDPSession();
    await client.send('Emulation.setTimezoneOverride', { timezoneId: geoData.timezone });

    // Check for presence, not truthiness: 0 is a valid latitude (equator)
    // and a valid longitude (prime meridian).
    if (typeof geoData.latitude === 'number' && typeof geoData.longitude === 'number') {
      await client.send('Emulation.setGeolocationOverride', {
        latitude: geoData.latitude,
        longitude: geoData.longitude,
        accuracy: 100,
      });
    }

    console.log(`[BrowserFactory] Antidetect applied: ${geoData.city}, ${geoData.region}`);
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    console.log(`[BrowserFactory] Failed to apply antidetect: ${message}`);
  }

  return geoData;
}

View File

@@ -26,25 +26,38 @@ const TEST_PLATFORM_ID = '6405ef617056e8014d79101b';
const FINGERPRINT_DEMO_URL = 'https://demo.fingerprint.com/';
const AMIUNIQUE_URL = 'https://amiunique.org/fingerprint';
// IP geolocation API for timezone lookup (free, no key required)
// IP geolocation API - returns IP, timezone, lat/lng, city, region in one call
const IP_API_URL = 'http://ip-api.com/json';
interface GeoData {
ip: string;
timezone: string;
city: string;
region: string;
lat: number;
lng: number;
}
/**
* Look up timezone from IP address using ip-api.com
* Returns IANA timezone (e.g., 'America/New_York') or null on failure
* Look up IP geolocation via ip-api.com
* Returns IP address, timezone, coordinates, city, and region
*/
async function getTimezoneFromIp(ip: string): Promise<{ timezone: string; city?: string; region?: string } | null> {
async function getProxyGeoData(page: any): Promise<GeoData | null> {
try {
const axios = require('axios');
const response = await axios.get(`${IP_API_URL}/${ip}?fields=status,timezone,city,regionName`, {
timeout: 5000,
// Use browser to fetch - this goes through the proxy
const geoData = await page.evaluate(async () => {
const response = await fetch('http://ip-api.com/json?fields=status,query,timezone,city,regionName,lat,lon');
return response.json();
});
if (response.data?.status === 'success' && response.data?.timezone) {
if (geoData?.status === 'success') {
return {
timezone: response.data.timezone,
city: response.data.city,
region: response.data.regionName,
ip: geoData.query,
timezone: geoData.timezone,
city: geoData.city,
region: geoData.regionName,
lat: geoData.lat,
lng: geoData.lon,
};
}
return null;
@@ -85,10 +98,12 @@ export interface PuppeteerPreflightResult extends PreflightResult {
*
* @param crawlRotator - CrawlRotator instance to get proxy from pool
* @param customProxyUrl - Optional custom proxy URL (for identity pool system)
* @param targetState - Target state code for geo spoofing (e.g., 'AZ')
*/
export async function runPuppeteerPreflight(
crawlRotator?: CrawlRotator,
customProxyUrl?: string
customProxyUrl?: string,
targetState?: string
): Promise<PuppeteerPreflightResult> {
const result: PuppeteerPreflightResult = {
method: 'http',
@@ -183,6 +198,11 @@ export async function runPuppeteerPreflight(
const page = await browser.newPage();
// Grant geolocation permission so navigator.geolocation works without prompting
const context = browser.defaultBrowserContext();
await context.overridePermissions('https://dutchie.com', ['geolocation']);
await context.overridePermissions('https://api.dutchie.com', ['geolocation']);
// Block unnecessary resources to save bandwidth
await page.setRequestInterception(true);
@@ -241,81 +261,59 @@ export async function runPuppeteerPreflight(
};
// =========================================================================
// STEP 1a: Get IP address directly via simple API (more reliable than scraping)
// STEP 1: Detect proxy IP and get actual geolocation
// =========================================================================
console.log(`[PuppeteerPreflight] Detecting proxy IP and location...`);
const geoData = await getProxyGeoData(page);
if (!geoData) {
result.error = 'Failed to detect proxy IP/location';
console.log(`[PuppeteerPreflight] FAILED - ${result.error}`);
result.responseTimeMs = Date.now() - startTime;
return result;
}
result.proxyIp = geoData.ip;
result.proxyConnected = true;
result.detectedTimezone = geoData.timezone;
result.detectedLocation = { city: geoData.city, region: geoData.region };
console.log(`[PuppeteerPreflight] Proxy IP: ${geoData.ip} - ${geoData.city}, ${geoData.region} (${geoData.timezone})`);
// =========================================================================
// STEP 2: Configure antidetect to match actual proxy location
// =========================================================================
console.log(`[PuppeteerPreflight] Getting proxy IP address...`);
try {
const ipApiResponse = await page.evaluate(async () => {
try {
const response = await fetch('https://api.ipify.org?format=json');
const data = await response.json();
return { ip: data.ip, error: null };
} catch (err: any) {
return { ip: null, error: err.message };
}
const client = await page.target().createCDPSession();
// Spoof timezone to match proxy IP
await client.send('Emulation.setTimezoneOverride', { timezoneId: geoData.timezone });
console.log(`[PuppeteerPreflight] Timezone spoofed: ${geoData.timezone}`);
// Spoof geolocation to match proxy IP
await client.send('Emulation.setGeolocationOverride', {
latitude: geoData.lat,
longitude: geoData.lng,
accuracy: 100,
});
console.log(`[PuppeteerPreflight] Geolocation spoofed: ${geoData.lat}, ${geoData.lng}`);
if (ipApiResponse.ip) {
result.proxyIp = ipApiResponse.ip;
result.proxyConnected = true;
console.log(`[PuppeteerPreflight] Detected proxy IP: ${ipApiResponse.ip}`);
// Look up timezone from IP
const geoData = await getTimezoneFromIp(ipApiResponse.ip);
if (geoData) {
result.detectedTimezone = geoData.timezone;
result.detectedLocation = { city: geoData.city, region: geoData.region };
console.log(`[PuppeteerPreflight] IP Geolocation: ${geoData.city}, ${geoData.region} (${geoData.timezone})`);
// Set browser timezone to match proxy location via CDP
try {
const client = await page.target().createCDPSession();
await client.send('Emulation.setTimezoneOverride', { timezoneId: geoData.timezone });
console.log(`[PuppeteerPreflight] Browser timezone set to: ${geoData.timezone}`);
} catch (tzErr: any) {
console.log(`[PuppeteerPreflight] Failed to set browser timezone: ${tzErr.message}`);
}
} else {
console.log(`[PuppeteerPreflight] WARNING: Could not determine timezone from IP - timezone mismatch possible`);
}
} else {
console.log(`[PuppeteerPreflight] IP lookup failed: ${ipApiResponse.error || 'unknown error'}`);
}
} catch (ipErr: any) {
console.log(`[PuppeteerPreflight] IP API error: ${ipErr.message}`);
result.antidetectReady = true;
} catch (cdpErr: any) {
console.log(`[PuppeteerPreflight] Failed to set timezone/geolocation: ${cdpErr.message}`);
}
// =========================================================================
// STEP 2: Preflight complete - proxy verified via ipify.org
// We skip heavy fingerprint.com/amiunique.org tests - just verify proxy works
// The actual Dutchie test happens at task time.
// STEP 3: Preflight complete
// =========================================================================
// If we got an IP from ipify.org, proxy is working
if (result.proxyIp) {
result.proxyConnected = true;
result.antidetectReady = true; // Assume stealth plugin is working
}
result.responseTimeMs = Date.now() - startTime;
// If we got here with proxyConnected=true and antidetectReady=true, we're good
if (result.proxyConnected && result.antidetectReady) {
if (result.antidetectReady) {
result.passed = true;
console.log(
`[PuppeteerPreflight] PASSED - Proxy connected, anti-detect ready (${result.responseTimeMs}ms)`
);
if (result.proxyIp) {
console.log(`[PuppeteerPreflight] Browser IP via proxy: ${result.proxyIp}`);
}
} else if (result.proxyConnected) {
// Proxy works but anti-detect check failed - still pass (anti-detect is best-effort)
result.passed = true;
result.antidetectReady = true; // Assume ready since proxy works
console.log(
`[PuppeteerPreflight] PASSED - Proxy connected (anti-detect check skipped, ${result.responseTimeMs}ms)`
`[PuppeteerPreflight] PASSED - ${geoData.city}, ${geoData.region} (${result.responseTimeMs}ms)`
);
} else {
result.error = result.error || 'Proxy connection failed';
result.error = result.error || 'Antidetect configuration failed';
console.log(`[PuppeteerPreflight] FAILED - ${result.error}`);
}
} catch (err: any) {
@@ -341,7 +339,8 @@ export async function runPuppeteerPreflight(
export async function runPuppeteerPreflightWithRetry(
crawlRotator?: CrawlRotator,
maxRetries: number = 1,
customProxyUrl?: string
customProxyUrl?: string,
targetState?: string
): Promise<PuppeteerPreflightResult> {
let lastResult: PuppeteerPreflightResult | null = null;
@@ -351,7 +350,7 @@ export async function runPuppeteerPreflightWithRetry(
await new Promise((r) => setTimeout(r, 5000)); // Wait 5s between retries
}
lastResult = await runPuppeteerPreflight(crawlRotator, customProxyUrl);
lastResult = await runPuppeteerPreflight(crawlRotator, customProxyUrl, targetState);
if (lastResult.passed) {
return lastResult;