Compare commits
3 Commits
fix/api-se
...
feat/prefl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6bcadd9e71 | ||
|
|
a77bf8611a | ||
|
|
33feca3138 |
@@ -23,6 +23,8 @@
|
|||||||
import { Router, Request, Response } from 'express';
|
import { Router, Request, Response } from 'express';
|
||||||
import { pool } from '../db/pool';
|
import { pool } from '../db/pool';
|
||||||
import os from 'os';
|
import os from 'os';
|
||||||
|
import { runPuppeteerPreflightWithRetry } from '../services/puppeteer-preflight';
|
||||||
|
import { CrawlRotator } from '../services/crawl-rotator';
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
@@ -864,4 +866,58 @@ router.get('/pods', async (_req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// PREFLIGHT SMOKE TEST
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/**
 * POST /api/worker-registry/preflight-test
 *
 * Smoke-test endpoint for the HTTP (Puppeteer) preflight system.
 *
 * Builds a throwaway CrawlRotator, runs one preflight through
 * `runPuppeteerPreflightWithRetry`, times it, and echoes the outcome back as
 * JSON (IP, fingerprint data, bot detection results, products fetched).
 *
 * Responses:
 *  - 200 `{ success: true, test, duration_ms, result }` whenever the preflight
 *    call returns, including when `result.passed` is false.
 *  - 500 `{ success: false, test, error }` when the preflight call throws.
 */
router.post('/preflight-test', async (_req: Request, res: Response) => {
  try {
    console.log('[PreflightTest] Starting HTTP preflight smoke test...');

    // The rotator lives only for the duration of this request.
    const rotator = new CrawlRotator();

    // Time the preflight call itself (second argument: 1 retry, per the
    // original inline comment).
    const startedAt = Date.now();
    const outcome = await runPuppeteerPreflightWithRetry(rotator, 1);
    const elapsedMs = Date.now() - startedAt;

    console.log(`[PreflightTest] Completed in ${elapsedMs}ms - passed: ${outcome.passed}`);

    // Map the camelCase preflight result onto the snake_case response shape.
    const report = {
      passed: outcome.passed,
      proxy_ip: outcome.proxyIp,
      fingerprint: outcome.fingerprint,
      bot_detection: outcome.botDetection,
      products_returned: outcome.productsReturned,
      browser_user_agent: outcome.browserUserAgent,
      ip_verified: outcome.ipVerified,
      proxy_available: outcome.proxyAvailable,
      proxy_connected: outcome.proxyConnected,
      antidetect_ready: outcome.antidetectReady,
      response_time_ms: outcome.responseTimeMs,
      error: outcome.error
    };

    res.json({
      success: true,
      test: 'http_preflight',
      duration_ms: elapsedMs,
      result: report
    });
  } catch (failure: any) {
    const reason = failure.message;
    console.error('[PreflightTest] Error:', reason);

    res.status(500).json({
      success: false,
      test: 'http_preflight',
      error: reason
    });
  }
});
|
||||||
|
|
||||||
export default router;
|
export default router;
|
||||||
|
|||||||
@@ -26,6 +26,34 @@ const TEST_PLATFORM_ID = '6405ef617056e8014d79101b';
|
|||||||
const FINGERPRINT_DEMO_URL = 'https://demo.fingerprint.com/';
|
const FINGERPRINT_DEMO_URL = 'https://demo.fingerprint.com/';
|
||||||
const AMIUNIQUE_URL = 'https://amiunique.org/fingerprint';
|
const AMIUNIQUE_URL = 'https://amiunique.org/fingerprint';
|
||||||
|
|
||||||
|
// IP geolocation API for timezone lookup (free, no key required)
const IP_API_URL = 'http://ip-api.com/json';

/**
 * Look up the timezone (and coarse location) of an IP address using ip-api.com.
 *
 * The address is validated with Node's `net.isIP` before it is placed in the
 * request path. Without that check an empty or whitespace-only value would
 * produce a query with no address, which ip-api.com documents as "geolocate
 * the requesting host" (i.e. this server rather than the proxy), and a
 * malformed value could alter the request path or query string.
 *
 * @param ip IPv4 or IPv6 address to geolocate; surrounding whitespace is ignored.
 * @returns `{ timezone, city, region }` with an IANA timezone
 *   (e.g. 'America/New_York') when the API reports success, otherwise `null`.
 *   Never throws: invalid input, network errors, timeouts and unexpected
 *   payloads all resolve to `null` after being logged.
 */
async function getTimezoneFromIp(ip: string): Promise<{ timezone: string; city?: string; region?: string } | null> {
  try {
    // Loaded inline, matching how this function already loads axios.
    const { isIP } = require('net');

    const address = typeof ip === 'string' ? ip.trim() : '';
    // isIP returns 0 for anything that is not a valid IPv4/IPv6 literal.
    if (isIP(address) === 0) {
      console.log(`[PuppeteerPreflight] IP geolocation lookup skipped: invalid IP address`);
      return null;
    }

    const axios = require('axios');
    const response = await axios.get(`${IP_API_URL}/${address}?fields=status,timezone,city,regionName`, {
      timeout: 5000, // best-effort lookup; do not stall the preflight
    });

    const payload = response.data;
    if (payload?.status === 'success' && payload?.timezone) {
      return {
        timezone: payload.timezone,
        city: payload.city,
        region: payload.regionName,
      };
    }
    return null;
  } catch (err: unknown) {
    // Narrow before reading `.message`; non-Error throws are stringified.
    const reason = err instanceof Error ? err.message : String(err);
    console.log(`[PuppeteerPreflight] IP geolocation lookup failed: ${reason}`);
    return null;
  }
}
|
||||||
|
|
||||||
export interface PuppeteerPreflightResult extends PreflightResult {
|
export interface PuppeteerPreflightResult extends PreflightResult {
|
||||||
method: 'http';
|
method: 'http';
|
||||||
/** Number of products returned (proves API access) */
|
/** Number of products returned (proves API access) */
|
||||||
@@ -42,6 +70,13 @@ export interface PuppeteerPreflightResult extends PreflightResult {
|
|||||||
expectedProxyIp?: string;
|
expectedProxyIp?: string;
|
||||||
/** Whether IP verification passed (detected IP matches proxy) */
|
/** Whether IP verification passed (detected IP matches proxy) */
|
||||||
ipVerified?: boolean;
|
ipVerified?: boolean;
|
||||||
|
/** Detected timezone from IP geolocation */
|
||||||
|
detectedTimezone?: string;
|
||||||
|
/** Detected location from IP geolocation */
|
||||||
|
detectedLocation?: {
|
||||||
|
city?: string;
|
||||||
|
region?: string;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -136,7 +171,52 @@ export async function runPuppeteerPreflight(
|
|||||||
};
|
};
|
||||||
|
|
||||||
// =========================================================================
|
// =========================================================================
|
||||||
// STEP 1: Visit fingerprint.com demo to verify anti-detect and get IP
|
// STEP 1a: Get IP address directly via simple API (more reliable than scraping)
|
||||||
|
// =========================================================================
|
||||||
|
console.log(`[PuppeteerPreflight] Getting proxy IP address...`);
|
||||||
|
try {
|
||||||
|
const ipApiResponse = await page.evaluate(async () => {
|
||||||
|
try {
|
||||||
|
const response = await fetch('https://api.ipify.org?format=json');
|
||||||
|
const data = await response.json();
|
||||||
|
return { ip: data.ip, error: null };
|
||||||
|
} catch (err: any) {
|
||||||
|
return { ip: null, error: err.message };
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (ipApiResponse.ip) {
|
||||||
|
result.proxyIp = ipApiResponse.ip;
|
||||||
|
result.proxyConnected = true;
|
||||||
|
console.log(`[PuppeteerPreflight] Detected proxy IP: ${ipApiResponse.ip}`);
|
||||||
|
|
||||||
|
// Look up timezone from IP
|
||||||
|
const geoData = await getTimezoneFromIp(ipApiResponse.ip);
|
||||||
|
if (geoData) {
|
||||||
|
result.detectedTimezone = geoData.timezone;
|
||||||
|
result.detectedLocation = { city: geoData.city, region: geoData.region };
|
||||||
|
console.log(`[PuppeteerPreflight] IP Geolocation: ${geoData.city}, ${geoData.region} (${geoData.timezone})`);
|
||||||
|
|
||||||
|
// Set browser timezone to match proxy location via CDP
|
||||||
|
try {
|
||||||
|
const client = await page.target().createCDPSession();
|
||||||
|
await client.send('Emulation.setTimezoneOverride', { timezoneId: geoData.timezone });
|
||||||
|
console.log(`[PuppeteerPreflight] Browser timezone set to: ${geoData.timezone}`);
|
||||||
|
} catch (tzErr: any) {
|
||||||
|
console.log(`[PuppeteerPreflight] Failed to set browser timezone: ${tzErr.message}`);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.log(`[PuppeteerPreflight] WARNING: Could not determine timezone from IP - timezone mismatch possible`);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.log(`[PuppeteerPreflight] IP lookup failed: ${ipApiResponse.error || 'unknown error'}`);
|
||||||
|
}
|
||||||
|
} catch (ipErr: any) {
|
||||||
|
console.log(`[PuppeteerPreflight] IP API error: ${ipErr.message}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// =========================================================================
|
||||||
|
// STEP 1b: Visit fingerprint.com demo to verify anti-detect
|
||||||
// =========================================================================
|
// =========================================================================
|
||||||
console.log(`[PuppeteerPreflight] Testing anti-detect at ${FINGERPRINT_DEMO_URL}...`);
|
console.log(`[PuppeteerPreflight] Testing anti-detect at ${FINGERPRINT_DEMO_URL}...`);
|
||||||
|
|
||||||
@@ -199,6 +279,8 @@ export async function runPuppeteerPreflight(
|
|||||||
// Don't fail - residential proxies often show different egress IPs
|
// Don't fail - residential proxies often show different egress IPs
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Note: Timezone already set earlier via ipify.org IP lookup
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fingerprintData.visitorId) {
|
if (fingerprintData.visitorId) {
|
||||||
|
|||||||
@@ -435,29 +435,47 @@ export class TaskWorker {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Report preflight status to worker_registry
|
* Report preflight status to worker_registry
|
||||||
|
* Function signature: update_worker_preflight(worker_id, transport, status, ip, response_ms, error, fingerprint)
|
||||||
*/
|
*/
|
||||||
private async reportPreflightStatus(): Promise<void> {
|
private async reportPreflightStatus(): Promise<void> {
|
||||||
try {
|
try {
|
||||||
// Update worker_registry directly via SQL (more reliable than API)
|
// Update worker_registry directly via SQL (more reliable than API)
|
||||||
|
// CURL preflight - includes IP address
|
||||||
await this.pool.query(`
|
await this.pool.query(`
|
||||||
SELECT update_worker_preflight($1, 'curl', $2, $3, $4)
|
SELECT update_worker_preflight($1, 'curl', $2, $3, $4, $5, $6)
|
||||||
`, [
|
`, [
|
||||||
this.workerId,
|
this.workerId,
|
||||||
this.preflightCurlPassed ? 'passed' : 'failed',
|
this.preflightCurlPassed ? 'passed' : 'failed',
|
||||||
|
this.preflightCurlResult?.proxyIp || null,
|
||||||
this.preflightCurlResult?.responseTimeMs || null,
|
this.preflightCurlResult?.responseTimeMs || null,
|
||||||
this.preflightCurlResult?.error || null,
|
this.preflightCurlResult?.error || null,
|
||||||
|
null, // No fingerprint for curl
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
// HTTP preflight - includes IP, fingerprint, and timezone data
|
||||||
|
const httpFingerprint = this.preflightHttpResult ? {
|
||||||
|
...this.preflightHttpResult.fingerprint,
|
||||||
|
detectedTimezone: (this.preflightHttpResult as any).detectedTimezone,
|
||||||
|
detectedLocation: (this.preflightHttpResult as any).detectedLocation,
|
||||||
|
productsReturned: this.preflightHttpResult.productsReturned,
|
||||||
|
botDetection: (this.preflightHttpResult as any).botDetection,
|
||||||
|
} : null;
|
||||||
|
|
||||||
await this.pool.query(`
|
await this.pool.query(`
|
||||||
SELECT update_worker_preflight($1, 'http', $2, $3, $4)
|
SELECT update_worker_preflight($1, 'http', $2, $3, $4, $5, $6)
|
||||||
`, [
|
`, [
|
||||||
this.workerId,
|
this.workerId,
|
||||||
this.preflightHttpPassed ? 'passed' : 'failed',
|
this.preflightHttpPassed ? 'passed' : 'failed',
|
||||||
|
this.preflightHttpResult?.proxyIp || null,
|
||||||
this.preflightHttpResult?.responseTimeMs || null,
|
this.preflightHttpResult?.responseTimeMs || null,
|
||||||
this.preflightHttpResult?.error || null,
|
this.preflightHttpResult?.error || null,
|
||||||
|
httpFingerprint ? JSON.stringify(httpFingerprint) : null,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
console.log(`[TaskWorker] Preflight status reported to worker_registry`);
|
console.log(`[TaskWorker] Preflight status reported to worker_registry`);
|
||||||
|
if (this.preflightHttpResult?.proxyIp) {
|
||||||
|
console.log(`[TaskWorker] HTTP IP: ${this.preflightHttpResult.proxyIp}, Timezone: ${(this.preflightHttpResult as any).detectedTimezone || 'unknown'}`);
|
||||||
|
}
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
// Non-fatal - worker can still function
|
// Non-fatal - worker can still function
|
||||||
console.warn(`[TaskWorker] Could not report preflight status: ${err.message}`);
|
console.warn(`[TaskWorker] Could not report preflight status: ${err.message}`);
|
||||||
|
|||||||
Reference in New Issue
Block a user