Workers now run both curl and http (Puppeteer) preflights on startup:

- curl-preflight.ts: Tests axios + proxy via httpbin.org
- puppeteer-preflight.ts: Tests browser + StealthPlugin via fingerprint.com (with amiunique.org fallback)
- Migration 084: Adds preflight columns to worker_registry and a method column to worker_tasks
- Workers report preflight status, IP, fingerprint, and response time
- Tasks can require a specific transport method (curl/http)
- Dashboard shows a Transport column with preflight status badges

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
101 lines
3.2 KiB
TypeScript
101 lines
3.2 KiB
TypeScript
/**
 * Curl Preflight - Verify that the curl/axios transport works through the proxy
 *
 * Tests:
 * 1. A proxy is available and active
 * 2. An HTTP request through the proxy succeeds
 * 3. An anti-detect fingerprint is initialized and its headers are sent
 *
 * Use case: Fast, simple API requests that don't need a browser fingerprint
 */
|
|
|
|
import axios from 'axios';
|
|
import { HttpsProxyAgent } from 'https-proxy-agent';
|
|
import { CrawlRotator, PreflightResult } from './crawl-rotator';
|
|
|
|
/**
 * Preflight outcome for the curl (axios) transport.
 *
 * Carries every field of the shared `PreflightResult` and pins `method`
 * to the literal `'curl'` so a result can be attributed to this transport.
 */
export interface CurlPreflightResult extends PreflightResult {
  method: 'curl';
}
|
|
|
|
/**
 * Run the curl preflight check.
 *
 * Steps, in order (each early failure returns the partially filled result):
 * 1. Ask the rotator for the current proxy; fail with "No proxy available" if none.
 * 2. Ask the rotator for the current fingerprint; fail if it is missing or has
 *    no `userAgent`.
 * 3. Issue a GET to https://httpbin.org/ip through the proxy with the
 *    fingerprint's headers and a 15 second timeout.
 * 4. Record the outcome on the proxy via `markSuccess` / `markFailed`.
 *
 * Request failures are captured in `result.error` rather than thrown. A failure
 * while recording proxy stats is only logged: it neither flips the outcome of
 * the connectivity test nor rejects the returned promise.
 *
 * @param crawlRotator - Rotator providing the current proxy and fingerprint.
 * @returns The preflight result; `passed` is true only when the request succeeded.
 */
export async function runCurlPreflight(
  crawlRotator: CrawlRotator
): Promise<CurlPreflightResult> {
  const result: CurlPreflightResult = {
    method: 'curl',
    passed: false,
    proxyAvailable: false,
    proxyConnected: false,
    antidetectReady: false,
    proxyIp: null,
    fingerprint: null,
    error: null,
    responseTimeMs: null,
  };

  // Step 1: Check proxy is available
  const currentProxy = crawlRotator.proxy.getCurrent();
  if (!currentProxy) {
    result.error = 'No proxy available';
    console.log('[CurlPreflight] FAILED - No proxy available');
    return result;
  }
  result.proxyAvailable = true;
  // NOTE(review): this is the configured proxy host; the body returned by the
  // test endpoint is not inspected, so it is not a verified egress IP.
  result.proxyIp = currentProxy.host;

  // Step 2: Check fingerprint/anti-detect is ready
  const fingerprint = crawlRotator.userAgent.getCurrent();
  if (!fingerprint || !fingerprint.userAgent) {
    result.error = 'Anti-detect fingerprint not initialized';
    console.log('[CurlPreflight] FAILED - No fingerprint');
    return result;
  }
  result.antidetectReady = true;
  result.fingerprint = {
    userAgent: fingerprint.userAgent,
    browserName: fingerprint.browserName,
    deviceCategory: fingerprint.deviceCategory,
  };

  // Step 3: Test proxy connectivity with an actual HTTP request
  const proxyUrl = crawlRotator.proxy.getProxyUrl(currentProxy);
  const testUrl = 'https://httpbin.org/ip';

  // Outcome of the request, kept separate from the stats bookkeeping below so
  // that a bookkeeping failure can never be mistaken for a proxy failure.
  let elapsedMs: number | null = null;
  let failureDetail = '';

  try {
    const agent = new HttpsProxyAgent(proxyUrl);
    const startTime = Date.now();

    await axios.get(testUrl, {
      httpsAgent: agent,
      // Disable axios' own proxy handling (including proxy environment
      // variables) so the request is routed only through the explicit agent.
      proxy: false,
      timeout: 15000, // 15 second timeout
      headers: {
        'User-Agent': fingerprint.userAgent,
        'Accept-Language': fingerprint.acceptLanguage,
        ...(fingerprint.secChUa && { 'sec-ch-ua': fingerprint.secChUa }),
        ...(fingerprint.secChUaPlatform && { 'sec-ch-ua-platform': fingerprint.secChUaPlatform }),
        ...(fingerprint.secChUaMobile && { 'sec-ch-ua-mobile': fingerprint.secChUaMobile }),
      },
    });

    elapsedMs = Date.now() - startTime;
    result.responseTimeMs = elapsedMs;
    result.proxyConnected = true;
    result.passed = true;

    console.log(`[CurlPreflight] PASSED - Proxy ${currentProxy.host} connected (${elapsedMs}ms), UA: ${fingerprint.browserName}/${fingerprint.deviceCategory}`);
  } catch (err: unknown) {
    // Narrow instead of assuming an Error so a non-Error throw is still reported.
    failureDetail = err instanceof Error ? err.message : String(err);
    result.error = `Proxy connection failed: ${failureDetail || 'Unknown error'}`;
    console.log(`[CurlPreflight] FAILED - Proxy connection error: ${failureDetail}`);
  }

  // Step 4: Record the outcome on proxy stats (best effort)
  try {
    if (elapsedMs !== null) {
      await crawlRotator.proxy.markSuccess(currentProxy.id, elapsedMs);
    } else {
      await crawlRotator.proxy.markFailed(currentProxy.id, failureDetail);
    }
  } catch (statsErr: unknown) {
    const statsDetail = statsErr instanceof Error ? statsErr.message : String(statsErr);
    console.log(`[CurlPreflight] WARNING - Could not record proxy stats: ${statsDetail}`);
  }

  return result;
}
|