feat(workers): Add dual-transport preflight system

Workers now run both curl and http (Puppeteer) preflights on startup:
- curl-preflight.ts: Tests axios + proxy via httpbin.org
- puppeteer-preflight.ts: Tests browser + StealthPlugin via fingerprint.com
  (with amiunique.org fallback)
- Migration 084: Adds preflight columns to worker_registry and method
  column to worker_tasks
- Workers report preflight status, IP, fingerprint, and response time
- Tasks can require specific transport method (curl/http)
- Dashboard shows Transport column with preflight status badges

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-11 22:47:52 -07:00
parent a35976b9e9
commit cdab71a1ee
7 changed files with 1085 additions and 8 deletions

View File

@@ -51,6 +51,10 @@ import os from 'os';
import { CrawlRotator } from '../services/crawl-rotator';
import { setCrawlRotator } from '../platforms/dutchie';
// Dual-transport preflight system
import { runCurlPreflight, CurlPreflightResult } from '../services/curl-preflight';
import { runPuppeteerPreflightWithRetry, PuppeteerPreflightResult } from '../services/puppeteer-preflight';
// Task handlers by role
// Per TASK_WORKFLOW_2024-12-10.md: payload_fetch and product_refresh are now separate
import { handlePayloadFetch } from './handlers/payload-fetch';
@@ -189,6 +193,21 @@ export class TaskWorker {
private isBackingOff: boolean = false;
private backoffReason: string | null = null;
// ==========================================================================
// DUAL-TRANSPORT PREFLIGHT STATUS
// ==========================================================================
// Workers run BOTH preflights on startup:
// - curl: axios/proxy transport - fast, for simple API calls
// - http: Puppeteer/browser transport - anti-detect, for Dutchie GraphQL
//
// Task claiming checks method compatibility - worker must have passed
// the preflight for the task's required method.
// ==========================================================================
private preflightCurlPassed: boolean = false;
private preflightHttpPassed: boolean = false;
private preflightCurlResult: CurlPreflightResult | null = null;
private preflightHttpResult: PuppeteerPreflightResult | null = null;
constructor(role: TaskRole | null = null, workerId?: string) {
this.pool = getPool();
this.role = role;
@@ -351,6 +370,99 @@ export class TaskWorker {
}
}
/**
 * Run dual-transport preflights on startup.
 *
 * Runs the curl (axios/proxy) and http (Puppeteer/browser) preflights
 * concurrently, stores both results on the worker, logs the outcome, and
 * records the status through reportPreflightStatus().
 *
 * A preflight that rejects is converted into a failed result instead of
 * propagating, so this method does not throw because a preflight failed.
 *
 * NOTE: All current tasks require 'http' method, so http preflight must pass
 * for the worker to claim any tasks. Curl preflight is for future use.
 */
private async runDualPreflights(): Promise<void> {
  console.log(`[TaskWorker] Running dual-transport preflights...`);

  // A rejection value is not guaranteed to be an Error (it may be a string,
  // null, or a plain object), so derive the message defensively rather than
  // reading `.message` blindly.
  const describeError = (err: unknown): string => {
    if (err instanceof Error) {
      return err.message;
    }
    if (typeof err === 'object' && err !== null && 'message' in err) {
      return String((err as { message: unknown }).message);
    }
    return String(err);
  };

  // Fields shared by both transports when a preflight rejects outright.
  const failureFields = (err: unknown) => ({
    passed: false,
    proxyAvailable: false,
    proxyConnected: false,
    antidetectReady: false,
    proxyIp: null,
    fingerprint: null,
    error: `Preflight error: ${describeError(err)}`,
    responseTimeMs: null,
  });

  // Run both preflights in parallel for efficiency
  const [curlResult, httpResult] = await Promise.all([
    runCurlPreflight(this.crawlRotator).catch((err): CurlPreflightResult => ({
      method: 'curl',
      ...failureFields(err),
    })),
    runPuppeteerPreflightWithRetry(this.crawlRotator, 1).catch((err): PuppeteerPreflightResult => ({
      method: 'http',
      ...failureFields(err),
      productsReturned: 0,
    })),
  ]);

  // Store results
  this.preflightCurlResult = curlResult;
  this.preflightHttpResult = httpResult;
  this.preflightCurlPassed = curlResult.passed;
  this.preflightHttpPassed = httpResult.passed;

  // Log results
  console.log(`[TaskWorker] CURL preflight: ${curlResult.passed ? 'PASSED' : 'FAILED'}${curlResult.error ? ` - ${curlResult.error}` : ''}`);
  console.log(`[TaskWorker] HTTP preflight: ${httpResult.passed ? 'PASSED' : 'FAILED'}${httpResult.error ? ` - ${httpResult.error}` : ''}`);
  if (httpResult.passed && httpResult.productsReturned) {
    console.log(`[TaskWorker] HTTP preflight returned ${httpResult.productsReturned} products from test store`);
  }

  // Record the status in worker_registry
  await this.reportPreflightStatus();

  // Since all tasks require 'http', warn if http preflight failed
  if (!this.preflightHttpPassed) {
    console.warn(`[TaskWorker] WARNING: HTTP preflight failed - this worker cannot claim any tasks!`);
    console.warn(`[TaskWorker] Error: ${httpResult.error}`);
  }
}
/**
 * Report preflight status to worker_registry.
 *
 * Calls the update_worker_preflight SQL function once per transport
 * ('curl', then 'http') with this worker's id, a 'passed'/'failed' status,
 * the measured response time, and the error text (if any).
 *
 * Failures are logged and swallowed on purpose: reporting is best-effort and
 * must not prevent the worker from starting.
 */
private async reportPreflightStatus(): Promise<void> {
  try {
    // Update worker_registry directly via SQL (more reliable than API)
    await this.pool.query(`
      SELECT update_worker_preflight($1, 'curl', $2, $3, $4)
    `, [
      this.workerId,
      this.preflightCurlPassed ? 'passed' : 'failed',
      // `??` rather than `||` so a measured 0 ms is stored as 0, not NULL
      this.preflightCurlResult?.responseTimeMs ?? null,
      // `||` is intentional here: an empty error string is stored as NULL
      this.preflightCurlResult?.error || null,
    ]);

    await this.pool.query(`
      SELECT update_worker_preflight($1, 'http', $2, $3, $4)
    `, [
      this.workerId,
      this.preflightHttpPassed ? 'passed' : 'failed',
      this.preflightHttpResult?.responseTimeMs ?? null,
      this.preflightHttpResult?.error || null,
    ]);

    console.log(`[TaskWorker] Preflight status reported to worker_registry`);
  } catch (err: unknown) {
    // Non-fatal - worker can still function.
    // The thrown value may not be an Error, so extract the message safely;
    // otherwise this handler itself could throw.
    let reason: string;
    if (err instanceof Error) {
      reason = err.message;
    } else if (typeof err === 'object' && err !== null && 'message' in err) {
      reason = String((err as { message: unknown }).message);
    } else {
      reason = String(err);
    }
    console.warn(`[TaskWorker] Could not report preflight status: ${reason}`);
  }
}
/**
* Register worker with the registry (get friendly name)
*/
@@ -494,11 +606,15 @@ export class TaskWorker {
// Register with the API to get a friendly name
await this.register();
// Run dual-transport preflights
await this.runDualPreflights();
// Start registry heartbeat
this.startRegistryHeartbeat();
const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)';
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (max ${this.maxConcurrentTasks} concurrent tasks)`);
const preflightMsg = `curl=${this.preflightCurlPassed ? '✓' : '✗'} http=${this.preflightHttpPassed ? '✓' : '✗'}`;
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (${preflightMsg}, max ${this.maxConcurrentTasks} concurrent tasks)`);
while (this.isRunning) {
try {
@@ -552,7 +668,13 @@ export class TaskWorker {
// Try to claim more tasks if we have capacity
if (this.canAcceptMoreTasks()) {
const task = await taskService.claimTask(this.role, this.workerId);
// Pass preflight capabilities to only claim compatible tasks
const task = await taskService.claimTask(
this.role,
this.workerId,
this.preflightCurlPassed,
this.preflightHttpPassed
);
if (task) {
console.log(`[TaskWorker] ${this.friendlyName} claimed task ${task.id} (${task.role}) [${this.activeTasks.size + 1}/${this.maxConcurrentTasks}]`);
@@ -738,6 +860,8 @@ export class TaskWorker {
maxConcurrentTasks: number;
isBackingOff: boolean;
backoffReason: string | null;
preflightCurlPassed: boolean;
preflightHttpPassed: boolean;
} {
return {
workerId: this.workerId,
@@ -748,6 +872,8 @@ export class TaskWorker {
maxConcurrentTasks: this.maxConcurrentTasks,
isBackingOff: this.isBackingOff,
backoffReason: this.backoffReason,
preflightCurlPassed: this.preflightCurlPassed,
preflightHttpPassed: this.preflightHttpPassed,
};
}
}