fix(ci): Remove buildx cache and add preflight enforcement

- Remove cache_from/cache_to from CI (plugin bug splitting commas)
- Add preflight() method to CrawlRotator - tests proxy + anti-detect
- Add pre-task preflight check - workers MUST pass before executing
- Add releaseTask() to release tasks back to pending on preflight fail
- Rename proxy_test task to whoami for clarity

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-11 21:37:22 -07:00
parent 3f958fbff3
commit f2864bd2ad
7 changed files with 237 additions and 71 deletions

View File

@@ -59,7 +59,7 @@ import { handleProductDiscovery } from './handlers/product-discovery';
import { handleStoreDiscovery } from './handlers/store-discovery';
import { handleEntryPointDiscovery } from './handlers/entry-point-discovery';
import { handleAnalyticsRefresh } from './handlers/analytics-refresh';
import { handleProxyTest } from './handlers/proxy-test';
import { handleWhoami } from './handlers/whoami';
const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '5000');
const HEARTBEAT_INTERVAL_MS = parseInt(process.env.HEARTBEAT_INTERVAL_MS || '30000');
@@ -111,6 +111,7 @@ export interface TaskContext {
workerId: string;
task: WorkerTask;
heartbeat: () => Promise<void>;
crawlRotator?: CrawlRotator;
}
export interface TaskResult {
@@ -134,7 +135,7 @@ const TASK_HANDLERS: Record<TaskRole, TaskHandler> = {
store_discovery: handleStoreDiscovery,
entry_point_discovery: handleEntryPointDiscovery,
analytics_refresh: handleAnalyticsRefresh,
proxy_test: handleProxyTest, // Tests proxy via ipify
whoami: handleWhoami, // Tests proxy + anti-detect
};
/**
@@ -555,6 +556,26 @@ export class TaskWorker {
if (task) {
console.log(`[TaskWorker] ${this.friendlyName} claimed task ${task.id} (${task.role}) [${this.activeTasks.size + 1}/${this.maxConcurrentTasks}]`);
// =================================================================
// PREFLIGHT CHECK - CRITICAL: Worker MUST pass before task execution
// Verifies: 1) Proxy available 2) Proxy connected 3) Anti-detect ready
// =================================================================
const preflight = await this.crawlRotator.preflight();
if (!preflight.passed) {
console.log(`[TaskWorker] ${this.friendlyName} PREFLIGHT FAILED for task ${task.id}: ${preflight.error}`);
console.log(`[TaskWorker] Releasing task ${task.id} back to pending - worker cannot proceed without proxy/anti-detect`);
// Release task back to pending so another worker can pick it up
await taskService.releaseTask(task.id);
// Wait before trying again - give proxies time to recover
await this.sleep(30000); // 30 second wait on preflight failure
return;
}
console.log(`[TaskWorker] ${this.friendlyName} preflight PASSED for task ${task.id} (proxy: ${preflight.proxyIp}, ${preflight.responseTimeMs}ms)`);
this.activeTasks.set(task.id, task);
// Start task in background (don't await)
@@ -611,6 +632,7 @@ export class TaskWorker {
heartbeat: async () => {
await taskService.heartbeat(task.id);
},
crawlRotator: this.crawlRotator,
};
// Execute the task