fix(worker): Wait for proxies instead of crashing on startup
- Task worker now waits up to 60 minutes for active proxies - Retries every 30 seconds with clear logging - Updated K8s scraper-worker.yaml with Deployment definition - Deployment uses task-worker.js entrypoint with correct liveness probe 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -116,23 +116,41 @@ export class TaskWorker {
|
||||
* Initialize stealth systems (proxy rotation, fingerprints)
|
||||
* Called once on worker startup before processing any tasks.
|
||||
*
|
||||
* IMPORTANT: Proxies are REQUIRED. Workers will fail to start if no proxies available.
|
||||
* IMPORTANT: Proxies are REQUIRED. Workers will wait until proxies are available.
|
||||
*/
|
||||
private async initializeStealth(): Promise<void> {
|
||||
// Load proxies from database
|
||||
await this.crawlRotator.initialize();
|
||||
const MAX_WAIT_MINUTES = 60;
|
||||
const RETRY_INTERVAL_MS = 30000; // 30 seconds
|
||||
const maxAttempts = (MAX_WAIT_MINUTES * 60 * 1000) / RETRY_INTERVAL_MS;
|
||||
let attempts = 0;
|
||||
|
||||
const stats = this.crawlRotator.proxy.getStats();
|
||||
if (stats.activeProxies === 0) {
|
||||
throw new Error('No active proxies available. Workers MUST use proxies for all requests. Add proxies to the database before starting workers.');
|
||||
while (attempts < maxAttempts) {
|
||||
try {
|
||||
// Load proxies from database
|
||||
await this.crawlRotator.initialize();
|
||||
|
||||
const stats = this.crawlRotator.proxy.getStats();
|
||||
if (stats.activeProxies > 0) {
|
||||
console.log(`[TaskWorker] Loaded ${stats.activeProxies} proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`);
|
||||
|
||||
// Wire rotator to Dutchie client - proxies will be used for ALL requests
|
||||
setCrawlRotator(this.crawlRotator);
|
||||
|
||||
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, proxy REQUIRED for all requests`);
|
||||
return;
|
||||
}
|
||||
|
||||
attempts++;
|
||||
console.log(`[TaskWorker] No active proxies available (attempt ${attempts}). Waiting ${RETRY_INTERVAL_MS / 1000}s for proxies to be added...`);
|
||||
await this.sleep(RETRY_INTERVAL_MS);
|
||||
} catch (error: any) {
|
||||
attempts++;
|
||||
console.log(`[TaskWorker] Error loading proxies (attempt ${attempts}): ${error.message}. Retrying in ${RETRY_INTERVAL_MS / 1000}s...`);
|
||||
await this.sleep(RETRY_INTERVAL_MS);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`[TaskWorker] Loaded ${stats.activeProxies} proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`);
|
||||
|
||||
// Wire rotator to Dutchie client - proxies will be used for ALL requests
|
||||
setCrawlRotator(this.crawlRotator);
|
||||
|
||||
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, proxy REQUIRED for all requests`);
|
||||
throw new Error(`No active proxies available after waiting ${MAX_WAIT_MINUTES} minutes. Add proxies to the database.`);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user