feat: Performance optimizations and preflight improvements
- Add missing /api/analytics/national/summary endpoint
- Optimize dashboard activity queries (subquery vs JOIN+GROUP BY)
- Add PreflightSummary component to Workers page with gold qualified badge
- Add preflight retry logic - workers retry every 30s until qualified
- Run stale task cleanup on ALL workers (not just worker-0)
- Add preflight fields to worker-registry API (ip, fingerprint, is_qualified)

Database indexes added:
- idx_store_products_created_at (for recent products)
- idx_dispensaries_last_crawl_at (for recent scrapes)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -273,6 +273,16 @@ export class TaskWorker {
|
||||
private preflightsCompleted: boolean = false;
|
||||
private initializingPromise: Promise<void> | null = null;
|
||||
|
||||
// ==========================================================================
|
||||
// PREFLIGHT RETRY SETTINGS
|
||||
// ==========================================================================
|
||||
// If preflight fails, worker retries every PREFLIGHT_RETRY_INTERVAL_MS
|
||||
// Worker is BLOCKED from claiming ANY tasks until preflight passes.
|
||||
// This ensures unqualified workers never touch the task pool.
|
||||
// ==========================================================================
|
||||
private static readonly PREFLIGHT_RETRY_INTERVAL_MS = 30000; // 30 seconds
|
||||
private isRetryingPreflight: boolean = false;
|
||||
|
||||
// ==========================================================================
|
||||
// STEP TRACKING FOR DASHBOARD VISIBILITY
|
||||
// ==========================================================================
|
||||
@@ -617,6 +627,75 @@ export class TaskWorker {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retry preflight until it passes.
|
||||
* Worker is BLOCKED from claiming ANY tasks until HTTP preflight passes.
|
||||
* This ensures unqualified workers never touch the task pool.
|
||||
*
|
||||
* All current tasks require 'http' method, so HTTP preflight is mandatory.
|
||||
*/
|
||||
private async retryPreflightUntilPass(): Promise<void> {
  // Nothing to do once HTTP preflight has already passed.
  if (this.preflightHttpPassed) {
    return;
  }

  // Re-entrancy guard: only one retry loop may be active at a time.
  if (this.isRetryingPreflight) {
    return;
  }

  // Thrown values are not guaranteed to be Error instances; narrow before
  // reading `.message` so log lines never print "undefined".
  const describeError = (err: unknown): string => {
    if (err instanceof Error) {
      return err.message;
    }
    if (typeof err === 'object' && err !== null && 'message' in err) {
      return String((err as { message: unknown }).message);
    }
    return String(err);
  };

  this.isRetryingPreflight = true;
  let retryCount = 0;

  console.log(`[TaskWorker] ${this.friendlyName} HTTP preflight FAILED - entering retry loop (every ${TaskWorker.PREFLIGHT_RETRY_INTERVAL_MS / 1000}s)`);
  console.log(`[TaskWorker] ${this.friendlyName} BLOCKED from task pool until preflight passes`);

  try {
    while (!this.preflightHttpPassed && this.isRunning) {
      retryCount++;

      // Wait first: this method is only entered after a preflight failure,
      // so an immediate re-run would most likely fail the same way.
      await this.sleep(TaskWorker.PREFLIGHT_RETRY_INTERVAL_MS);

      // The worker may have been asked to stop while we were sleeping.
      if (!this.isRunning) {
        break;
      }

      console.log(`[TaskWorker] ${this.friendlyName} preflight retry #${retryCount}...`);

      // Reload proxies before retry (might have new ones). Best-effort:
      // a reload failure is logged and the preflight is still attempted.
      try {
        await this.crawlRotator.initialize();
        const stats = this.crawlRotator.proxy.getStats();
        console.log(`[TaskWorker] Proxies available: ${stats.activeProxies}`);
      } catch (err: unknown) {
        console.warn(`[TaskWorker] Proxy reload failed: ${describeError(err)}`);
      }

      // Re-run HTTP preflight and record the outcome on the worker.
      try {
        const httpResult = await runPuppeteerPreflightWithRetry(this.crawlRotator, 1);
        this.preflightHttpResult = httpResult;
        this.preflightHttpPassed = httpResult.passed;

        if (httpResult.passed) {
          console.log(`[TaskWorker] ${this.friendlyName} HTTP preflight PASSED on retry #${retryCount}!`);
          console.log(`[TaskWorker] ${this.friendlyName} IP: ${httpResult.proxyIp}, Products: ${httpResult.productsReturned}`);
          console.log(`[TaskWorker] ${this.friendlyName} now QUALIFIED to claim tasks`);

          // Report updated status
          await this.reportPreflightStatus();
          break;
        }

        console.log(`[TaskWorker] ${this.friendlyName} HTTP preflight still FAILED: ${httpResult.error}`);
        console.log(`[TaskWorker] ${this.friendlyName} will retry in ${TaskWorker.PREFLIGHT_RETRY_INTERVAL_MS / 1000}s...`);
      } catch (err: unknown) {
        // An exception during preflight counts as a failed attempt; keep looping.
        console.error(`[TaskWorker] ${this.friendlyName} preflight retry error: ${describeError(err)}`);
      }
    }
  } finally {
    // Always release the guard, even if something above rejects unexpectedly;
    // otherwise every later call would return early and the worker could
    // never re-qualify.
    this.isRetryingPreflight = false;
  }
}
|
||||
|
||||
/**
|
||||
* Lazy initialization of stealth systems.
|
||||
* Called BEFORE claiming first task (not at worker startup).
|
||||
@@ -855,15 +934,14 @@ export class TaskWorker {
|
||||
// Start registry heartbeat immediately
|
||||
this.startRegistryHeartbeat();
|
||||
|
||||
// Cleanup stale tasks on startup and periodically (only worker-0 does this to avoid races)
|
||||
// This handles tasks left in 'claimed'/'running' status when workers restart or crash
|
||||
if (this.workerId.endsWith('-0') || this.workerId === 'scraper-worker-0') {
|
||||
// Run immediately on startup
|
||||
await this.runStaleTaskCleanup();
|
||||
// Cleanup stale tasks on startup and periodically
|
||||
// ALL workers run cleanup to ensure stale tasks are recovered even if some workers crash
|
||||
// The cleanup query uses SELECT FOR UPDATE SKIP LOCKED to avoid races
|
||||
// Run immediately on startup
|
||||
await this.runStaleTaskCleanup();
|
||||
|
||||
// Start periodic cleanup every 10 minutes
|
||||
this.startPeriodicStaleCleanup();
|
||||
}
|
||||
// Start periodic cleanup every 10 minutes
|
||||
this.startPeriodicStaleCleanup();
|
||||
|
||||
const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)';
|
||||
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (stealth=lazy, max ${this.maxConcurrentTasks} concurrent tasks)`);
|
||||
@@ -940,6 +1018,18 @@ export class TaskWorker {
|
||||
}
|
||||
}
|
||||
|
||||
// =================================================================
|
||||
// PREFLIGHT GATE - BLOCK unqualified workers from task pool
|
||||
// All tasks require HTTP method, so HTTP preflight MUST pass.
|
||||
// If preflight failed, worker retries every 30 seconds.
|
||||
// Worker CANNOT claim ANY tasks until preflight passes.
|
||||
// =================================================================
|
||||
if (!this.preflightHttpPassed) {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} BLOCKED - HTTP preflight not passed, cannot claim tasks`);
|
||||
await this.retryPreflightUntilPass();
|
||||
return; // Return to main loop, will re-check on next iteration
|
||||
}
|
||||
|
||||
// Pass preflight capabilities to only claim compatible tasks
|
||||
const task = await taskService.claimTask(
|
||||
this.role,
|
||||
|
||||
Reference in New Issue
Block a user