feat(scheduler): Immutable schedules and HTTP-only pipeline
## Changes - **Migration 089**: Add is_immutable and method columns to task_schedules - Per-state product_discovery schedules (4h default) - Store discovery weekly (168h) - All schedules use HTTP transport (Puppeteer/browser) - **Task Scheduler**: HTTP-only product discovery with per-state scheduling - Each state has its own immutable schedule - Schedules can be edited (interval/priority) but not deleted - **TasksDashboard UI**: Full immutability support - Lock icon for immutable schedules - State and Method columns in schedules table - Disabled delete for immutable, restricted edit fields - **Store Discovery HTTP**: Auto-queue product_discovery for new stores - **Migration 088**: Discovery payloads storage schema 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -75,6 +75,7 @@ import { handleProductRefresh } from './handlers/product-refresh';
|
||||
import { handleProductDiscovery } from './handlers/product-discovery-curl';
|
||||
import { handleProductDiscoveryHttp } from './handlers/product-discovery-http';
|
||||
import { handleStoreDiscovery } from './handlers/store-discovery';
|
||||
import { handleStoreDiscoveryHttp } from './handlers/store-discovery-http';
|
||||
import { handleEntryPointDiscovery } from './handlers/entry-point-discovery';
|
||||
import { handleAnalyticsRefresh } from './handlers/analytics-refresh';
|
||||
import { handleWhoami } from './handlers/whoami';
|
||||
@@ -160,21 +161,30 @@ const TASK_HANDLERS: Record<TaskRole, TaskHandler> = {
|
||||
/**
|
||||
* Get the appropriate handler for a task, considering both role and method.
|
||||
*
|
||||
* For product_discovery:
|
||||
* - method='http' -> handleProductDiscoveryHttp (browser-based, for Evomi proxies)
|
||||
* - method='curl' or unspecified -> handleProductDiscovery (curl-based)
|
||||
* Dual-transport handlers:
|
||||
* - product_discovery: curl (axios) or http (Puppeteer)
|
||||
* - store_discovery: curl (axios) or http (Puppeteer)
|
||||
*
|
||||
* Default method is 'http' since all GraphQL queries should use browser transport
|
||||
* for better TLS fingerprinting and session-based proxy compatibility.
|
||||
*/
|
||||
function getHandlerForTask(task: WorkerTask): TaskHandler | undefined {
|
||||
const role = task.role as TaskRole;
|
||||
const method = task.method || 'curl';
|
||||
const method = task.method || 'http'; // Default to HTTP for all GraphQL tasks
|
||||
|
||||
// Special handling for product_discovery with method='http'
|
||||
// product_discovery: dual-transport support
|
||||
if (role === 'product_discovery' && method === 'http') {
|
||||
console.log(`[TaskWorker] Using HTTP handler for product_discovery (method=${method})`);
|
||||
return handleProductDiscoveryHttp;
|
||||
}
|
||||
|
||||
// Default: use the static handler registry
|
||||
// store_discovery: dual-transport support
|
||||
if (role === 'store_discovery' && method === 'http') {
|
||||
console.log(`[TaskWorker] Using HTTP handler for store_discovery (method=${method})`);
|
||||
return handleStoreDiscoveryHttp;
|
||||
}
|
||||
|
||||
// Default: use the static handler registry (curl-based)
|
||||
return TASK_HANDLERS[role];
|
||||
}
|
||||
|
||||
@@ -719,6 +729,20 @@ export class TaskWorker {
|
||||
// Start registry heartbeat immediately
|
||||
this.startRegistryHeartbeat();
|
||||
|
||||
// Cleanup stale tasks on startup (only worker-0 does this to avoid races)
|
||||
// This handles tasks left in 'claimed'/'running' status when workers restart
|
||||
if (this.workerId.endsWith('-0') || this.workerId === 'scraper-worker-0') {
|
||||
try {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} running stale task cleanup...`);
|
||||
const cleanupResult = await taskService.cleanupStaleTasks(30); // 30 minute threshold
|
||||
if (cleanupResult.cleaned > 0) {
|
||||
console.log(`[TaskWorker] Cleaned up ${cleanupResult.cleaned} stale tasks`);
|
||||
}
|
||||
} catch (err: any) {
|
||||
console.error(`[TaskWorker] Stale task cleanup error:`, err.message);
|
||||
}
|
||||
}
|
||||
|
||||
const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)';
|
||||
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (stealth=lazy, max ${this.maxConcurrentTasks} concurrent tasks)`);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user