feat(tasks): Add unified task-based worker architecture

Replace fragmented job systems (job_schedules, dispensary_crawl_jobs, SyncOrchestrator)
with a single unified task queue:

- Add worker_tasks table with atomic task claiming via SELECT FOR UPDATE SKIP LOCKED
- Add TaskService for CRUD, claiming, and capacity metrics
- Add TaskWorker with role-based handlers (resync, discovery, analytics)
- Add /api/tasks endpoints for management and migration from legacy systems
- Add TasksDashboard UI and integrate task counts into main dashboard
- Add comprehensive documentation

Task roles: store_discovery, entry_point_discovery, product_discovery, product_resync, analytics_refresh

Run workers with: WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-09 16:27:03 -07:00
parent 7f9cf559cf
commit 89c262ee20
18 changed files with 3167 additions and 2 deletions

View File

@@ -0,0 +1,266 @@
/**
* Task Worker
*
* A unified worker that processes tasks from the worker_tasks queue.
* Replaces the fragmented job systems (job_schedules, dispensary_crawl_jobs, etc.)
*
* Usage:
* WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts
*
* Environment:
* WORKER_ROLE - Which task role to process (required)
* WORKER_ID - Optional custom worker ID
* POLL_INTERVAL_MS - How often to check for tasks (default: 5000)
* HEARTBEAT_INTERVAL_MS - How often to update heartbeat (default: 30000)
*/
import { Pool } from 'pg';
import { v4 as uuidv4 } from 'uuid';
import { taskService, TaskRole, WorkerTask } from './task-service';
import { pool } from '../db/pool';
// Task handlers by role
import { handleProductResync } from './handlers/product-resync';
import { handleProductDiscovery } from './handlers/product-discovery';
import { handleStoreDiscovery } from './handlers/store-discovery';
import { handleEntryPointDiscovery } from './handlers/entry-point-discovery';
import { handleAnalyticsRefresh } from './handlers/analytics-refresh';
/**
 * Parse a millisecond interval from an environment variable value.
 *
 * Returns `fallback` when the value is missing, not a base-10 integer, or not
 * strictly positive. Without this guard a typo such as `POLL_INTERVAL_MS=abc`
 * yields NaN, which timers coerce to a ~1ms delay and turns the worker into a
 * busy loop against the database.
 *
 * @param raw - Raw environment variable value (may be undefined or empty).
 * @param fallback - Interval in milliseconds to use when `raw` is unusable.
 * @returns A positive integer number of milliseconds.
 */
function parseIntervalMs(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

/** How long to wait before polling again when no task is available (default: 5000ms). */
const POLL_INTERVAL_MS = parseIntervalMs(process.env.POLL_INTERVAL_MS, 5000);

/** How often the worker reports a heartbeat for the task it is running (default: 30000ms). */
const HEARTBEAT_INTERVAL_MS = parseIntervalMs(process.env.HEARTBEAT_INTERVAL_MS, 30000);
/** Execution context the worker builds for each claimed task and passes to its handler. */
export interface TaskContext {
/** Database connection pool held by the worker. */
pool: Pool;
/** ID of the worker executing the task. */
workerId: string;
/** The claimed task being processed. */
task: WorkerTask;
/** Reports a heartbeat for this task via taskService.heartbeat. */
heartbeat: () => Promise<void>;
}
/** Outcome returned by a task handler; the worker uses it to complete or fail the task. */
export interface TaskResult {
/** When true the worker marks the task completed; otherwise it marks it failed. */
success: boolean;
/** Optional counter — presumably the number of products handled; confirm against the handlers. */
productsProcessed?: number;
/** Optional counter — presumably the number of snapshots written; confirm against the handlers. */
snapshotsCreated?: number;
/** Optional counter — presumably the number of stores found; confirm against the handlers. */
storesDiscovered?: number;
/** Failure message; the worker substitutes 'Unknown error' when this is empty or absent. */
error?: string;
/** Handlers may attach additional result fields. */
[key: string]: unknown;
}
/** Signature every role handler implements: receives the task context, resolves to its result. */
type TaskHandler = (ctx: TaskContext) => Promise<TaskResult>;
/** Dispatch table mapping each task role to the handler that executes tasks of that role. */
const TASK_HANDLERS: Record<TaskRole, TaskHandler> = {
product_resync: handleProductResync,
product_discovery: handleProductDiscovery,
store_discovery: handleStoreDiscovery,
entry_point_discovery: handleEntryPointDiscovery,
analytics_refresh: handleAnalyticsRefresh,
};
/**
 * Polls the task queue for a single role and runs each claimed task through
 * the handler registered for that task's role.
 *
 * The worker processes one task at a time. While a task is running, a
 * background interval reports heartbeats for it through `taskService`.
 */
export class TaskWorker {
  private readonly pool: Pool;
  private readonly workerId: string;
  private readonly role: TaskRole;
  private isRunning = false;
  private heartbeatInterval: NodeJS.Timeout | null = null;
  private currentTask: WorkerTask | null = null;

  /**
   * @param role - Task role this worker claims tasks for.
   * @param workerId - Optional explicit worker ID; when omitted (or empty) an
   *   ID of the form `worker-<role>-<8 chars of a UUID>` is generated.
   */
  constructor(role: TaskRole, workerId?: string) {
    this.pool = pool;
    this.role = role;
    this.workerId = workerId || `worker-${role}-${uuidv4().slice(0, 8)}`;
  }

  /**
   * Run the worker loop until `stop()` is called.
   *
   * Errors escaping a single iteration are logged and followed by a poll-interval
   * pause so a persistent failure (e.g. the database being down) does not spin.
   */
  async start(): Promise<void> {
    this.isRunning = true;
    console.log(`[TaskWorker] Starting worker ${this.workerId} for role: ${this.role}`);
    while (this.isRunning) {
      try {
        await this.processNextTask();
      } catch (error: unknown) {
        console.error(`[TaskWorker] Loop error:`, TaskWorker.errorMessage(error));
        await this.sleep(POLL_INTERVAL_MS);
      }
    }
    console.log(`[TaskWorker] Worker ${this.workerId} stopped`);
  }

  /**
   * Request a graceful stop: the loop exits after the current iteration.
   *
   * The heartbeat is intentionally left running here. A task that is still
   * executing must keep reporting liveness until it finishes; the heartbeat is
   * cleared by `processNextTask` once that task settles.
   */
  stop(): void {
    this.isRunning = false;
    console.log(`[TaskWorker] Stopping worker ${this.workerId}...`);
  }

  /**
   * Claim and execute one task, or sleep for the poll interval if none is available.
   *
   * A handler result with `success: false`, or any exception thrown while
   * starting/executing/completing the task, marks the task as failed.
   */
  private async processNextTask(): Promise<void> {
    const task = await taskService.claimTask(this.role, this.workerId);
    if (!task) {
      // No tasks available, wait and retry
      await this.sleep(POLL_INTERVAL_MS);
      return;
    }

    this.currentTask = task;
    console.log(`[TaskWorker] Claimed task ${task.id} (${task.role}) for dispensary ${task.dispensary_id || 'N/A'}`);
    this.startHeartbeat(task.id);

    try {
      await taskService.startTask(task.id);

      const handler = TASK_HANDLERS[task.role];
      if (!handler) {
        throw new Error(`No handler registered for role: ${task.role}`);
      }

      const ctx: TaskContext = {
        pool: this.pool,
        workerId: this.workerId,
        task,
        heartbeat: async () => {
          await taskService.heartbeat(task.id);
        },
      };

      const result = await handler(ctx);

      if (result.success) {
        await taskService.completeTask(task.id, result);
        console.log(`[TaskWorker] Task ${task.id} completed successfully`);
        await this.chainFollowUp(task, result);
      } else {
        // Use one message for both the stored failure and the log line.
        const reason = result.error || 'Unknown error';
        await taskService.failTask(task.id, reason);
        console.log(`[TaskWorker] Task ${task.id} failed: ${reason}`);
      }
    } catch (error: unknown) {
      const message = TaskWorker.errorMessage(error);
      // Log before touching the database so the original error is never lost
      // if failTask itself throws.
      console.error(`[TaskWorker] Task ${task.id} threw error:`, message);
      await taskService.failTask(task.id, message);
    } finally {
      this.stopHeartbeat();
      this.currentTask = null;
    }
  }

  /**
   * Ask the task service to chain a follow-up for a task that has just completed.
   *
   * Chaining errors are logged rather than rethrown: the task has already been
   * recorded as completed, so it must not be flipped to failed afterwards.
   */
  private async chainFollowUp(task: WorkerTask, result: TaskResult): Promise<void> {
    try {
      const chainedTask = await taskService.chainNextTask({
        ...task,
        status: 'completed',
        result,
      });
      if (chainedTask) {
        console.log(`[TaskWorker] Chained new task ${chainedTask.id} (${chainedTask.role})`);
      }
    } catch (error: unknown) {
      console.error(
        `[TaskWorker] Failed to chain follow-up for task ${task.id}:`,
        TaskWorker.errorMessage(error),
      );
    }
  }

  /**
   * Begin reporting heartbeats for `taskId` every HEARTBEAT_INTERVAL_MS.
   * Heartbeat failures are logged as warnings and do not interrupt the task.
   */
  private startHeartbeat(taskId: number): void {
    // Never leave a previous interval running.
    this.stopHeartbeat();
    this.heartbeatInterval = setInterval(async () => {
      try {
        await taskService.heartbeat(taskId);
      } catch (error: unknown) {
        console.warn(`[TaskWorker] Heartbeat failed:`, TaskWorker.errorMessage(error));
      }
    }, HEARTBEAT_INTERVAL_MS);
  }

  /**
   * Stop the heartbeat interval, if one is active.
   */
  private stopHeartbeat(): void {
    if (this.heartbeatInterval) {
      clearInterval(this.heartbeatInterval);
      this.heartbeatInterval = null;
    }
  }

  /**
   * Resolve after `ms` milliseconds.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Extract a printable message from anything that can be thrown.
   * Non-Error values (strings, plain objects) are stringified instead of
   * yielding `undefined`.
   */
  private static errorMessage(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Snapshot of the worker's identity and current state.
   *
   * @returns The worker ID, role, running flag, and the ID of the task being
   *   processed (`null` when idle).
   */
  getInfo(): { workerId: string; role: TaskRole; isRunning: boolean; currentTaskId: number | null } {
    return {
      workerId: this.workerId,
      role: this.role,
      isRunning: this.isRunning,
      // `??` rather than `||` so a task ID of 0 is still reported.
      currentTaskId: this.currentTask?.id ?? null,
    };
  }
}
// ============================================================
// CLI ENTRY POINT
// ============================================================
/**
 * CLI entry point: validate WORKER_ROLE, create a worker, install signal
 * handlers for graceful shutdown, and run the worker loop until it stops.
 *
 * Exits the process with code 1 when WORKER_ROLE is missing or not a known role.
 * WORKER_ID, when set, is used as the worker's ID.
 */
async function main(): Promise<void> {
  // Single source of truth for both validation and the help text.
  const validRoles: readonly TaskRole[] = [
    'store_discovery',
    'entry_point_discovery',
    'product_discovery',
    'product_resync',
    'analytics_refresh',
  ];

  const requestedRole = process.env.WORKER_ROLE;
  if (!requestedRole) {
    console.error('Error: WORKER_ROLE environment variable is required');
    console.error(`Valid roles: ${validRoles.join(', ')}`);
    process.exit(1);
  }

  // Narrow the raw string to TaskRole by lookup instead of a type assertion,
  // so the value is only typed as a role once it has been validated.
  const role = validRoles.find((candidate) => candidate === requestedRole);
  if (!role) {
    console.error(`Error: Invalid WORKER_ROLE: ${requestedRole}`);
    console.error(`Valid roles: ${validRoles.join(', ')}`);
    process.exit(1);
  }

  const workerId = process.env.WORKER_ID;
  const worker = new TaskWorker(role, workerId);

  // Handle graceful shutdown: the worker loop exits after its current iteration.
  process.on('SIGTERM', () => {
    console.log('[TaskWorker] Received SIGTERM, shutting down...');
    worker.stop();
  });
  process.on('SIGINT', () => {
    console.log('[TaskWorker] Received SIGINT, shutting down...');
    worker.stop();
  });

  await worker.start();
}
// Start the worker only when this file is executed directly, not when it is imported
if (require.main === module) {
  // Anything escaping main() is unrecoverable: log it and exit non-zero.
  const reportFatal = (fatal: unknown): never => {
    console.error('[TaskWorker] Fatal error:', fatal);
    process.exit(1);
  };
  main().catch(reportFatal);
}
export { main };