/**
 * Database-Driven Task Scheduler
 *
 * Per TASK_WORKFLOW_2024-12-10.md:
 * - Schedules stored in DB (survives restarts)
 * - Uses SELECT FOR UPDATE to prevent duplicate execution across replicas
 * - Polls every 60s to check if schedules are due
 * - Generates tasks into worker_tasks table for task-worker.ts to process
 *
 * 2024-12-10: Created to replace legacy node-cron scheduler
 */
import { pool } from '../db/pool';
import { taskService, TaskRole } from '../tasks/task-service';

// Per TASK_WORKFLOW_2024-12-10.md: Poll interval for checking schedules
const POLL_INTERVAL_MS = 60_000; // 60 seconds

/** Row shape of the `task_schedules` table as read by this module. */
interface TaskSchedule {
  id: number;
  name: string;
  role: TaskRole;
  enabled: boolean;
  interval_hours: number;
  last_run_at: Date | null;
  next_run_at: Date | null;
  state_code: string | null;
  priority: number;
}

/** Seed data inserted by `ensureDefaultSchedules` when a schedule is missing. */
interface DefaultSchedule {
  name: string;
  role: TaskRole;
  interval_hours: number;
  priority: number;
  description: string;
}

/**
 * Extract a printable message from a caught value.
 * Anything can be thrown in JS, so never assume `.message` exists.
 */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

class TaskScheduler {
  private pollTimer: NodeJS.Timeout | null = null;
  private isRunning = false;

  /**
   * Start the scheduler.
   * Per TASK_WORKFLOW_2024-12-10.md: Called on API server startup.
   *
   * Recovers stale tasks, seeds default schedules, runs one immediate check
   * and then polls every POLL_INTERVAL_MS. Calling it while already running
   * is a no-op.
   */
  async start(): Promise<void> {
    if (this.isRunning) {
      console.log('[TaskScheduler] Already running');
      return;
    }

    console.log('[TaskScheduler] Starting database-driven scheduler...');
    this.isRunning = true;

    // Per TASK_WORKFLOW_2024-12-10.md: On startup, recover stale tasks
    try {
      // NOTE(review): the argument 10 is a threshold defined by task-service
      // (presumably minutes) - confirm against recoverStaleTasks.
      const recovered = await taskService.recoverStaleTasks(10);
      if (recovered > 0) {
        console.log(`[TaskScheduler] Recovered ${recovered} stale tasks from dead workers`);
      }
    } catch (err: unknown) {
      console.error('[TaskScheduler] Failed to recover stale tasks:', errorMessage(err));
    }

    // Per TASK_WORKFLOW_2024-12-10.md: Ensure default schedules exist
    await this.ensureDefaultSchedules();

    // Per TASK_WORKFLOW_2024-12-10.md: Check immediately on startup
    await this.checkAndRunDueSchedules();

    // stop() may have been called while the awaits above were in flight;
    // installing the timer now would leave an interval nobody clears.
    if (!this.isRunning) {
      return;
    }

    // Per TASK_WORKFLOW_2024-12-10.md: Then poll every 60 seconds
    this.pollTimer = setInterval(() => {
      // Safety net: a rejection escaping a timer callback would surface as an
      // unhandled promise rejection.
      this.checkAndRunDueSchedules().catch((err: unknown) => {
        console.error('[TaskScheduler] Failed to check schedules:', errorMessage(err));
      });
    }, POLL_INTERVAL_MS);

    console.log('[TaskScheduler] Started - polling every 60s');
  }

  /**
   * Stop the scheduler: clears the poll timer and marks the instance stopped.
   */
  stop(): void {
    if (this.pollTimer) {
      clearInterval(this.pollTimer);
      this.pollTimer = null;
    }
    this.isRunning = false;
    console.log('[TaskScheduler] Stopped');
  }

  /**
   * Ensure default schedules exist in the database.
   * Per TASK_WORKFLOW_2024-12-10.md: Creates schedules if they don't exist.
   *
   * Existing rows are left untouched (ON CONFLICT DO NOTHING). Failures are
   * logged per schedule and never thrown.
   */
  private async ensureDefaultSchedules(): Promise<void> {
    // Per TASK_WORKFLOW_2024-12-10.md: Default schedules for task generation
    // NOTE: payload_fetch replaces direct product_refresh - it chains to product_refresh
    const defaults: DefaultSchedule[] = [
      {
        name: 'payload_fetch_all',
        role: 'payload_fetch',
        interval_hours: 4,
        priority: 0,
        description: 'Fetch payloads from Dutchie API for all crawl-enabled stores every 4 hours. Chains to product_refresh.',
      },
      {
        name: 'store_discovery_dutchie',
        role: 'store_discovery',
        interval_hours: 24,
        priority: 5,
        description: 'Discover new Dutchie stores daily',
      },
      {
        name: 'analytics_refresh',
        role: 'analytics_refresh',
        interval_hours: 6,
        priority: 0,
        description: 'Refresh analytics materialized views every 6 hours',
      },
    ];

    for (const sched of defaults) {
      try {
        await pool.query(`
          INSERT INTO task_schedules (name, role, interval_hours, priority, description, enabled, next_run_at)
          VALUES ($1, $2, $3, $4, $5, true, NOW())
          ON CONFLICT (name) DO NOTHING
        `, [sched.name, sched.role, sched.interval_hours, sched.priority, sched.description]);
      } catch (err: unknown) {
        // Table may not exist yet - will be created by migration
        const message = errorMessage(err);
        if (!message.includes('does not exist')) {
          console.error(`[TaskScheduler] Failed to create default schedule ${sched.name}:`, message);
        }
      }
    }
  }

  /**
   * Check out a pooled client, logging instead of throwing on failure.
   * Returns null when no connection could be obtained.
   */
  private async acquireClient() {
    try {
      return await pool.connect();
    } catch (err: unknown) {
      console.error('[TaskScheduler] Failed to check schedules:', errorMessage(err));
      return null;
    }
  }

  /**
   * Check for and run any due schedules.
   * Per TASK_WORKFLOW_2024-12-10.md: Uses SELECT FOR UPDATE SKIP LOCKED to prevent duplicates.
   *
   * All due rows are locked in one transaction and each is executed in turn.
   * `next_run_at` is advanced whether the schedule succeeded or failed.
   * Errors are logged and never thrown, so a failed poll is retried on the
   * next tick.
   */
  private async checkAndRunDueSchedules(): Promise<void> {
    const client = await this.acquireClient();
    if (!client) {
      return;
    }

    try {
      await client.query('BEGIN');

      // Per TASK_WORKFLOW_2024-12-10.md: Atomic claim of due schedules
      const result = await client.query(`
        SELECT *
        FROM task_schedules
        WHERE enabled = true
          AND (next_run_at IS NULL OR next_run_at <= NOW())
        FOR UPDATE SKIP LOCKED
      `);
      const dueSchedules = result.rows as TaskSchedule[];

      for (const schedule of dueSchedules) {
        console.log(`[TaskScheduler] Running schedule: ${schedule.name} (${schedule.role})`);

        try {
          // Task generation does not run on `client`, so it is not part of
          // this transaction: tasks it creates persist even if the
          // transaction below is rolled back.
          const tasksCreated = await this.executeSchedule(schedule);
          console.log(`[TaskScheduler] Schedule ${schedule.name} created ${tasksCreated} tasks`);

          // Per TASK_WORKFLOW_2024-12-10.md: Update last_run_at and calculate next_run_at
          await client.query(`
            UPDATE task_schedules
            SET
              last_run_at = NOW(),
              next_run_at = NOW() + ($1 || ' hours')::interval,
              last_task_count = $2,
              updated_at = NOW()
            WHERE id = $3
          `, [schedule.interval_hours, tasksCreated, schedule.id]);
        } catch (err: unknown) {
          const message = errorMessage(err);
          console.error(`[TaskScheduler] Schedule ${schedule.name} failed:`, message);

          // Still update next_run_at to prevent infinite retry loop
          await client.query(`
            UPDATE task_schedules
            SET
              next_run_at = NOW() + ($1 || ' hours')::interval,
              last_error = $2,
              updated_at = NOW()
            WHERE id = $3
          `, [schedule.interval_hours, message, schedule.id]);
        }
      }

      await client.query('COMMIT');
    } catch (err: unknown) {
      // A failing ROLLBACK must not mask the original error or escape.
      try {
        await client.query('ROLLBACK');
      } catch (rollbackErr: unknown) {
        console.error('[TaskScheduler] Rollback failed:', errorMessage(rollbackErr));
      }
      console.error('[TaskScheduler] Failed to check schedules:', errorMessage(err));
    } finally {
      client.release();
    }
  }

  /**
   * Execute a schedule and create tasks.
   * Per TASK_WORKFLOW_2024-12-10.md: Different logic per role.
   *
   * @returns the number of tasks created (0 for an unknown role)
   */
  private async executeSchedule(schedule: TaskSchedule): Promise<number> {
    switch (schedule.role) {
      // Per TASK_WORKFLOW_2024-12-10.md: payload_fetch replaces direct product_refresh.
      // product_refresh is legacy - kept for manual triggers, but scheduled
      // crawls use payload_fetch.
      case 'payload_fetch':
      case 'product_refresh':
        return this.generatePayloadFetchTasks(schedule);

      case 'store_discovery':
        return this.generateStoreDiscoveryTasks(schedule);

      case 'analytics_refresh':
        return this.generateAnalyticsRefreshTasks(schedule);

      default:
        console.warn(`[TaskScheduler] Unknown role: ${schedule.role}`);
        return 0;
    }
  }

  /**
   * Generate payload_fetch tasks for stores that need crawling.
   * Per TASK_WORKFLOW_2024-12-10.md: payload_fetch hits API, saves to disk, chains to product_refresh.
   *
   * A store is selected when it is crawl-enabled, has a platform id, has no
   * active payload_fetch/product_refresh task, and was never fetched or was
   * last fetched more than `interval_hours` ago. When the schedule has a
   * `state_code`, only stores in that state are considered.
   *
   * @returns the value returned by `taskService.createTasks`, or 0 when no store qualifies
   */
  private async generatePayloadFetchTasks(schedule: TaskSchedule): Promise<number> {
    const params = schedule.state_code
      ? [schedule.interval_hours, schedule.state_code]
      : [schedule.interval_hours];

    // Per TASK_WORKFLOW_2024-12-10.md: Find stores needing refresh
    const result = await pool.query(`
      SELECT d.id
      FROM dispensaries d
      WHERE d.crawl_enabled = true
        AND d.platform_dispensary_id IS NOT NULL
        -- No pending/running payload_fetch or product_refresh task already
        AND NOT EXISTS (
          SELECT 1 FROM worker_tasks t
          WHERE t.dispensary_id = d.id
            AND t.role IN ('payload_fetch', 'product_refresh')
            AND t.status IN ('pending', 'claimed', 'running')
        )
        -- Never fetched OR last fetch > interval ago
        AND (
          d.last_fetch_at IS NULL
          OR d.last_fetch_at < NOW() - ($1 || ' hours')::interval
        )
        ${schedule.state_code ? 'AND d.state_id = (SELECT id FROM states WHERE code = $2)' : ''}
    `, params);

    const dispensaryIds = result.rows.map((r: { id: number }) => r.id);

    if (dispensaryIds.length === 0) {
      return 0;
    }

    // Per TASK_WORKFLOW_2024-12-10.md: Create payload_fetch tasks (they chain to product_refresh)
    const tasks = dispensaryIds.map((id: number) => ({
      role: 'payload_fetch' as TaskRole,
      dispensary_id: id,
      priority: schedule.priority,
    }));

    return taskService.createTasks(tasks);
  }

  /**
   * Generate store_discovery tasks.
   * Per TASK_WORKFLOW_2024-12-10.md: One task per platform.
   *
   * @returns 1 when a task was created, 0 when one is already pending/claimed/running
   */
  private async generateStoreDiscoveryTasks(schedule: TaskSchedule): Promise<number> {
    // Check if discovery task already pending
    const existing = await taskService.listTasks({
      role: 'store_discovery',
      status: ['pending', 'claimed', 'running'],
      limit: 1,
    });

    if (existing.length > 0) {
      console.log('[TaskScheduler] Store discovery task already pending, skipping');
      return 0;
    }

    await taskService.createTask({
      role: 'store_discovery',
      platform: 'dutchie',
      priority: schedule.priority,
    });

    return 1;
  }

  /**
   * Generate analytics_refresh tasks.
   * Per TASK_WORKFLOW_2024-12-10.md: Single task to refresh all MVs.
   *
   * @returns 1 when a task was created, 0 when one is already pending/claimed/running
   */
  private async generateAnalyticsRefreshTasks(schedule: TaskSchedule): Promise<number> {
    // Check if analytics task already pending
    const existing = await taskService.listTasks({
      role: 'analytics_refresh',
      status: ['pending', 'claimed', 'running'],
      limit: 1,
    });

    if (existing.length > 0) {
      console.log('[TaskScheduler] Analytics refresh task already pending, skipping');
      return 0;
    }

    await taskService.createTask({
      role: 'analytics_refresh',
      priority: schedule.priority,
    });

    return 1;
  }

  /**
   * Get all schedules for dashboard display, ordered by name.
   * Best-effort: returns an empty list when the query fails.
   */
  async getSchedules(): Promise<TaskSchedule[]> {
    try {
      const result = await pool.query(`
        SELECT * FROM task_schedules ORDER BY name
      `);
      return result.rows as TaskSchedule[];
    } catch (err: unknown) {
      // Table may not exist yet - stay quiet for that case, but do not hide
      // other failures from the logs.
      const message = errorMessage(err);
      if (!message.includes('does not exist')) {
        console.error('[TaskScheduler] Failed to load schedules:', message);
      }
      return [];
    }
  }

  /**
   * Update a schedule.
   *
   * Only `enabled`, `interval_hours` and `priority` are persisted; any other
   * field in `updates` is ignored. Does nothing when none of those is set.
   *
   * @param id - primary key of the schedule row
   * @param updates - fields to change
   */
  async updateSchedule(id: number, updates: Partial<TaskSchedule>): Promise<void> {
    const setClauses: string[] = [];
    const values: unknown[] = [];
    let paramIndex = 1;

    if (updates.enabled !== undefined) {
      setClauses.push(`enabled = $${paramIndex++}`);
      values.push(updates.enabled);
    }
    if (updates.interval_hours !== undefined) {
      setClauses.push(`interval_hours = $${paramIndex++}`);
      values.push(updates.interval_hours);
    }
    if (updates.priority !== undefined) {
      setClauses.push(`priority = $${paramIndex++}`);
      values.push(updates.priority);
    }

    if (setClauses.length === 0) return;

    setClauses.push('updated_at = NOW()');
    // The id is always the last placeholder, at the current paramIndex.
    values.push(id);

    await pool.query(`
      UPDATE task_schedules
      SET ${setClauses.join(', ')}
      WHERE id = $${paramIndex}
    `, values);
  }

  /**
   * Trigger a schedule to run immediately.
   *
   * Runs the schedule's task generation directly; it does not modify
   * `last_run_at` / `next_run_at`.
   *
   * @returns the number of tasks created
   * @throws Error when no schedule with the given id exists
   */
  async triggerSchedule(id: number): Promise<number> {
    const result = await pool.query(`
      SELECT * FROM task_schedules WHERE id = $1
    `, [id]);

    if (result.rows.length === 0) {
      throw new Error(`Schedule ${id} not found`);
    }

    return this.executeSchedule(result.rows[0] as TaskSchedule);
  }
}

// Per TASK_WORKFLOW_2024-12-10.md: Singleton instance
export const taskScheduler = new TaskScheduler();