/**
 * Task Queue API Routes
 *
 * Endpoints for managing worker tasks, viewing capacity metrics,
 * and generating batch tasks.
 *
 * SCHEDULE MANAGEMENT (added 2025-12-12):
 * This file now contains the canonical schedule management endpoints.
 * The job_schedules table has been deprecated and all schedule management
 * is now consolidated into task_schedules:
 *
 * Schedule endpoints:
 *   GET    /api/tasks/schedules             - List all schedules
 *   POST   /api/tasks/schedules             - Create new schedule
 *   GET    /api/tasks/schedules/:id         - Get schedule by ID
 *   PUT    /api/tasks/schedules/:id         - Update schedule
 *   DELETE /api/tasks/schedules/:id         - Delete schedule
 *   DELETE /api/tasks/schedules             - Bulk delete schedules
 *   POST   /api/tasks/schedules/:id/run-now - Trigger schedule immediately
 *   POST   /api/tasks/schedules/:id/toggle  - Toggle schedule enabled/disabled
 *
 * Note: Schedule routes are defined BEFORE /:id to avoid route conflicts
 * (Express matches routes in order, and "schedules" would match /:id otherwise)
 */

import { Router, Request, Response } from 'express';
import {
  taskService,
  TaskRole,
  TaskStatus,
  TaskFilter,
} from '../tasks/task-service';
import { pool } from '../db/pool';
import {
  isTaskPoolPaused,
  pauseTaskPool,
  resumeTaskPool,
  getTaskPoolStatus,
} from '../tasks/task-pool-state';

const router = Router();

// ============================================================
// INPUT VALIDATION HELPERS
// ============================================================

/**
 * Strictly parse a route/query value as a non-negative base-10 integer.
 *
 * Unlike a bare `parseInt`, this rejects partial matches ("12abc"), empty
 * strings, arrays and anything that is not a plain digit string, so handlers
 * can answer 400 instead of forwarding NaN to the service or the database.
 *
 * @param raw - Value taken from `req.params`, `req.query` or a JSON body
 * @returns The parsed integer, or null when the value is not a valid integer
 */
function parseNonNegativeInt(raw: unknown): number | null {
  if (typeof raw !== 'string' || !/^\d+$/.test(raw)) {
    return null;
  }
  const parsed = Number(raw);
  return Number.isSafeInteger(parsed) ? parsed : null;
}

/**
 * Coerce a JSON body value to a finite number greater than zero.
 *
 * Numeric strings are accepted so that clients which already send
 * `"interval_hours": "4"` keep working.
 *
 * @param raw - Value taken from the request body
 * @returns The number, or null when it is missing, non-numeric, or <= 0
 */
function toPositiveNumber(raw: unknown): number | null {
  if (typeof raw !== 'number' && typeof raw !== 'string') {
    return null;
  }
  if (typeof raw === 'string' && raw.trim() === '') {
    return null;
  }
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
}

/**
 * Whether a caught error carries PostgreSQL error code 23505
 * (unique_violation).
 */
function isUniqueViolation(error: unknown): boolean {
  return (
    typeof error === 'object' &&
    error !== null &&
    (error as { code?: unknown }).code === '23505'
  );
}

/**
 * GET /api/tasks
 * List tasks with optional filters
 *
 * Query params:
 * - role: Filter by role
 * - status: Filter by status (comma-separated for multiple)
 * - dispensary_id: Filter by dispensary
 * - worker_id: Filter by worker
 * - limit: Max results (default 100)
 * - offset: Pagination offset
 *
 * Responds 400 when dispensary_id, limit or offset is not a non-negative integer.
 */
router.get('/', async (req: Request, res: Response) => {
  try {
    const filter: TaskFilter = {};

    if (req.query.role) {
      filter.role = req.query.role as TaskRole;
    }

    if (req.query.status) {
      const statuses = (req.query.status as string).split(',') as TaskStatus[];
      filter.status = statuses.length === 1 ? statuses[0] : statuses;
    }

    if (req.query.dispensary_id) {
      const dispensaryId = parseNonNegativeInt(req.query.dispensary_id);
      if (dispensaryId === null) {
        return res.status(400).json({ error: 'dispensary_id must be a non-negative integer' });
      }
      filter.dispensary_id = dispensaryId;
    }

    if (req.query.worker_id) {
      filter.worker_id = req.query.worker_id as string;
    }

    if (req.query.limit) {
      const limit = parseNonNegativeInt(req.query.limit);
      if (limit === null) {
        return res.status(400).json({ error: 'limit must be a non-negative integer' });
      }
      filter.limit = limit;
    }

    if (req.query.offset) {
      const offset = parseNonNegativeInt(req.query.offset);
      if (offset === null) {
        return res.status(400).json({ error: 'offset must be a non-negative integer' });
      }
      filter.offset = offset;
    }

    const tasks = await taskService.listTasks(filter);
    res.json({ tasks, count: tasks.length });
  } catch (error: unknown) {
    console.error('Error listing tasks:', error);
    res.status(500).json({ error: 'Failed to list tasks' });
  }
});

/**
 * GET /api/tasks/counts
 * Get task counts by status
 */
router.get('/counts', async (_req: Request, res: Response) => {
  try {
    const counts = await taskService.getTaskCounts();
    res.json(counts);
  } catch (error: unknown) {
    console.error('Error getting task counts:', error);
    res.status(500).json({ error: 'Failed to get task counts' });
  }
});

/**
 * GET /api/tasks/capacity
 * Get capacity metrics for all roles
 */
router.get('/capacity', async (_req: Request, res: Response) => {
  try {
    const metrics = await taskService.getCapacityMetrics();
    res.json({ metrics });
  } catch (error: unknown) {
    console.error('Error getting capacity metrics:', error);
    res.status(500).json({ error: 'Failed to get capacity metrics' });
  }
});

/**
 * GET /api/tasks/capacity/:role
 * Get capacity metrics for a specific role
 *
 * The response is the role's capacity record plus `workers_needed`, the
 * result of calculateWorkersNeeded for 1, 4 and 8 hour targets.
 */
router.get('/capacity/:role', async (req: Request, res: Response) => {
  try {
    const role = req.params.role as TaskRole;
    const capacity = await taskService.getRoleCapacity(role);

    if (!capacity) {
      return res.status(404).json({ error: 'Role not found or no data' });
    }

    // Calculate workers needed for different SLAs.
    // The three lookups are independent, so run them concurrently.
    const [workersFor1Hour, workersFor4Hours, workersFor8Hours] = await Promise.all([
      taskService.calculateWorkersNeeded(role, 1),
      taskService.calculateWorkersNeeded(role, 4),
      taskService.calculateWorkersNeeded(role, 8),
    ]);

    res.json({
      ...capacity,
      workers_needed: {
        for_1_hour: workersFor1Hour,
        for_4_hours: workersFor4Hours,
        for_8_hours: workersFor8Hours,
      },
    });
  } catch (error: unknown) {
    console.error('Error getting role capacity:', error);
    res.status(500).json({ error: 'Failed to get role capacity' });
  }
});

// ============================================================
// SCHEDULE MANAGEMENT ROUTES
// (Must be before /:id to avoid route conflicts)
// ============================================================

/**
 * GET /api/tasks/schedules
 * List all task schedules
 *
 * Query params:
 * - enabled: when exactly 'true', only enabled schedules are returned
 *
 * Returns schedules with is_immutable flag - immutable schedules can only
 * have their interval_hours, priority, and enabled fields updated (not deleted).
 */
router.get('/schedules', async (req: Request, res: Response) => {
  try {
    const enabledOnly = req.query.enabled === 'true';

    let query = `
      SELECT id, name, role, description, enabled, interval_hours,
             priority, state_code, platform, method,
             COALESCE(is_immutable, false) as is_immutable,
             last_run_at, next_run_at,
             last_task_count, last_error, created_at, updated_at
      FROM task_schedules
    `;

    if (enabledOnly) {
      query += ` WHERE enabled = true`;
    }

    query += `
      ORDER BY
        CASE role
          WHEN 'store_discovery' THEN 1
          WHEN 'product_discovery' THEN 2
          WHEN 'analytics_refresh' THEN 3
          ELSE 4
        END,
        state_code NULLS FIRST,
        name`;

    const result = await pool.query(query);
    res.json({ schedules: result.rows });
  } catch (error: unknown) {
    console.error('Error listing schedules:', error);
    res.status(500).json({ error: 'Failed to list schedules' });
  }
});

/**
 * DELETE /api/tasks/schedules
 * Bulk delete schedules
 *
 * Immutable schedules are automatically skipped (not deleted).
 *
 * Body:
 * - ids: number[] (required) - array of schedule IDs to delete
 * - all: boolean (optional) - if true, delete all non-immutable schedules (ids ignored)
 *
 * Responds 400 when neither `all=true` nor a non-empty `ids` array is given,
 * or when `ids` contains a value that is not an integer.
 */
router.delete('/schedules', async (req: Request, res: Response) => {
  try {
    const { ids, all } = req.body;

    let result;
    let skippedImmutable: { id: number; name: string }[] = [];

    if (all === true) {
      // First, find immutable schedules that will be skipped
      const immutableResult = await pool.query(`
        SELECT id, name
        FROM task_schedules
        WHERE is_immutable = true
      `);
      skippedImmutable = immutableResult.rows;

      // Delete all non-immutable schedules
      result = await pool.query(`
        DELETE FROM task_schedules
        WHERE COALESCE(is_immutable, false) = false
        RETURNING id, name
      `);
    } else if (Array.isArray(ids) && ids.length > 0) {
      // Reject malformed IDs here rather than letting the database raise
      // a cast error that would surface as a 500.
      const requestedIds: number[] = [];
      for (const id of ids) {
        const parsed =
          typeof id === 'number' && Number.isSafeInteger(id) ? id : parseNonNegativeInt(id);
        if (parsed === null) {
          return res.status(400).json({ error: 'ids must be an array of integers' });
        }
        requestedIds.push(parsed);
      }

      // First, find which of the requested IDs are immutable
      const immutableResult = await pool.query(`
        SELECT id, name
        FROM task_schedules
        WHERE id = ANY($1) AND is_immutable = true
      `, [requestedIds]);
      skippedImmutable = immutableResult.rows;

      // Delete only non-immutable schedules from the requested IDs
      result = await pool.query(`
        DELETE FROM task_schedules
        WHERE id = ANY($1) AND COALESCE(is_immutable, false) = false
        RETURNING id, name
      `, [requestedIds]);
    } else {
      return res.status(400).json({
        error: 'Either provide ids array or set all=true',
      });
    }

    res.json({
      success: true,
      deleted_count: result.rowCount,
      deleted: result.rows,
      skipped_immutable_count: skippedImmutable.length,
      skipped_immutable: skippedImmutable,
      message: skippedImmutable.length > 0
        ? `Deleted ${result.rowCount} schedule(s), skipped ${skippedImmutable.length} immutable schedule(s)`
        : `Deleted ${result.rowCount} schedule(s)`,
    });
  } catch (error: unknown) {
    console.error('Error bulk deleting schedules:', error);
    res.status(500).json({ error: 'Failed to delete schedules' });
  }
});

/**
 * POST /api/tasks/schedules
 * Create a new schedule
 *
 * Body:
 * - name: string (required, unique)
 * - role: TaskRole (required)
 * - description: string (optional)
 * - enabled: boolean (default true)
 * - interval_hours: number (required, must be > 0)
 * - priority: number (default 0)
 * - state_code: string (optional)
 * - platform: string (optional)
 *
 * Responds 201 with the created row, 400 on missing/invalid fields,
 * 409 when the name is already taken.
 */
router.post('/schedules', async (req: Request, res: Response) => {
  try {
    const {
      name,
      role,
      description,
      enabled = true,
      interval_hours,
      priority = 0,
      state_code,
      platform,
    } = req.body;

    if (!name || !role || !interval_hours) {
      return res.status(400).json({
        error: 'name, role, and interval_hours are required',
      });
    }

    // A negative or non-numeric interval would yield an invalid next_run_at.
    const intervalHours = toPositiveNumber(interval_hours);
    if (intervalHours === null) {
      return res.status(400).json({ error: 'interval_hours must be a positive number' });
    }

    // Calculate next_run_at based on interval
    const nextRunAt = new Date(Date.now() + intervalHours * 60 * 60 * 1000);

    const result = await pool.query(`
      INSERT INTO task_schedules
        (name, role, description, enabled, interval_hours, priority, state_code, platform, next_run_at)
      VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
      RETURNING id, name, role, description, enabled, interval_hours,
                priority, state_code, platform, method,
                COALESCE(is_immutable, false) as is_immutable,
                last_run_at, next_run_at,
                last_task_count, last_error, created_at, updated_at
    `, [name, role, description, enabled, intervalHours, priority, state_code, platform, nextRunAt]);

    res.status(201).json(result.rows[0]);
  } catch (error: unknown) {
    if (isUniqueViolation(error)) {
      // Unique constraint violation
      return res.status(409).json({ error: 'A schedule with this name already exists' });
    }
    console.error('Error creating schedule:', error);
    res.status(500).json({ error: 'Failed to create schedule' });
  }
});

/**
 * GET /api/tasks/schedules/:id
 * Get a specific schedule by ID
 *
 * Responds 400 for a non-numeric ID and 404 when no schedule matches.
 */
router.get('/schedules/:id', async (req: Request, res: Response) => {
  try {
    const scheduleId = parseNonNegativeInt(req.params.id);
    if (scheduleId === null) {
      return res.status(400).json({ error: 'Invalid schedule ID' });
    }

    const result = await pool.query(`
      SELECT id, name, role, description, enabled, interval_hours,
             priority, state_code, platform, method,
             COALESCE(is_immutable, false) as is_immutable,
             last_run_at, next_run_at,
             last_task_count, last_error, created_at, updated_at
      FROM task_schedules
      WHERE id = $1
    `, [scheduleId]);

    if (result.rows.length === 0) {
      return res.status(404).json({ error: 'Schedule not found' });
    }

    res.json(result.rows[0]);
  } catch (error: unknown) {
    console.error('Error getting schedule:', error);
    res.status(500).json({ error: 'Failed to get schedule' });
  }
});

/**
 * PUT /api/tasks/schedules/:id
 * Update an existing schedule
 *
 * For IMMUTABLE schedules, only these fields can be updated:
 * - enabled (turn on/off)
 * - interval_hours (change frequency)
 * - priority (change priority)
 *
 * For regular schedules, all fields can be updated.
 *
 * Only fields present in the body are written. Changing interval_hours also
 * resets next_run_at to now + interval.
 *
 * Responds 400 (bad ID, bad interval_hours, or nothing to update),
 * 403 (protected field on an immutable schedule), 404, or 409 (duplicate name).
 */
router.put('/schedules/:id', async (req: Request, res: Response) => {
  try {
    const scheduleId = parseNonNegativeInt(req.params.id);
    if (scheduleId === null) {
      return res.status(400).json({ error: 'Invalid schedule ID' });
    }

    const {
      name,
      role,
      description,
      enabled,
      interval_hours,
      priority,
      state_code,
      platform,
    } = req.body;

    // Validate the interval before touching the database.
    let intervalHours: number | null = null;
    if (interval_hours !== undefined) {
      intervalHours = toPositiveNumber(interval_hours);
      if (intervalHours === null) {
        return res.status(400).json({ error: 'interval_hours must be a positive number' });
      }
    }

    // First check if schedule exists and if it's immutable
    const checkResult = await pool.query(`
      SELECT id, name, COALESCE(is_immutable, false) as is_immutable
      FROM task_schedules
      WHERE id = $1
    `, [scheduleId]);

    if (checkResult.rows.length === 0) {
      return res.status(404).json({ error: 'Schedule not found' });
    }

    const schedule = checkResult.rows[0];
    const isImmutable = schedule.is_immutable;

    // For immutable schedules, reject attempts to change protected fields
    if (isImmutable) {
      const protectedFields: string[] = [];
      if (name !== undefined) protectedFields.push('name');
      if (role !== undefined) protectedFields.push('role');
      if (description !== undefined) protectedFields.push('description');
      if (state_code !== undefined) protectedFields.push('state_code');
      if (platform !== undefined) protectedFields.push('platform');

      if (protectedFields.length > 0) {
        return res.status(403).json({
          error: 'Cannot modify protected fields on immutable schedule',
          message: `Schedule "${schedule.name}" is immutable. Only enabled, interval_hours, and priority can be changed.`,
          protected_fields: protectedFields,
          allowed_fields: ['enabled', 'interval_hours', 'priority'],
        });
      }
    }

    // Build dynamic update query. Column names are fixed literals below;
    // only values are user-supplied and they always go through placeholders.
    const updates: string[] = [];
    const values: unknown[] = [];
    const setColumn = (column: string, value: unknown): void => {
      values.push(value);
      updates.push(`${column} = $${values.length}`);
    };

    // These fields can only be updated on non-immutable schedules
    if (!isImmutable) {
      if (name !== undefined) setColumn('name', name);
      if (role !== undefined) setColumn('role', role);
      if (description !== undefined) setColumn('description', description);
      // Empty strings clear the optional scoping columns.
      if (state_code !== undefined) setColumn('state_code', state_code || null);
      if (platform !== undefined) setColumn('platform', platform || null);
    }

    // These fields can be updated on ALL schedules (including immutable)
    if (enabled !== undefined) setColumn('enabled', enabled);
    if (intervalHours !== null) {
      setColumn('interval_hours', intervalHours);
      // Recalculate next_run_at if interval changed
      setColumn('next_run_at', new Date(Date.now() + intervalHours * 60 * 60 * 1000));
    }
    if (priority !== undefined) setColumn('priority', priority);

    if (updates.length === 0) {
      return res.status(400).json({ error: 'No fields to update' });
    }

    updates.push('updated_at = NOW()');
    values.push(scheduleId);

    const result = await pool.query(`
      UPDATE task_schedules
      SET ${updates.join(', ')}
      WHERE id = $${values.length}
      RETURNING id, name, role, description, enabled, interval_hours,
                priority, state_code, platform, method,
                COALESCE(is_immutable, false) as is_immutable,
                last_run_at, next_run_at,
                last_task_count, last_error, created_at, updated_at
    `, values);

    // The row can disappear between the existence check and the UPDATE.
    if (result.rows.length === 0) {
      return res.status(404).json({ error: 'Schedule not found' });
    }

    res.json(result.rows[0]);
  } catch (error: unknown) {
    if (isUniqueViolation(error)) {
      return res.status(409).json({ error: 'A schedule with this name already exists' });
    }
    console.error('Error updating schedule:', error);
    res.status(500).json({ error: 'Failed to update schedule' });
  }
});

/**
 * DELETE /api/tasks/schedules/:id
 * Delete a schedule
 *
 * Immutable schedules cannot be deleted - they can only be disabled.
 *
 * Responds 400 for a non-numeric ID, 403 for an immutable schedule,
 * 404 when no schedule matches.
 */
router.delete('/schedules/:id', async (req: Request, res: Response) => {
  try {
    const scheduleId = parseNonNegativeInt(req.params.id);
    if (scheduleId === null) {
      return res.status(400).json({ error: 'Invalid schedule ID' });
    }

    // First check if schedule exists and is immutable
    const checkResult = await pool.query(`
      SELECT id, name, COALESCE(is_immutable, false) as is_immutable
      FROM task_schedules
      WHERE id = $1
    `, [scheduleId]);

    if (checkResult.rows.length === 0) {
      return res.status(404).json({ error: 'Schedule not found' });
    }

    const schedule = checkResult.rows[0];

    // Prevent deletion of immutable schedules
    if (schedule.is_immutable) {
      return res.status(403).json({
        error: 'Cannot delete immutable schedule',
        message: `Schedule "${schedule.name}" is immutable and cannot be deleted. You can disable it instead.`,
        schedule_id: scheduleId,
        is_immutable: true,
      });
    }

    // Delete the schedule
    await pool.query(`DELETE FROM task_schedules WHERE id = $1`, [scheduleId]);

    res.json({
      success: true,
      message: `Schedule "${schedule.name}" deleted`,
    });
  } catch (error: unknown) {
    console.error('Error deleting schedule:', error);
    res.status(500).json({ error: 'Failed to delete schedule' });
  }
});

/**
 * POST /api/tasks/schedules/:id/run-now
 * Manually trigger a scheduled task to run immediately
 *
 * For product_discovery schedules with state_code, this creates individual
 * tasks for each store in that state (fans out properly).
 *
 * Other roles get a single task created at schedule priority + 10.
 * On success the schedule's last_run_at, next_run_at and last_task_count
 * are updated. When no store in the state needs a refresh the handler
 * returns early and leaves the schedule row untouched.
 */
router.post('/schedules/:id/run-now', async (req: Request, res: Response) => {
  try {
    const scheduleId = parseNonNegativeInt(req.params.id);
    if (scheduleId === null) {
      return res.status(400).json({ error: 'Invalid schedule ID' });
    }

    // Get the full schedule
    const scheduleResult = await pool.query(`
      SELECT id, name, role, state_code, platform, priority, interval_hours, method
      FROM task_schedules
      WHERE id = $1
    `, [scheduleId]);

    if (scheduleResult.rows.length === 0) {
      return res.status(404).json({ error: 'Schedule not found' });
    }

    const schedule = scheduleResult.rows[0];
    let tasksCreated = 0;

    // For product_discovery with state_code, fan out to individual stores
    if (schedule.role === 'product_discovery' && schedule.state_code) {
      // Find stores in this state needing refresh
      const storeResult = await pool.query(`
        SELECT d.id
        FROM dispensaries d
        JOIN states s ON d.state_id = s.id
        WHERE d.crawl_enabled = true
          AND d.platform_dispensary_id IS NOT NULL
          AND s.code = $1
          -- No pending/running product_discovery task already
          AND NOT EXISTS (
            SELECT 1 FROM worker_tasks t
            WHERE t.dispensary_id = d.id
              AND t.role = 'product_discovery'
              AND t.status IN ('pending', 'claimed', 'running')
          )
        ORDER BY d.last_fetch_at NULLS FIRST, d.id
      `, [schedule.state_code]);

      const dispensaryIds = storeResult.rows.map((r: { id: number }) => r.id);

      if (dispensaryIds.length > 0) {
        // Create staggered tasks for all stores
        const result = await taskService.createStaggeredTasks(
          dispensaryIds,
          'product_discovery',
          15, // 15 seconds stagger
          schedule.platform || 'dutchie',
          schedule.method || 'http'
        );
        tasksCreated = result.created;
      } else {
        // No stores need refresh - return early with message
        return res.json({
          success: true,
          message: `No ${schedule.state_code} stores need refresh at this time`,
          tasksCreated: 0,
          stateCode: schedule.state_code,
        });
      }
    } else if (schedule.role !== 'product_discovery') {
      // For other schedules (store_discovery, analytics_refresh), create a single task
      await taskService.createTask({
        role: schedule.role,
        platform: schedule.platform,
        priority: schedule.priority + 10,
        method: schedule.method,
      });
      tasksCreated = 1;
    } else {
      // product_discovery without state_code - shouldn't happen, reject
      return res.status(400).json({
        error: 'product_discovery schedules require a state_code',
      });
    }

    // Update last_run_at on the schedule
    await pool.query(`
      UPDATE task_schedules
      SET last_run_at = NOW(),
          next_run_at = NOW() + (interval_hours || ' hours')::interval,
          last_task_count = $2,
          updated_at = NOW()
      WHERE id = $1
    `, [scheduleId, tasksCreated]);

    res.json({
      success: true,
      message: `Schedule "${schedule.name}" triggered`,
      tasksCreated,
      stateCode: schedule.state_code,
    });
  } catch (error: unknown) {
    console.error('Error running schedule:', error);
    res.status(500).json({ error: 'Failed to run schedule' });
  }
});

/**
 * POST /api/tasks/schedules/:id/toggle
 * Toggle a schedule's enabled status
 *
 * The flip is done in a single UPDATE (enabled = NOT enabled).
 */
router.post('/schedules/:id/toggle', async (req: Request, res: Response) => {
  try {
    const scheduleId = parseNonNegativeInt(req.params.id);
    if (scheduleId === null) {
      return res.status(400).json({ error: 'Invalid schedule ID' });
    }

    const result = await pool.query(`
      UPDATE task_schedules
      SET enabled = NOT enabled,
          updated_at = NOW()
      WHERE id = $1
      RETURNING id, name, enabled
    `, [scheduleId]);

    if (result.rows.length === 0) {
      return res.status(404).json({ error: 'Schedule not found' });
    }

    res.json({
      success: true,
      schedule: result.rows[0],
      message: result.rows[0].enabled
        ? `Schedule "${result.rows[0].name}" enabled`
        : `Schedule "${result.rows[0].name}" disabled`,
    });
  } catch (error: unknown) {
    console.error('Error toggling schedule:', error);
    res.status(500).json({ error: 'Failed to toggle schedule' });
  }
});

// ============================================================
// TASK-SPECIFIC ROUTES (with :id parameter)
// ============================================================

/**
 * GET /api/tasks/:id
 * Get a specific task by ID
 *
 * Responds 400 for a non-numeric ID and 404 when no task matches.
 */
router.get('/:id', async (req: Request, res: Response) => {
  try {
    const taskId = parseNonNegativeInt(req.params.id);
    if (taskId === null) {
      return res.status(400).json({ error: 'Invalid task ID' });
    }

    const task = await taskService.getTask(taskId);

    if (!task) {
      return res.status(404).json({ error: 'Task not found' });
    }

    res.json(task);
  } catch (error: unknown) {
    console.error('Error getting task:', error);
    res.status(500).json({ error: 'Failed to get task' });
  }
});

/**
 * DELETE /api/tasks/:id
 * Delete a specific task by ID
 * Only allows deletion of failed, completed, or pending tasks (not running)
 *
 * Responds 400 for a non-numeric ID or a running/claimed task, 404 when the
 * task does not exist, and 409 when the task was claimed or removed between
 * the status check and the delete.
 */
router.delete('/:id', async (req: Request, res: Response) => {
  try {
    const taskId = parseNonNegativeInt(req.params.id);
    if (taskId === null) {
      return res.status(400).json({ error: 'Invalid task ID' });
    }

    // First check if task exists and its status
    const task = await taskService.getTask(taskId);

    if (!task) {
      return res.status(404).json({ error: 'Task not found' });
    }

    // Don't allow deleting running tasks
    if (task.status === 'running' || task.status === 'claimed') {
      return res.status(400).json({ error: 'Cannot delete a running or claimed task' });
    }

    // Delete the task. The status guard is repeated in SQL so that a task
    // claimed after the check above is not removed.
    const deleted = await pool.query(`
      DELETE FROM worker_tasks
      WHERE id = $1
        AND (status IS NULL OR status NOT IN ('running', 'claimed'))
      RETURNING id
    `, [taskId]);

    if (deleted.rows.length === 0) {
      return res.status(409).json({
        error: 'Task was claimed or removed before it could be deleted',
      });
    }

    res.json({ success: true, message: `Task ${taskId} deleted` });
  } catch (error: unknown) {
    console.error('Error deleting task:', error);
    res.status(500).json({ error: 'Failed to delete task' });
  }
});

/**
 * POST /api/tasks
 * Create a new task
 *
 * Body:
 * - role: TaskRole (required)
 * - dispensary_id: number (optional)
 * - platform: string (optional)
 * - priority: number (optional, default 0)
 * - scheduled_for: ISO date string (optional)
 */
router.post('/', async (req: Request, res: Response) => { try { const { role, dispensary_id, platform, priority, scheduled_for } = req.body; if (!role) { return res.status(400).json({ error: 'Role is required' }); } // Check if store already has an active task if (dispensary_id) { const hasActive = await taskService.hasActiveTask(dispensary_id); if (hasActive) { return res.status(409).json({ error: 'Store already has an active task', dispensary_id, }); } } const task = await taskService.createTask({ role, dispensary_id, platform, priority, scheduled_for: scheduled_for ? new Date(scheduled_for) : undefined, }); res.status(201).json(task); } catch (error: unknown) { console.error('Error creating task:', error); res.status(500).json({ error: 'Failed to create task' }); } }); /** * POST /api/tasks/generate/resync * Generate daily resync tasks for all active stores * * Body: * - batches_per_day: number (optional, default 6 = every 4 hours) * - date: ISO date string (optional, default today) */ router.post('/generate/resync', async (req: Request, res: Response) => { try { const { batches_per_day, date } = req.body; const batchesPerDay = batches_per_day ?? 6; const targetDate = date ? 
new Date(date) : new Date(); const createdCount = await taskService.generateDailyResyncTasks( batchesPerDay, targetDate ); res.json({ success: true, tasks_created: createdCount, batches_per_day: batchesPerDay, date: targetDate.toISOString().split('T')[0], }); } catch (error: unknown) { console.error('Error generating resync tasks:', error); res.status(500).json({ error: 'Failed to generate resync tasks' }); } }); /** * POST /api/tasks/generate/discovery * Generate store discovery tasks for a platform * * Body: * - platform: string (required, e.g., 'dutchie') * - state_code: string (optional, e.g., 'AZ') * - priority: number (optional) */ router.post('/generate/discovery', async (req: Request, res: Response) => { try { const { platform, state_code, priority } = req.body; if (!platform) { return res.status(400).json({ error: 'Platform is required' }); } const task = await taskService.createStoreDiscoveryTask( platform, state_code, priority ?? 0 ); res.status(201).json(task); } catch (error: unknown) { console.error('Error creating discovery task:', error); res.status(500).json({ error: 'Failed to create discovery task' }); } }); /** * POST /api/tasks/recover-stale * Recover stale tasks from dead workers * * Body: * - threshold_minutes: number (optional, default 10) */ router.post('/recover-stale', async (req: Request, res: Response) => { try { const { threshold_minutes } = req.body; const recovered = await taskService.recoverStaleTasks(threshold_minutes ?? 
10); res.json({ success: true, tasks_recovered: recovered, }); } catch (error: unknown) { console.error('Error recovering stale tasks:', error); res.status(500).json({ error: 'Failed to recover stale tasks' }); } }); /** * GET /api/tasks/role/:role/last-completion * Get the last completion time for a role */ router.get('/role/:role/last-completion', async (req: Request, res: Response) => { try { const role = req.params.role as TaskRole; const lastCompletion = await taskService.getLastCompletion(role); res.json({ role, last_completion: lastCompletion?.toISOString() ?? null, time_since: lastCompletion ? Math.floor((Date.now() - lastCompletion.getTime()) / 1000) : null, }); } catch (error: unknown) { console.error('Error getting last completion:', error); res.status(500).json({ error: 'Failed to get last completion' }); } }); /** * GET /api/tasks/role/:role/recent * Get recent completions for a role */ router.get('/role/:role/recent', async (req: Request, res: Response) => { try { const role = req.params.role as TaskRole; const limit = parseInt(req.query.limit as string, 10) || 10; const tasks = await taskService.getRecentCompletions(role, limit); res.json({ tasks }); } catch (error: unknown) { console.error('Error getting recent completions:', error); res.status(500).json({ error: 'Failed to get recent completions' }); } }); /** * GET /api/tasks/store/:dispensaryId/active * Check if a store has an active task */ router.get('/store/:dispensaryId/active', async (req: Request, res: Response) => { try { const dispensaryId = parseInt(req.params.dispensaryId, 10); const hasActive = await taskService.hasActiveTask(dispensaryId); res.json({ dispensary_id: dispensaryId, has_active_task: hasActive, }); } catch (error: unknown) { console.error('Error checking active task:', error); res.status(500).json({ error: 'Failed to check active task' }); } }); // ============================================================ // MIGRATION ROUTES - Disable old job systems // 
============================================================ /** * GET /api/tasks/migration/status * Get status of old job systems vs new task queue */ router.get('/migration/status', async (_req: Request, res: Response) => { try { // Get old job system counts const [schedules, crawlJobs, rawPayloads, taskCounts] = await Promise.all([ pool.query(` SELECT COUNT(*) as total, COUNT(*) FILTER (WHERE enabled = true) as enabled FROM job_schedules `), pool.query(` SELECT COUNT(*) as total, COUNT(*) FILTER (WHERE status = 'pending') as pending, COUNT(*) FILTER (WHERE status = 'running') as running FROM dispensary_crawl_jobs `), pool.query(` SELECT COUNT(*) as total, COUNT(*) FILTER (WHERE processed = false) as unprocessed FROM raw_payloads `), taskService.getTaskCounts(), ]); res.json({ old_systems: { job_schedules: { total: parseInt(schedules.rows[0].total) || 0, enabled: parseInt(schedules.rows[0].enabled) || 0, }, dispensary_crawl_jobs: { total: parseInt(crawlJobs.rows[0].total) || 0, pending: parseInt(crawlJobs.rows[0].pending) || 0, running: parseInt(crawlJobs.rows[0].running) || 0, }, raw_payloads: { total: parseInt(rawPayloads.rows[0].total) || 0, unprocessed: parseInt(rawPayloads.rows[0].unprocessed) || 0, }, }, new_task_queue: taskCounts, recommendation: schedules.rows[0].enabled > 0 ? 
'Disable old job schedules before switching to new task queue' : 'Ready to use new task queue', }); } catch (error: unknown) { console.error('Error getting migration status:', error); res.status(500).json({ error: 'Failed to get migration status' }); } }); /** * POST /api/tasks/migration/disable-old-schedules * Disable all old job schedules to prepare for new task queue */ router.post('/migration/disable-old-schedules', async (_req: Request, res: Response) => { try { const result = await pool.query(` UPDATE job_schedules SET enabled = false, updated_at = NOW() WHERE enabled = true RETURNING id, job_name `); res.json({ success: true, disabled_count: result.rowCount, disabled_schedules: result.rows.map(r => ({ id: r.id, job_name: r.job_name })), }); } catch (error: unknown) { console.error('Error disabling old schedules:', error); res.status(500).json({ error: 'Failed to disable old schedules' }); } }); /** * POST /api/tasks/migration/cancel-pending-crawl-jobs * Cancel all pending crawl jobs from the old system */ router.post('/migration/cancel-pending-crawl-jobs', async (_req: Request, res: Response) => { try { const result = await pool.query(` UPDATE dispensary_crawl_jobs SET status = 'cancelled', completed_at = NOW(), updated_at = NOW() WHERE status = 'pending' RETURNING id `); res.json({ success: true, cancelled_count: result.rowCount, }); } catch (error: unknown) { console.error('Error cancelling pending crawl jobs:', error); res.status(500).json({ error: 'Failed to cancel pending crawl jobs' }); } }); /** * POST /api/tasks/migration/create-resync-tasks * Create product_refresh tasks for all crawl-enabled dispensaries */ router.post('/migration/create-resync-tasks', async (req: Request, res: Response) => { try { const { priority = 0, state_code } = req.body; let query = ` SELECT id, name FROM dispensaries WHERE crawl_enabled = true AND platform_dispensary_id IS NOT NULL `; const params: any[] = []; if (state_code) { query += ` AND state_id = (SELECT id FROM 
states WHERE code = $1) `; params.push(state_code.toUpperCase()); } query += ` ORDER BY id`; const dispensaries = await pool.query(query, params); let created = 0; for (const disp of dispensaries.rows) { // Check if already has pending/running task const hasActive = await taskService.hasActiveTask(disp.id); if (!hasActive) { await taskService.createTask({ role: 'product_refresh', dispensary_id: disp.id, platform: 'dutchie', priority, }); created++; } } res.json({ success: true, tasks_created: created, dispensaries_checked: dispensaries.rows.length, state_filter: state_code || 'all', }); } catch (error: unknown) { console.error('Error creating resync tasks:', error); res.status(500).json({ error: 'Failed to create resync tasks' }); } }); /** * POST /api/tasks/migration/full-migrate * One-click migration: disable old systems, create new tasks */ router.post('/migration/full-migrate', async (req: Request, res: Response) => { try { const results: any = { success: true, steps: [], }; // Step 1: Disable old job schedules const disableResult = await pool.query(` UPDATE job_schedules SET enabled = false, updated_at = NOW() WHERE enabled = true RETURNING id `); results.steps.push({ step: 'disable_job_schedules', count: disableResult.rowCount, }); // Step 2: Cancel pending crawl jobs const cancelResult = await pool.query(` UPDATE dispensary_crawl_jobs SET status = 'cancelled', completed_at = NOW(), updated_at = NOW() WHERE status = 'pending' RETURNING id `); results.steps.push({ step: 'cancel_pending_crawl_jobs', count: cancelResult.rowCount, }); // Step 3: Generate initial resync tasks const resyncCount = await taskService.generateDailyResyncTasks(6); results.steps.push({ step: 'generate_resync_tasks', count: resyncCount, }); // Step 4: Create store discovery task const discoveryTask = await taskService.createStoreDiscoveryTask('dutchie', undefined, 0); results.steps.push({ step: 'create_discovery_task', task_id: discoveryTask.id, }); // Step 5: Create analytics refresh 
task const analyticsTask = await taskService.createTask({ role: 'analytics_refresh', priority: 0, }); results.steps.push({ step: 'create_analytics_task', task_id: analyticsTask.id, }); results.message = 'Migration complete. New task workers will pick up tasks.'; res.json(results); } catch (error: unknown) { console.error('Error during full migration:', error); res.status(500).json({ error: 'Failed to complete migration' }); } }); // ============================================================ // STAGGERED BATCH TASK CREATION // ============================================================ /** * POST /api/tasks/batch/staggered * Create multiple tasks with staggered start times * * This endpoint prevents resource contention when creating many tasks by * staggering their scheduled_for timestamps. Each task becomes eligible * for claiming only after its scheduled time. * * WORKFLOW: * 1. Tasks created with scheduled_for = NOW() + (index * stagger_seconds) * 2. Worker claims task only when scheduled_for <= NOW() * 3. Worker runs preflight on EVERY task claim * 4. If preflight passes, worker executes task * 5. 
If preflight fails, task released back to pending for another worker
 *
 * Body:
 * - dispensary_ids: number[] (required) - Array of dispensary IDs
 * - role: TaskRole (required) - 'product_refresh' | 'product_discovery'
 * - stagger_seconds: number (default: 15) - Seconds between each task start
 * - platform: string (default: 'dutchie')
 * - method: 'curl' | 'http' | null (default: null)
 *
 * Responses:
 * - 201: tasks created (created count, task_ids, timing estimate)
 * - 400: dispensary_ids missing/empty, role missing, or stagger_seconds invalid
 * - 500: unexpected failure while creating tasks
 */
router.post('/batch/staggered', async (req: Request, res: Response) => {
  try {
    // req.body may be undefined when no body parser ran for this request.
    const {
      dispensary_ids,
      role,
      stagger_seconds,
      platform = 'dutchie',
      method = null,
    } = req.body ?? {};

    if (!Array.isArray(dispensary_ids) || dispensary_ids.length === 0) {
      return res.status(400).json({ error: 'dispensary_ids array is required' });
    }

    if (!role) {
      return res.status(400).json({ error: 'role is required' });
    }

    // Validate before creating anything: an invalid value would otherwise
    // surface as a 500 only after the tasks had already been created.
    const staggerSeconds = parseStaggerSeconds(stagger_seconds, 15);
    if (staggerSeconds === null) {
      return res.status(400).json({ error: 'stagger_seconds must be a non-negative number' });
    }

    const result = await taskService.createStaggeredTasks(
      dispensary_ids,
      role as TaskRole,
      staggerSeconds,
      platform,
      method
    );

    const totalDuration = staggerWindowSeconds(dispensary_ids.length, staggerSeconds);
    const estimatedEndTime = new Date(Date.now() + totalDuration * 1000);

    res.status(201).json({
      success: true,
      created: result.created,
      task_ids: result.taskIds,
      stagger_seconds: staggerSeconds,
      total_duration_seconds: totalDuration,
      estimated_completion: estimatedEndTime.toISOString(),
      message: `Created ${result.created} staggered ${role} tasks (${staggerSeconds}s apart, ~${Math.ceil(totalDuration / 60)} min total)`,
    });
  } catch (error: unknown) {
    console.error('Error creating staggered tasks:', error);
    res.status(500).json({ error: 'Failed to create staggered tasks' });
  }
});

// ============================================================
// SHARED HELPERS FOR STAGGERED BATCH ENDPOINTS
// (function declarations are hoisted, so every handler in this file can use them)
// ============================================================

/**
 * Normalizes a `stagger_seconds` value taken from a request body.
 *
 * @param raw - Value as received from the client (may be absent or of any type)
 * @param fallback - Default used when the value is undefined or null
 * @returns The stagger in seconds, or `null` when the value is not a finite,
 *          non-negative number (numeric strings such as "15" are accepted)
 */
function parseStaggerSeconds(raw: unknown, fallback: number): number | null {
  if (raw === undefined || raw === null) {
    return fallback;
  }

  const candidate = typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : raw;
  if (typeof candidate !== 'number' || !Number.isFinite(candidate) || candidate < 0) {
    return null;
  }

  return candidate;
}

/**
 * Seconds between the first and the last task start of a staggered batch.
 *
 * N tasks have N - 1 gaps between them; the gap count is clamped at zero so an
 * empty batch never yields a negative duration (and a completion time in the past).
 *
 * @param taskCount - Number of tasks in the batch
 * @param staggerSeconds - Seconds between consecutive task starts
 */
function staggerWindowSeconds(taskCount: number, staggerSeconds: number): number {
  const gaps = Number.isFinite(taskCount) ? Math.max(0, taskCount - 1) : 0;
  return gaps * staggerSeconds;
}

/**
 * POST /api/tasks/batch/az-stores
 * Convenience endpoint to create staggered tasks for Arizona stores
 *
 * Body:
 * - total_tasks: number (default: 24) - Total tasks to create
 * - stagger_seconds: number (default: 15) - Seconds between each task
 * - split_roles: boolean (default: true) - Split between product_refresh and product_discovery
 *
 * Responses:
 * - 201: per-role counts, task_ids and timing estimate
 * - 400: stagger_seconds invalid
 * - 500: unexpected failure while creating tasks
 */
router.post('/batch/az-stores', async (req: Request, res: Response) => {
  try {
    const {
      total_tasks = 24,
      stagger_seconds,
      split_roles = true,
    } = req.body ?? {};

    const staggerSeconds = parseStaggerSeconds(stagger_seconds, 15);
    if (staggerSeconds === null) {
      return res.status(400).json({ error: 'stagger_seconds must be a non-negative number' });
    }

    const result = await taskService.createAZStoreTasks(
      total_tasks,
      staggerSeconds,
      split_roles
    );

    // Clamped so that a batch of zero tasks does not report a negative duration.
    const totalDuration = staggerWindowSeconds(result.total, staggerSeconds);
    const estimatedEndTime = new Date(Date.now() + totalDuration * 1000);

    res.status(201).json({
      success: true,
      total: result.total,
      product_refresh: result.product_refresh,
      product_discovery: result.product_discovery,
      task_ids: result.taskIds,
      stagger_seconds: staggerSeconds,
      total_duration_seconds: totalDuration,
      estimated_completion: estimatedEndTime.toISOString(),
      message: `Created ${result.total} staggered tasks for AZ stores (${result.product_refresh} refresh, ${result.product_discovery} discovery)`,
    });
  } catch (error: unknown) {
    console.error('Error creating AZ store tasks:', error);
    res.status(500).json({ error: 'Failed to create AZ store tasks' });
  }
});

/**
 * POST /api/tasks/batch/entry-point-discovery
 * Create entry_point_discovery tasks for stores missing platform_dispensary_id
 *
 * Stores that already have platform_dispensary_id, or that already have a
 * pending/claimed/running entry_point_discovery task, are not selected.
 * Only creates tasks for stores with menu_url set and crawl_enabled = true.
 *
 * Body (optional):
 * - state_code: string (optional) - Filter by state code (upper-cased before matching)
 * - stagger_seconds: number (default: 5) - Seconds between tasks
 * - force: boolean (default: false) - Re-run even for previously failed stores
 *   (when false, only stores whose id_resolution_status is NULL or 'pending' are selected)
 *
 * Responses:
 * - 200: tasks_created, task_ids, stores and timing estimate (tasks_created is 0
 *        when no store qualifies)
 * - 400: state_code is not a string, or stagger_seconds invalid
 * - 500: unexpected failure; tasks inserted before the failure are not rolled back
 */
router.post('/batch/entry-point-discovery', async (req: Request, res: Response) => {
  try {
    const { state_code, stagger_seconds, force = false } = req.body ?? {};

    if (state_code && typeof state_code !== 'string') {
      return res.status(400).json({ error: 'state_code must be a string' });
    }

    const staggerSeconds = parseStaggerSeconds(stagger_seconds, 5);
    if (staggerSeconds === null) {
      return res.status(400).json({ error: 'stagger_seconds must be a non-negative number' });
    }

    const stateCode: string | null = state_code ? state_code.toUpperCase() : null;

    // Optional filters are spliced in as fixed SQL fragments; the only
    // user-supplied value (the state code) is passed as a bound parameter.
    const queryParams: string[] = [];
    let stateClause = '';
    if (stateCode) {
      queryParams.push(stateCode);
      stateClause = 'AND s.code = $1';
    }
    const statusClause = force
      ? ''
      : "AND (d.id_resolution_status IS NULL OR d.id_resolution_status = 'pending')";

    // Find stores that need entry point discovery. The NOT EXISTS guard skips
    // stores that already have a pending/claimed/running entry_point_discovery task.
    const storeResult = await pool.query(
      `
      SELECT d.id, d.name, d.menu_url
      FROM dispensaries d
      JOIN states s ON d.state_id = s.id
      WHERE d.crawl_enabled = true
        AND d.menu_url IS NOT NULL
        AND d.platform_dispensary_id IS NULL
        ${stateClause}
        ${statusClause}
        AND NOT EXISTS (
          SELECT 1 FROM worker_tasks t
          WHERE t.dispensary_id = d.id
            AND t.role = 'entry_point_discovery'
            AND t.status IN ('pending', 'claimed', 'running')
        )
      ORDER BY d.id
      `,
      queryParams
    );

    const dispensaryIds = storeResult.rows.map((row: { id: number }) => row.id);

    if (dispensaryIds.length === 0) {
      return res.json({
        success: true,
        message: stateCode
          ? `No ${stateCode} stores need entry point discovery`
          : 'No stores need entry point discovery',
        tasks_created: 0,
      });
    }

    // Create staggered tasks, all offset from one shared start time.
    const startMs = Date.now();
    const taskIds: number[] = [];
    for (let i = 0; i < dispensaryIds.length; i++) {
      const scheduledFor = new Date(startMs + i * staggerSeconds * 1000);
      const inserted = await pool.query(
        `
        INSERT INTO worker_tasks (role, dispensary_id, priority, scheduled_for, method)
        VALUES ('entry_point_discovery', $1, 10, $2, 'http')
        RETURNING id
        `,
        [dispensaryIds[i], scheduledFor]
      );
      taskIds.push(inserted.rows[0].id);
    }

    // The last task starts (N - 1) * stagger after the first, matching the
    // calculation used by the other staggered endpoints.
    const totalDuration = staggerWindowSeconds(dispensaryIds.length, staggerSeconds);
    const estimatedEndTime = new Date(Date.now() + totalDuration * 1000);

    res.json({
      success: true,
      tasks_created: taskIds.length,
      task_ids: taskIds,
      stores: storeResult.rows.map((row: { id: number; name: string }) => ({
        id: row.id,
        name: row.name,
      })),
      stagger_seconds: staggerSeconds,
      total_duration_seconds: totalDuration,
      estimated_completion: estimatedEndTime.toISOString(),
      message: `Created ${taskIds.length} entry_point_discovery tasks${stateCode ? ` for ${stateCode}` : ''}`,
    });
  } catch (error: unknown) {
    console.error('Error creating entry point discovery tasks:', error);
    res.status(500).json({ error: 'Failed to create entry point discovery tasks' });
  }
});

// ============================================================
// STATE-BASED CRAWL ENDPOINTS
// ============================================================

/**
 * POST /api/tasks/crawl-state/:stateCode
 * Create product_discovery tasks for all stores in a state
 *
 * This is the primary endpoint for triggering crawls by state.
 * Creates staggered tasks for every store in the state that is crawl-enabled
 * and has a platform_dispensary_id, least recently fetched first.
 *
 * Params:
 * - stateCode: State code (e.g., 'AZ', 'CA', 'CO'); upper-cased before lookup
 *
 * Body (optional):
 * - stagger_seconds: number (default: 15) - Seconds between each task
 * - priority: number - NOTE(review): documented historically, but this handler
 *   does not forward it to createStaggeredTasks, so it currently has no effect
 * - method: 'curl' | 'http' | null (default: 'http')
 *
 * Returns:
 * - tasks_created: Number of tasks created
 * - stores_in_state: Total eligible stores found for the state
 * - skipped: stores_in_state minus tasks_created (presumably stores that already
 *   have active tasks; the skipping itself happens inside createStaggeredTasks)
 *
 * Responses: 201 on creation, 200 when the state has no eligible stores,
 * 400 for an invalid stagger_seconds, 404 for an unknown state, 500 otherwise.
 */
router.post('/crawl-state/:stateCode', async (req: Request, res: Response) => {
  try {
    const stateCode = req.params.stateCode.toUpperCase();
    const { stagger_seconds, method = 'http' } = req.body ?? {};

    const staggerSeconds = parseStaggerSeconds(stagger_seconds, 15);
    if (staggerSeconds === null) {
      return res.status(400).json({ error: 'stagger_seconds must be a non-negative number' });
    }

    // Verify state exists
    const stateResult = await pool.query(
      `
      SELECT id, code, name
      FROM states
      WHERE code = $1
      `,
      [stateCode]
    );

    if (stateResult.rows.length === 0) {
      return res.status(404).json({
        error: 'State not found',
        state_code: stateCode,
      });
    }

    const state = stateResult.rows[0];

    // Get all crawl-enabled dispensaries in this state
    const dispensariesResult = await pool.query(
      `
      SELECT d.id, d.name
      FROM dispensaries d
      WHERE d.state_id = $1
        AND d.crawl_enabled = true
        AND d.platform_dispensary_id IS NOT NULL
      ORDER BY d.last_fetch_at NULLS FIRST, d.id
      `,
      [state.id]
    );

    const storeCount = dispensariesResult.rows.length;

    if (storeCount === 0) {
      return res.status(200).json({
        success: true,
        message: `No crawl-enabled stores found in ${state.name}`,
        state_code: stateCode,
        state_name: state.name,
        tasks_created: 0,
        stores_in_state: 0,
      });
    }

    const dispensaryIds = dispensariesResult.rows.map((row: { id: number }) => row.id);

    // Create staggered tasks
    const result = await taskService.createStaggeredTasks(
      dispensaryIds,
      'product_discovery',
      staggerSeconds,
      'dutchie',
      method
    );

    // Clamped: when every store is skipped (created === 0) the duration is 0
    // rather than negative.
    const totalDuration = staggerWindowSeconds(result.created, staggerSeconds);
    const estimatedEndTime = new Date(Date.now() + totalDuration * 1000);

    res.status(201).json({
      success: true,
      state_code: stateCode,
      state_name: state.name,
      tasks_created: result.created,
      stores_in_state: storeCount,
      skipped: storeCount - result.created,
      stagger_seconds: staggerSeconds,
      total_duration_seconds: totalDuration,
      estimated_completion: estimatedEndTime.toISOString(),
      message: `Created ${result.created} product_discovery tasks for ${state.name} (${staggerSeconds}s apart, ~${Math.ceil(totalDuration / 60)} min total)`,
    });
  } catch (error: unknown) {
    console.error('Error creating state crawl tasks:', error);
    res.status(500).json({ error: 'Failed to create state crawl tasks' });
  }
});

/**
 * GET /api/tasks/states
 * List all states with their store counts and crawl status
 *
 * Only states with at least one dispensary are returned, ordered by store
 * count (largest first). Each row contains: code, name, total_stores,
 * crawl_enabled_stores, missing_platform_id, last_crawl_at and active_tasks
 * (pending/claimed/running product_discovery tasks for stores in that state).
 */
router.get('/states', async (_req: Request, res: Response) => {
  try {
    const { rows } = await pool.query(`
      SELECT
        s.code,
        s.name,
        COUNT(d.id)::int as total_stores,
        COUNT(d.id) FILTER (WHERE d.crawl_enabled = true AND d.platform_dispensary_id IS NOT NULL)::int as crawl_enabled_stores,
        COUNT(d.id) FILTER (WHERE d.crawl_enabled = true AND d.platform_dispensary_id IS NULL)::int as missing_platform_id,
        MAX(d.last_fetch_at) as last_crawl_at,
        (SELECT COUNT(*) FROM worker_tasks t
         JOIN dispensaries d2 ON t.dispensary_id = d2.id
         WHERE d2.state_id = s.id
           AND t.role = 'product_discovery'
           AND t.status IN ('pending', 'claimed', 'running'))::int as active_tasks
      FROM states s
      LEFT JOIN dispensaries d ON d.state_id = s.id
      GROUP BY s.id, s.code, s.name
      HAVING COUNT(d.id) > 0
      ORDER BY COUNT(d.id) DESC
    `);

    res.json({
      states: rows,
      total_states: rows.length,
    });
  } catch (error: unknown) {
    console.error('Error listing states:', error);
    res.status(500).json({ error: 'Failed to list states' });
  }
});

// ============================================================
// TASK POOL MANAGEMENT
// ============================================================

/**
 * GET /api/tasks/pool/status
 * Check if task pool is paused
 *
 * Responds with `success: true` merged with whatever getTaskPoolStatus() returns.
 */
router.get('/pool/status', async (_req: Request, res: Response) => {
  try {
    const status = getTaskPoolStatus();
    res.json({
      success: true,
      ...status,
    });
  } catch (error: unknown) {
    console.error('Error getting task pool status:', error);
    res.status(500).json({ error: 'Failed to get task pool status' });
  }
});

/**
 * POST /api/tasks/pool/pause
 * Pause the task pool - workers won't pick up new tasks
 */
router.post('/pool/pause', async (_req: Request, res: Response) => {
  try {
    pauseTaskPool();
    res.json({
      success: true,
      paused: true,
      message: 'Task pool paused - workers will not pick up new tasks',
    });
  } catch (error: unknown) {
    console.error('Error pausing task pool:', error);
    res.status(500).json({ error: 'Failed to pause task pool' });
  }
});

/**
 * POST /api/tasks/pool/resume
 * Resume the task pool - workers will pick up tasks again
 */
router.post('/pool/resume', async (_req: Request, res: Response) => {
  try {
    resumeTaskPool();
    res.json({
      success: true,
      paused: false,
      message: 'Task pool resumed - workers will pick up new tasks',
    });
  } catch (error: unknown) {
    console.error('Error resuming task pool:', error);
    res.status(500).json({ error: 'Failed to resume task pool' });
  }
});

export default router;