/** * Job Queue Management API Routes * * Endpoints for viewing and managing the crawl job queue: * GET /api/job-queue - List all jobs (with filters) * GET /api/job-queue/stats - Queue statistics * GET /api/job-queue/:id - Get single job details * PUT /api/job-queue/:id/priority - Update job priority * POST /api/job-queue/:id/cancel - Cancel a pending job * POST /api/job-queue/:id/retry - Retry a failed job * POST /api/job-queue/bulk-priority - Bulk update priorities * POST /api/job-queue/pause - Pause queue processing * POST /api/job-queue/resume - Resume queue processing */ import { Router, Request, Response } from 'express'; import { pool } from '../db/pool'; const router = Router(); // In-memory queue state (would be in Redis in production) let queuePaused = false; /** * GET /api/job-queue - List jobs with filters */ router.get('/', async (req: Request, res: Response) => { try { const { status = 'pending', limit = '50', offset = '0', job_type, dispensary_id, sort_by = 'priority', sort_order = 'desc' } = req.query; let query = ` SELECT j.id, j.dispensary_id, d.name as dispensary_name, d.city, d.state, j.job_type, j.trigger_type, j.priority, j.status, j.scheduled_at, j.started_at, j.completed_at, j.duration_ms, j.products_found, j.error_message, j.retry_count, j.max_retries, j.worker_id, j.locked_by, j.created_at FROM dispensary_crawl_jobs j LEFT JOIN dispensaries d ON d.id = j.dispensary_id WHERE 1=1 `; const params: any[] = []; let paramIndex = 1; if (status && status !== 'all') { params.push(status); query += ` AND j.status = $${paramIndex++}`; } if (job_type) { params.push(job_type); query += ` AND j.job_type = $${paramIndex++}`; } if (dispensary_id) { params.push(dispensary_id); query += ` AND j.dispensary_id = $${paramIndex++}`; } // Sorting const validSortColumns = ['priority', 'created_at', 'scheduled_at', 'dispensary_name']; const sortCol = validSortColumns.includes(sort_by as string) ? sort_by : 'priority'; const sortDir = sort_order === 'asc' ? 'ASC' : 'DESC'; if (sortCol === 'dispensary_name') { query += ` ORDER BY d.name ${sortDir} NULLS LAST`; } else { query += ` ORDER BY j.${sortCol} ${sortDir} NULLS LAST`; } // Add secondary sort by created_at for consistent ordering if (sortCol !== 'created_at') { query += `, j.created_at ASC`; } params.push(parseInt(limit as string)); query += ` LIMIT $${paramIndex++}`; params.push(parseInt(offset as string)); query += ` OFFSET $${paramIndex++}`; const { rows } = await pool.query(query, params); // Get total count for pagination let countQuery = ` SELECT COUNT(*) as total FROM dispensary_crawl_jobs j WHERE 1=1 `; const countParams: any[] = []; let countParamIndex = 1; if (status && status !== 'all') { countParams.push(status); countQuery += ` AND j.status = $${countParamIndex++}`; } if (job_type) { countParams.push(job_type); countQuery += ` AND j.job_type = $${countParamIndex++}`; } if (dispensary_id) { countParams.push(dispensary_id); countQuery += ` AND j.dispensary_id = $${countParamIndex++}`; } const countResult = await pool.query(countQuery, countParams); const total = parseInt(countResult.rows[0].total); res.json({ success: true, jobs: rows, total, limit: parseInt(limit as string), offset: parseInt(offset as string), queue_paused: queuePaused }); } catch (error: any) { console.error('[JobQueue] Error listing jobs:', error); res.status(500).json({ success: false, error: error.message }); } }); /** * GET /api/job-queue/available - List dispensaries available for crawling * Query: { state_code?: string, limit?: number } * NOTE: Must be defined BEFORE /:id route to avoid conflict */ router.get('/available', async (req: Request, res: Response) => { try { const { state_code, limit = '100' } = req.query; let query = ` SELECT d.id, d.name, d.city, s.code as state_code, d.platform_dispensary_id, d.crawl_enabled, (SELECT MAX(created_at) FROM dispensary_crawl_jobs WHERE dispensary_id = d.id AND status = 'completed') as last_crawl, EXISTS ( SELECT 1 FROM dispensary_crawl_jobs WHERE dispensary_id = d.id AND status IN ('pending', 'running') ) as has_pending_job FROM dispensaries d LEFT JOIN states s ON s.id = d.state_id WHERE d.crawl_enabled = true AND d.platform_dispensary_id IS NOT NULL `; const params: any[] = []; let paramIndex = 1; if (state_code) { params.push((state_code as string).toUpperCase()); query += ` AND s.code = $${paramIndex++}`; } query += ` ORDER BY d.name LIMIT $${paramIndex}`; params.push(parseInt(limit as string)); const { rows } = await pool.query(query, params); // Get counts by state const { rows: stateCounts } = await pool.query(` SELECT s.code, COUNT(*) as count FROM dispensaries d JOIN states s ON s.id = d.state_id WHERE d.crawl_enabled = true AND d.platform_dispensary_id IS NOT NULL GROUP BY s.code ORDER BY count DESC `); res.json({ success: true, dispensaries: rows, total: rows.length, by_state: stateCounts }); } catch (error: any) { console.error('[JobQueue] Error listing available:', error); res.status(500).json({ success: false, error: error.message }); } }); /** * GET /api/job-queue/history - Get recent job history with results * Query: { state_code?: string, status?: string, limit?: number, hours?: number } * NOTE: Must be defined BEFORE /:id route to avoid conflict */ router.get('/history', async (req: Request, res: Response) => { try { const { state_code, status, limit = '50', hours = '24' } = req.query; let query = ` SELECT j.id, j.dispensary_id, d.name as dispensary_name, s.code as state_code, j.job_type, j.status, j.products_found, j.error_message, j.started_at, j.completed_at, j.duration_ms, j.created_at FROM dispensary_crawl_jobs j LEFT JOIN dispensaries d ON d.id = j.dispensary_id LEFT JOIN states s ON s.id = d.state_id WHERE j.created_at > NOW() - INTERVAL '${parseInt(hours as string)} hours' `; const params: any[] = []; let paramIndex = 1; if (status && status !== 'all') { params.push(status); query += ` AND j.status = $${paramIndex++}`; } if (state_code) { params.push((state_code as string).toUpperCase()); query += ` AND s.code = $${paramIndex++}`; } query += ` ORDER BY j.created_at DESC LIMIT $${paramIndex}`; params.push(parseInt(limit as string)); const { rows } = await pool.query(query, params); // Get summary stats const { rows: stats } = await pool.query(` SELECT COUNT(*) FILTER (WHERE status = 'completed') as completed, COUNT(*) FILTER (WHERE status = 'failed') as failed, COUNT(*) FILTER (WHERE status = 'running') as running, COUNT(*) FILTER (WHERE status = 'pending') as pending, SUM(products_found) FILTER (WHERE status = 'completed') as total_products, AVG(duration_ms) FILTER (WHERE status = 'completed') as avg_duration_ms FROM dispensary_crawl_jobs WHERE created_at > NOW() - INTERVAL '${parseInt(hours as string)} hours' `); res.json({ success: true, jobs: rows, summary: { completed: parseInt(stats[0].completed) || 0, failed: parseInt(stats[0].failed) || 0, running: parseInt(stats[0].running) || 0, pending: parseInt(stats[0].pending) || 0, total_products: parseInt(stats[0].total_products) || 0, avg_duration_ms: Math.round(parseFloat(stats[0].avg_duration_ms)) || null }, hours: parseInt(hours as string) }); } catch (error: any) { console.error('[JobQueue] Error getting history:', error); res.status(500).json({ success: false, error: error.message }); } }); /** * GET /api/job-queue/stats - Queue statistics */ router.get('/stats', async (_req: Request, res: Response) => { try { const { rows } = await pool.query(` SELECT COUNT(*) FILTER (WHERE status = 'pending') as pending_count, COUNT(*) FILTER (WHERE status = 'running') as running_count, COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as completed_24h, COUNT(*) FILTER (WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '24 hours') as failed_24h, COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled_count, AVG(duration_ms) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as avg_duration_ms, MAX(priority) FILTER (WHERE status = 'pending') as max_priority, MIN(created_at) FILTER (WHERE status = 'pending') as oldest_pending FROM dispensary_crawl_jobs `); const stats = rows[0]; // Get jobs by type const { rows: byType } = await pool.query(` SELECT job_type, COUNT(*) as count FROM dispensary_crawl_jobs WHERE status = 'pending' GROUP BY job_type ORDER BY count DESC `); // Get top priority jobs const { rows: topPriority } = await pool.query(` SELECT j.id, j.dispensary_id, d.name as dispensary_name, j.job_type, j.priority, j.created_at FROM dispensary_crawl_jobs j LEFT JOIN dispensaries d ON d.id = j.dispensary_id WHERE j.status = 'pending' ORDER BY j.priority DESC, j.created_at ASC LIMIT 5 `); // Estimate wait time based on avg processing rate const pendingCount = parseInt(stats.pending_count) || 0; const avgDuration = parseFloat(stats.avg_duration_ms) || 30000; // default 30s const runningCount = parseInt(stats.running_count) || 1; const estimatedWaitMs = (pendingCount * avgDuration) / Math.max(runningCount, 1); res.json({ success: true, stats: { pending: parseInt(stats.pending_count) || 0, running: parseInt(stats.running_count) || 0, completed_24h: parseInt(stats.completed_24h) || 0, failed_24h: parseInt(stats.failed_24h) || 0, cancelled: parseInt(stats.cancelled_count) || 0, avg_duration_ms: Math.round(parseFloat(stats.avg_duration_ms)) || null, max_priority: parseInt(stats.max_priority) || 0, oldest_pending: stats.oldest_pending, estimated_wait_ms: Math.round(estimatedWaitMs), queue_paused: queuePaused }, by_type: byType, top_priority: topPriority }); } catch (error: any) { console.error('[JobQueue] Error getting stats:', error); res.status(500).json({ success: false, error: error.message }); } }); /** * GET /api/job-queue/:id - Get single job */ router.get('/:id', async (req: Request, res: Response) => { try { const { id } = req.params; const { rows } = await pool.query(` SELECT j.*, d.name as dispensary_name, d.city, d.state, d.menu_url FROM dispensary_crawl_jobs j LEFT JOIN dispensaries d ON d.id = j.dispensary_id WHERE j.id = $1 `, [id]); if (rows.length === 0) { return res.status(404).json({ success: false, error: 'Job not found' }); } res.json({ success: true, job: rows[0] }); } catch (error: any) { console.error('[JobQueue] Error getting job:', error); res.status(500).json({ success: false, error: error.message }); } }); /** * PUT /api/job-queue/:id/priority - Update job priority */ router.put('/:id/priority', async (req: Request, res: Response) => { try { const { id } = req.params; const { priority } = req.body; if (typeof priority !== 'number' || priority < 0 || priority > 100) { return res.status(400).json({ success: false, error: 'Priority must be a number between 0 and 100' }); } const { rows } = await pool.query(` UPDATE dispensary_crawl_jobs SET priority = $1, updated_at = NOW() WHERE id = $2 AND status = 'pending' RETURNING id, priority, status `, [priority, id]); if (rows.length === 0) { return res.status(404).json({ success: false, error: 'Job not found or not in pending status' }); } res.json({ success: true, job: rows[0] }); } catch (error: any) { console.error('[JobQueue] Error updating priority:', error); res.status(500).json({ success: false, error: error.message }); } }); /** * POST /api/job-queue/:id/cancel - Cancel a pending job */ router.post('/:id/cancel', async (req: Request, res: Response) => { try { const { id } = req.params; const { rows } = await pool.query(` UPDATE dispensary_crawl_jobs SET status = 'cancelled', completed_at = NOW(), updated_at = NOW() WHERE id = $1 AND status = 'pending' RETURNING id, status `, [id]); if (rows.length === 0) { return res.status(404).json({ success: false, error: 'Job not found or not in pending status' }); } res.json({ success: true, job: rows[0], message: 'Job cancelled' }); } catch (error: any) { console.error('[JobQueue] Error cancelling job:', error); res.status(500).json({ success: false, error: error.message }); } }); /** * POST /api/job-queue/:id/retry - Retry a failed job */ router.post('/:id/retry', async (req: Request, res: Response) => { try { const { id } = req.params; const { priority } = req.body; const { rows } = await pool.query(` UPDATE dispensary_crawl_jobs SET status = 'pending', priority = COALESCE($2, priority), error_message = NULL, started_at = NULL, completed_at = NULL, duration_ms = NULL, worker_id = NULL, locked_by = NULL, locked_at = NULL, retry_count = retry_count + 1, updated_at = NOW() WHERE id = $1 AND status IN ('failed', 'cancelled') RETURNING id, status, priority, retry_count `, [id, priority]); if (rows.length === 0) { return res.status(404).json({ success: false, error: 'Job not found or not in failed/cancelled status' }); } res.json({ success: true, job: rows[0], message: 'Job queued for retry' }); } catch (error: any) { console.error('[JobQueue] Error retrying job:', error); res.status(500).json({ success: false, error: error.message }); } }); /** * POST /api/job-queue/bulk-priority - Bulk update priorities */ router.post('/bulk-priority', async (req: Request, res: Response) => { try { const { jobs } = req.body; // Array of { id, priority } if (!Array.isArray(jobs) || jobs.length === 0) { return res.status(400).json({ success: false, error: 'jobs array is required' }); } const client = await pool.connect(); try { await client.query('BEGIN'); let updated = 0; for (const job of jobs) { if (typeof job.id === 'number' && typeof job.priority === 'number') { const result = await client.query(` UPDATE dispensary_crawl_jobs SET priority = $1, updated_at = NOW() WHERE id = $2 AND status = 'pending' `, [job.priority, job.id]); updated += result.rowCount || 0; } } await client.query('COMMIT'); res.json({ success: true, updated, message: `Updated ${updated} jobs` }); } catch (err) { await client.query('ROLLBACK'); throw err; } finally { client.release(); } } catch (error: any) { console.error('[JobQueue] Error bulk updating priorities:', error); res.status(500).json({ success: false, error: error.message }); } }); /** * POST /api/job-queue/enqueue - Add a new job to the queue * * 2024-12-10: Rewired to use worker_tasks via taskService. * Legacy dispensary_crawl_jobs code commented out below. */ router.post('/enqueue', async (req: Request, res: Response) => { try { const { dispensary_id, job_type = 'dutchie_product_crawl', priority = 0 } = req.body; if (!dispensary_id) { return res.status(400).json({ success: false, error: 'dispensary_id is required' }); } // 2024-12-10: Map legacy job_type to new task role const roleMap: Record = { 'dutchie_product_crawl': 'product_refresh', 'menu_detection': 'entry_point_discovery', 'menu_detection_single': 'entry_point_discovery', 'product_discovery': 'product_discovery', 'store_discovery': 'store_discovery', }; const role = roleMap[job_type] || 'product_refresh'; // 2024-12-10: Use taskService to create task in worker_tasks table const { taskService } = await import('../tasks/task-service'); // Check if task already pending for this dispensary const existingTasks = await taskService.listTasks({ dispensary_id, role: role as any, status: ['pending', 'claimed', 'running'], limit: 1, }); if (existingTasks.length > 0) { return res.json({ success: true, task_id: existingTasks[0].id, message: 'Task already queued' }); } const task = await taskService.createTask({ role: role as any, dispensary_id, priority, }); res.json({ success: true, task_id: task.id, message: 'Task enqueued' }); } catch (error: any) { console.error('[JobQueue] Error enqueuing task:', error); res.status(500).json({ success: false, error: error.message }); } }); /* * LEGACY CODE - 2024-12-10: Commented out, was using orphaned dispensary_crawl_jobs table * router.post('/enqueue', async (req: Request, res: Response) => { try { const { dispensary_id, job_type = 'dutchie_product_crawl', priority = 0 } = req.body; if (!dispensary_id) { return res.status(400).json({ success: false, error: 'dispensary_id is required' }); } // Check if job already pending for this dispensary const existing = await pool.query(` SELECT id FROM dispensary_crawl_jobs WHERE dispensary_id = $1 AND job_type = $2 AND status = 'pending' `, [dispensary_id, job_type]); if (existing.rows.length > 0) { // Update priority if higher await pool.query(` UPDATE dispensary_crawl_jobs SET priority = GREATEST(priority, $1), updated_at = NOW() WHERE id = $2 `, [priority, existing.rows[0].id]); return res.json({ success: true, job_id: existing.rows[0].id, message: 'Job already queued, priority updated' }); } const { rows } = await pool.query(` INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type) VALUES ($1, $2, $3, 'manual') RETURNING id `, [dispensary_id, job_type, priority]); res.json({ success: true, job_id: rows[0].id, message: 'Job enqueued' }); } catch (error: any) { console.error('[JobQueue] Error enqueuing job:', error); res.status(500).json({ success: false, error: error.message }); } }); */ /** * POST /api/job-queue/pause - Pause queue processing */ router.post('/pause', async (_req: Request, res: Response) => { queuePaused = true; res.json({ success: true, queue_paused: true, message: 'Queue paused' }); }); /** * POST /api/job-queue/resume - Resume queue processing */ router.post('/resume', async (_req: Request, res: Response) => { queuePaused = false; res.json({ success: true, queue_paused: false, message: 'Queue resumed' }); }); /** * GET /api/job-queue/paused - Check if queue is paused */ router.get('/paused', async (_req: Request, res: Response) => { res.json({ success: true, queue_paused: queuePaused }); }); /** * POST /api/job-queue/enqueue-batch - Queue multiple dispensaries at once * Body: { dispensary_ids: number[], job_type?: string, priority?: number } * * 2024-12-10: Rewired to use worker_tasks via taskService. */ router.post('/enqueue-batch', async (req: Request, res: Response) => { try { const { dispensary_ids, job_type = 'dutchie_product_crawl', priority = 0 } = req.body; if (!Array.isArray(dispensary_ids) || dispensary_ids.length === 0) { return res.status(400).json({ success: false, error: 'dispensary_ids array is required' }); } if (dispensary_ids.length > 500) { return res.status(400).json({ success: false, error: 'Maximum 500 dispensaries per batch' }); } // 2024-12-10: Map legacy job_type to new task role const roleMap: Record = { 'dutchie_product_crawl': 'product_refresh', 'menu_detection': 'entry_point_discovery', 'product_discovery': 'product_discovery', }; const role = roleMap[job_type] || 'product_refresh'; // 2024-12-10: Use taskService to create tasks in worker_tasks table const { taskService } = await import('../tasks/task-service'); const tasks = dispensary_ids.map(dispensary_id => ({ role: role as any, dispensary_id, priority, })); const createdCount = await taskService.createTasks(tasks); res.json({ success: true, queued: createdCount, requested: dispensary_ids.length, message: `Queued ${createdCount} of ${dispensary_ids.length} dispensaries` }); } catch (error: any) { console.error('[JobQueue] Error batch enqueuing:', error); res.status(500).json({ success: false, error: error.message }); } }); /** * POST /api/job-queue/enqueue-state - Queue all crawl-enabled dispensaries for a state * Body: { state_code: string, job_type?: string, priority?: number, limit?: number } * * 2024-12-10: Rewired to use worker_tasks via taskService. */ router.post('/enqueue-state', async (req: Request, res: Response) => { try { const { state_code, job_type = 'dutchie_product_crawl', priority = 0, limit = 200 } = req.body; if (!state_code) { return res.status(400).json({ success: false, error: 'state_code is required (e.g., "AZ")' }); } // 2024-12-10: Map legacy job_type to new task role const roleMap: Record = { 'dutchie_product_crawl': 'product_refresh', 'menu_detection': 'entry_point_discovery', 'product_discovery': 'product_discovery', }; const role = roleMap[job_type] || 'product_refresh'; // Get dispensary IDs for the state const dispensaryResult = await pool.query(` SELECT d.id FROM dispensaries d JOIN states s ON s.id = d.state_id WHERE s.code = $1 AND d.crawl_enabled = true AND d.platform_dispensary_id IS NOT NULL LIMIT $2 `, [state_code.toUpperCase(), limit]); const dispensary_ids = dispensaryResult.rows.map((r: any) => r.id); // 2024-12-10: Use taskService to create tasks in worker_tasks table const { taskService } = await import('../tasks/task-service'); const tasks = dispensary_ids.map((dispensary_id: number) => ({ role: role as any, dispensary_id, priority, })); const createdCount = await taskService.createTasks(tasks); // Get total available count const countResult = await pool.query(` SELECT COUNT(*) as total FROM dispensaries d JOIN states s ON s.id = d.state_id WHERE s.code = $1 AND d.crawl_enabled = true AND d.platform_dispensary_id IS NOT NULL `, [state_code.toUpperCase()]); res.json({ success: true, queued: createdCount, total_available: parseInt(countResult.rows[0].total), state: state_code.toUpperCase(), role, message: `Queued ${createdCount} dispensaries for ${state_code.toUpperCase()}` }); } catch (error: any) { console.error('[JobQueue] Error enqueuing state:', error); res.status(500).json({ success: false, error: error.message }); } }); /** * POST /api/job-queue/clear-pending - Clear all pending jobs (optionally filtered) * Body: { state_code?: string, job_type?: string } */ router.post('/clear-pending', async (req: Request, res: Response) => { try { const { state_code, job_type } = req.body; let query = ` UPDATE dispensary_crawl_jobs SET status = 'cancelled', completed_at = NOW(), updated_at = NOW() WHERE status = 'pending' `; const params: any[] = []; let paramIndex = 1; if (job_type) { params.push(job_type); query += ` AND job_type = $${paramIndex++}`; } if (state_code) { params.push((state_code as string).toUpperCase()); query += ` AND dispensary_id IN ( SELECT d.id FROM dispensaries d JOIN states s ON s.id = d.state_id WHERE s.code = $${paramIndex++} )`; } const result = await pool.query(query, params); res.json({ success: true, cleared: result.rowCount, message: `Cancelled ${result.rowCount} pending jobs` }); } catch (error: any) { console.error('[JobQueue] Error clearing pending:', error); res.status(500).json({ success: false, error: error.message }); } }); export default router; export { queuePaused };