Files
cannaiq/backend/src/routes/job-queue.ts
Kelly bbe039c868 feat(api): Add job queue management endpoints and fix SQL type errors
- Add GET /api/job-queue/available - list dispensaries available for crawling
- Add GET /api/job-queue/history - get recent job history with results
- Add POST /api/job-queue/enqueue-batch - queue multiple dispensaries at once
- Add POST /api/job-queue/enqueue-state - queue all crawl-enabled dispensaries for a state
- Add POST /api/job-queue/clear-pending - clear pending jobs with optional filters
- Fix SQL parameter type errors by adding explicit casts ($2::text, $3::integer)
- Fix route ordering to prevent /:id from matching /available and /history

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 14:10:55 -07:00

774 lines
23 KiB
TypeScript

/**
* Job Queue Management API Routes
*
* Endpoints for viewing and managing the crawl job queue:
* GET /api/job-queue - List all jobs (with filters)
* GET /api/job-queue/stats - Queue statistics
* GET /api/job-queue/:id - Get single job details
* PUT /api/job-queue/:id/priority - Update job priority
* POST /api/job-queue/:id/cancel - Cancel a pending job
* POST /api/job-queue/:id/retry - Retry a failed job
* POST /api/job-queue/bulk-priority - Bulk update priorities
* POST /api/job-queue/pause - Pause queue processing
* POST /api/job-queue/resume - Resume queue processing
*/
import { Router, Request, Response } from 'express';
import { pool } from '../db/pool';
const router = Router();
// In-memory queue state (would be in Redis in production)
let queuePaused = false;
/**
* GET /api/job-queue - List jobs with filters
*/
router.get('/', async (req: Request, res: Response) => {
try {
const {
status = 'pending',
limit = '50',
offset = '0',
job_type,
dispensary_id,
sort_by = 'priority',
sort_order = 'desc'
} = req.query;
let query = `
SELECT
j.id,
j.dispensary_id,
d.name as dispensary_name,
d.city,
d.state,
j.job_type,
j.trigger_type,
j.priority,
j.status,
j.scheduled_at,
j.started_at,
j.completed_at,
j.duration_ms,
j.products_found,
j.error_message,
j.retry_count,
j.max_retries,
j.worker_id,
j.locked_by,
j.created_at
FROM dispensary_crawl_jobs j
LEFT JOIN dispensaries d ON d.id = j.dispensary_id
WHERE 1=1
`;
const params: any[] = [];
let paramIndex = 1;
if (status && status !== 'all') {
params.push(status);
query += ` AND j.status = $${paramIndex++}`;
}
if (job_type) {
params.push(job_type);
query += ` AND j.job_type = $${paramIndex++}`;
}
if (dispensary_id) {
params.push(dispensary_id);
query += ` AND j.dispensary_id = $${paramIndex++}`;
}
// Sorting
const validSortColumns = ['priority', 'created_at', 'scheduled_at', 'dispensary_name'];
const sortCol = validSortColumns.includes(sort_by as string) ? sort_by : 'priority';
const sortDir = sort_order === 'asc' ? 'ASC' : 'DESC';
if (sortCol === 'dispensary_name') {
query += ` ORDER BY d.name ${sortDir} NULLS LAST`;
} else {
query += ` ORDER BY j.${sortCol} ${sortDir} NULLS LAST`;
}
// Add secondary sort by created_at for consistent ordering
if (sortCol !== 'created_at') {
query += `, j.created_at ASC`;
}
params.push(parseInt(limit as string));
query += ` LIMIT $${paramIndex++}`;
params.push(parseInt(offset as string));
query += ` OFFSET $${paramIndex++}`;
const { rows } = await pool.query(query, params);
// Get total count for pagination
let countQuery = `
SELECT COUNT(*) as total
FROM dispensary_crawl_jobs j
WHERE 1=1
`;
const countParams: any[] = [];
let countParamIndex = 1;
if (status && status !== 'all') {
countParams.push(status);
countQuery += ` AND j.status = $${countParamIndex++}`;
}
if (job_type) {
countParams.push(job_type);
countQuery += ` AND j.job_type = $${countParamIndex++}`;
}
if (dispensary_id) {
countParams.push(dispensary_id);
countQuery += ` AND j.dispensary_id = $${countParamIndex++}`;
}
const countResult = await pool.query(countQuery, countParams);
const total = parseInt(countResult.rows[0].total);
res.json({
success: true,
jobs: rows,
total,
limit: parseInt(limit as string),
offset: parseInt(offset as string),
queue_paused: queuePaused
});
} catch (error: any) {
console.error('[JobQueue] Error listing jobs:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/job-queue/available - List dispensaries available for crawling
* Query: { state_code?: string, limit?: number }
* NOTE: Must be defined BEFORE /:id route to avoid conflict
*/
router.get('/available', async (req: Request, res: Response) => {
try {
const { state_code, limit = '100' } = req.query;
let query = `
SELECT
d.id,
d.name,
d.city,
s.code as state_code,
d.platform_dispensary_id,
d.crawl_enabled,
(SELECT MAX(created_at) FROM dispensary_crawl_jobs WHERE dispensary_id = d.id AND status = 'completed') as last_crawl,
EXISTS (
SELECT 1 FROM dispensary_crawl_jobs
WHERE dispensary_id = d.id AND status IN ('pending', 'running')
) as has_pending_job
FROM dispensaries d
LEFT JOIN states s ON s.id = d.state_id
WHERE d.crawl_enabled = true
AND d.platform_dispensary_id IS NOT NULL
`;
const params: any[] = [];
let paramIndex = 1;
if (state_code) {
params.push((state_code as string).toUpperCase());
query += ` AND s.code = $${paramIndex++}`;
}
query += ` ORDER BY d.name LIMIT $${paramIndex}`;
params.push(parseInt(limit as string));
const { rows } = await pool.query(query, params);
// Get counts by state
const { rows: stateCounts } = await pool.query(`
SELECT s.code, COUNT(*) as count
FROM dispensaries d
JOIN states s ON s.id = d.state_id
WHERE d.crawl_enabled = true
AND d.platform_dispensary_id IS NOT NULL
GROUP BY s.code
ORDER BY count DESC
`);
res.json({
success: true,
dispensaries: rows,
total: rows.length,
by_state: stateCounts
});
} catch (error: any) {
console.error('[JobQueue] Error listing available:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/job-queue/history - Get recent job history with results
* Query: { state_code?: string, status?: string, limit?: number, hours?: number }
* NOTE: Must be defined BEFORE /:id route to avoid conflict
*/
router.get('/history', async (req: Request, res: Response) => {
try {
const {
state_code,
status,
limit = '50',
hours = '24'
} = req.query;
let query = `
SELECT
j.id,
j.dispensary_id,
d.name as dispensary_name,
s.code as state_code,
j.job_type,
j.status,
j.products_found,
j.error_message,
j.started_at,
j.completed_at,
j.duration_ms,
j.created_at
FROM dispensary_crawl_jobs j
LEFT JOIN dispensaries d ON d.id = j.dispensary_id
LEFT JOIN states s ON s.id = d.state_id
WHERE j.created_at > NOW() - INTERVAL '${parseInt(hours as string)} hours'
`;
const params: any[] = [];
let paramIndex = 1;
if (status && status !== 'all') {
params.push(status);
query += ` AND j.status = $${paramIndex++}`;
}
if (state_code) {
params.push((state_code as string).toUpperCase());
query += ` AND s.code = $${paramIndex++}`;
}
query += ` ORDER BY j.created_at DESC LIMIT $${paramIndex}`;
params.push(parseInt(limit as string));
const { rows } = await pool.query(query, params);
// Get summary stats
const { rows: stats } = await pool.query(`
SELECT
COUNT(*) FILTER (WHERE status = 'completed') as completed,
COUNT(*) FILTER (WHERE status = 'failed') as failed,
COUNT(*) FILTER (WHERE status = 'running') as running,
COUNT(*) FILTER (WHERE status = 'pending') as pending,
SUM(products_found) FILTER (WHERE status = 'completed') as total_products,
AVG(duration_ms) FILTER (WHERE status = 'completed') as avg_duration_ms
FROM dispensary_crawl_jobs
WHERE created_at > NOW() - INTERVAL '${parseInt(hours as string)} hours'
`);
res.json({
success: true,
jobs: rows,
summary: {
completed: parseInt(stats[0].completed) || 0,
failed: parseInt(stats[0].failed) || 0,
running: parseInt(stats[0].running) || 0,
pending: parseInt(stats[0].pending) || 0,
total_products: parseInt(stats[0].total_products) || 0,
avg_duration_ms: Math.round(parseFloat(stats[0].avg_duration_ms)) || null
},
hours: parseInt(hours as string)
});
} catch (error: any) {
console.error('[JobQueue] Error getting history:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/job-queue/stats - Queue statistics
*/
router.get('/stats', async (_req: Request, res: Response) => {
try {
const { rows } = await pool.query(`
SELECT
COUNT(*) FILTER (WHERE status = 'pending') as pending_count,
COUNT(*) FILTER (WHERE status = 'running') as running_count,
COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as completed_24h,
COUNT(*) FILTER (WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '24 hours') as failed_24h,
COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled_count,
AVG(duration_ms) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as avg_duration_ms,
MAX(priority) FILTER (WHERE status = 'pending') as max_priority,
MIN(created_at) FILTER (WHERE status = 'pending') as oldest_pending
FROM dispensary_crawl_jobs
`);
const stats = rows[0];
// Get jobs by type
const { rows: byType } = await pool.query(`
SELECT job_type, COUNT(*) as count
FROM dispensary_crawl_jobs
WHERE status = 'pending'
GROUP BY job_type
ORDER BY count DESC
`);
// Get top priority jobs
const { rows: topPriority } = await pool.query(`
SELECT
j.id,
j.dispensary_id,
d.name as dispensary_name,
j.job_type,
j.priority,
j.created_at
FROM dispensary_crawl_jobs j
LEFT JOIN dispensaries d ON d.id = j.dispensary_id
WHERE j.status = 'pending'
ORDER BY j.priority DESC, j.created_at ASC
LIMIT 5
`);
// Estimate wait time based on avg processing rate
const pendingCount = parseInt(stats.pending_count) || 0;
const avgDuration = parseFloat(stats.avg_duration_ms) || 30000; // default 30s
const runningCount = parseInt(stats.running_count) || 1;
const estimatedWaitMs = (pendingCount * avgDuration) / Math.max(runningCount, 1);
res.json({
success: true,
stats: {
pending: parseInt(stats.pending_count) || 0,
running: parseInt(stats.running_count) || 0,
completed_24h: parseInt(stats.completed_24h) || 0,
failed_24h: parseInt(stats.failed_24h) || 0,
cancelled: parseInt(stats.cancelled_count) || 0,
avg_duration_ms: Math.round(parseFloat(stats.avg_duration_ms)) || null,
max_priority: parseInt(stats.max_priority) || 0,
oldest_pending: stats.oldest_pending,
estimated_wait_ms: Math.round(estimatedWaitMs),
queue_paused: queuePaused
},
by_type: byType,
top_priority: topPriority
});
} catch (error: any) {
console.error('[JobQueue] Error getting stats:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/job-queue/:id - Get single job
*/
router.get('/:id', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { rows } = await pool.query(`
SELECT
j.*,
d.name as dispensary_name,
d.city,
d.state,
d.menu_url
FROM dispensary_crawl_jobs j
LEFT JOIN dispensaries d ON d.id = j.dispensary_id
WHERE j.id = $1
`, [id]);
if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Job not found' });
}
res.json({ success: true, job: rows[0] });
} catch (error: any) {
console.error('[JobQueue] Error getting job:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* PUT /api/job-queue/:id/priority - Update job priority
*/
router.put('/:id/priority', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { priority } = req.body;
if (typeof priority !== 'number' || priority < 0 || priority > 100) {
return res.status(400).json({
success: false,
error: 'Priority must be a number between 0 and 100'
});
}
const { rows } = await pool.query(`
UPDATE dispensary_crawl_jobs
SET priority = $1, updated_at = NOW()
WHERE id = $2 AND status = 'pending'
RETURNING id, priority, status
`, [priority, id]);
if (rows.length === 0) {
return res.status(404).json({
success: false,
error: 'Job not found or not in pending status'
});
}
res.json({ success: true, job: rows[0] });
} catch (error: any) {
console.error('[JobQueue] Error updating priority:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/job-queue/:id/cancel - Cancel a pending job
*/
router.post('/:id/cancel', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { rows } = await pool.query(`
UPDATE dispensary_crawl_jobs
SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
WHERE id = $1 AND status = 'pending'
RETURNING id, status
`, [id]);
if (rows.length === 0) {
return res.status(404).json({
success: false,
error: 'Job not found or not in pending status'
});
}
res.json({ success: true, job: rows[0], message: 'Job cancelled' });
} catch (error: any) {
console.error('[JobQueue] Error cancelling job:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/job-queue/:id/retry - Retry a failed job
*/
router.post('/:id/retry', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { priority } = req.body;
const { rows } = await pool.query(`
UPDATE dispensary_crawl_jobs
SET
status = 'pending',
priority = COALESCE($2, priority),
error_message = NULL,
started_at = NULL,
completed_at = NULL,
duration_ms = NULL,
worker_id = NULL,
locked_by = NULL,
locked_at = NULL,
retry_count = retry_count + 1,
updated_at = NOW()
WHERE id = $1 AND status IN ('failed', 'cancelled')
RETURNING id, status, priority, retry_count
`, [id, priority]);
if (rows.length === 0) {
return res.status(404).json({
success: false,
error: 'Job not found or not in failed/cancelled status'
});
}
res.json({ success: true, job: rows[0], message: 'Job queued for retry' });
} catch (error: any) {
console.error('[JobQueue] Error retrying job:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/job-queue/bulk-priority - Bulk update priorities
*/
router.post('/bulk-priority', async (req: Request, res: Response) => {
try {
const { jobs } = req.body; // Array of { id, priority }
if (!Array.isArray(jobs) || jobs.length === 0) {
return res.status(400).json({
success: false,
error: 'jobs array is required'
});
}
const client = await pool.connect();
try {
await client.query('BEGIN');
let updated = 0;
for (const job of jobs) {
if (typeof job.id === 'number' && typeof job.priority === 'number') {
const result = await client.query(`
UPDATE dispensary_crawl_jobs
SET priority = $1, updated_at = NOW()
WHERE id = $2 AND status = 'pending'
`, [job.priority, job.id]);
updated += result.rowCount || 0;
}
}
await client.query('COMMIT');
res.json({ success: true, updated, message: `Updated ${updated} jobs` });
} catch (err) {
await client.query('ROLLBACK');
throw err;
} finally {
client.release();
}
} catch (error: any) {
console.error('[JobQueue] Error bulk updating priorities:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/job-queue/enqueue - Add a new job to the queue
*/
router.post('/enqueue', async (req: Request, res: Response) => {
try {
const { dispensary_id, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;
if (!dispensary_id) {
return res.status(400).json({ success: false, error: 'dispensary_id is required' });
}
// Check if job already pending for this dispensary
const existing = await pool.query(`
SELECT id FROM dispensary_crawl_jobs
WHERE dispensary_id = $1 AND job_type = $2 AND status = 'pending'
`, [dispensary_id, job_type]);
if (existing.rows.length > 0) {
// Update priority if higher
await pool.query(`
UPDATE dispensary_crawl_jobs
SET priority = GREATEST(priority, $1), updated_at = NOW()
WHERE id = $2
`, [priority, existing.rows[0].id]);
return res.json({
success: true,
job_id: existing.rows[0].id,
message: 'Job already queued, priority updated'
});
}
const { rows } = await pool.query(`
INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type)
VALUES ($1, $2, $3, 'manual')
RETURNING id
`, [dispensary_id, job_type, priority]);
res.json({ success: true, job_id: rows[0].id, message: 'Job enqueued' });
} catch (error: any) {
console.error('[JobQueue] Error enqueuing job:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/job-queue/pause - Pause queue processing
*/
router.post('/pause', async (_req: Request, res: Response) => {
queuePaused = true;
res.json({ success: true, queue_paused: true, message: 'Queue paused' });
});
/**
* POST /api/job-queue/resume - Resume queue processing
*/
router.post('/resume', async (_req: Request, res: Response) => {
queuePaused = false;
res.json({ success: true, queue_paused: false, message: 'Queue resumed' });
});
/**
* GET /api/job-queue/paused - Check if queue is paused
*/
router.get('/paused', async (_req: Request, res: Response) => {
res.json({ success: true, queue_paused: queuePaused });
});
/**
* POST /api/job-queue/enqueue-batch - Queue multiple dispensaries at once
* Body: { dispensary_ids: number[], job_type?: string, priority?: number }
*/
router.post('/enqueue-batch', async (req: Request, res: Response) => {
try {
const { dispensary_ids, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;
if (!Array.isArray(dispensary_ids) || dispensary_ids.length === 0) {
return res.status(400).json({ success: false, error: 'dispensary_ids array is required' });
}
if (dispensary_ids.length > 500) {
return res.status(400).json({ success: false, error: 'Maximum 500 dispensaries per batch' });
}
// Insert jobs, skipping duplicates
const { rows } = await pool.query(`
INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type, status, created_at)
SELECT
d.id,
$2::text,
$3::integer,
'api_batch',
'pending',
NOW()
FROM dispensaries d
WHERE d.id = ANY($1::int[])
AND d.crawl_enabled = true
AND d.platform_dispensary_id IS NOT NULL
AND NOT EXISTS (
SELECT 1 FROM dispensary_crawl_jobs cj
WHERE cj.dispensary_id = d.id
AND cj.job_type = $2::text
AND cj.status IN ('pending', 'running')
)
RETURNING id, dispensary_id
`, [dispensary_ids, job_type, priority]);
res.json({
success: true,
queued: rows.length,
requested: dispensary_ids.length,
job_ids: rows.map(r => r.id),
message: `Queued ${rows.length} of ${dispensary_ids.length} dispensaries`
});
} catch (error: any) {
console.error('[JobQueue] Error batch enqueuing:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/job-queue/enqueue-state - Queue all crawl-enabled dispensaries for a state
* Body: { state_code: string, job_type?: string, priority?: number, limit?: number }
*/
router.post('/enqueue-state', async (req: Request, res: Response) => {
try {
const { state_code, job_type = 'dutchie_product_crawl', priority = 0, limit = 200 } = req.body;
if (!state_code) {
return res.status(400).json({ success: false, error: 'state_code is required (e.g., "AZ")' });
}
// Get state_id and queue jobs
const { rows } = await pool.query(`
WITH target_state AS (
SELECT id FROM states WHERE code = $1
)
INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type, status, created_at)
SELECT
d.id,
$2::text,
$3::integer,
'api_state',
'pending',
NOW()
FROM dispensaries d, target_state
WHERE d.state_id = target_state.id
AND d.crawl_enabled = true
AND d.platform_dispensary_id IS NOT NULL
AND NOT EXISTS (
SELECT 1 FROM dispensary_crawl_jobs cj
WHERE cj.dispensary_id = d.id
AND cj.job_type = $2::text
AND cj.status IN ('pending', 'running')
)
LIMIT $4::integer
RETURNING id, dispensary_id
`, [state_code.toUpperCase(), job_type, priority, limit]);
// Get total available count
const countResult = await pool.query(`
WITH target_state AS (
SELECT id FROM states WHERE code = $1
)
SELECT COUNT(*) as total
FROM dispensaries d, target_state
WHERE d.state_id = target_state.id
AND d.crawl_enabled = true
AND d.platform_dispensary_id IS NOT NULL
`, [state_code.toUpperCase()]);
res.json({
success: true,
queued: rows.length,
total_available: parseInt(countResult.rows[0].total),
state: state_code.toUpperCase(),
job_type,
message: `Queued ${rows.length} dispensaries for ${state_code.toUpperCase()}`
});
} catch (error: any) {
console.error('[JobQueue] Error enqueuing state:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/job-queue/clear-pending - Clear all pending jobs (optionally filtered)
* Body: { state_code?: string, job_type?: string }
*/
router.post('/clear-pending', async (req: Request, res: Response) => {
try {
const { state_code, job_type } = req.body;
let query = `
UPDATE dispensary_crawl_jobs
SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
WHERE status = 'pending'
`;
const params: any[] = [];
let paramIndex = 1;
if (job_type) {
params.push(job_type);
query += ` AND job_type = $${paramIndex++}`;
}
if (state_code) {
params.push((state_code as string).toUpperCase());
query += ` AND dispensary_id IN (
SELECT d.id FROM dispensaries d
JOIN states s ON s.id = d.state_id
WHERE s.code = $${paramIndex++}
)`;
}
const result = await pool.query(query, params);
res.json({
success: true,
cleared: result.rowCount,
message: `Cancelled ${result.rowCount} pending jobs`
});
} catch (error: any) {
console.error('[JobQueue] Error clearing pending:', error);
res.status(500).json({ success: false, error: error.message });
}
});
export default router;
export { queuePaused };