feat: SEO template library, discovery pipeline, and orchestrator enhancements
## SEO Template Library
- Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration)
- Add Template Library tab in SEO Orchestrator with accordion-based editors
- Add template preview, validation, and variable injection engine
- Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate

## Discovery Pipeline
- Add promotion.ts for discovery location validation and promotion
- Add discover-all-states.ts script for multi-state discovery
- Add promotion log migration (067)
- Enhance discovery routes and types

## Orchestrator & Admin
- Add crawl_enabled filter to stores page
- Add API permissions page
- Add job queue management
- Add price analytics routes
- Add markets and intelligence routes
- Enhance dashboard and worker monitoring

## Infrastructure
- Add migrations for worker definitions, SEO settings, field alignment
- Add canonical pipeline for scraper v2
- Update hydration and sync orchestrator
- Enhance multi-state query service

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
467
backend/src/routes/job-queue.ts
Normal file
467
backend/src/routes/job-queue.ts
Normal file
@@ -0,0 +1,467 @@
|
||||
/**
|
||||
* Job Queue Management API Routes
|
||||
*
|
||||
* Endpoints for viewing and managing the crawl job queue:
|
||||
* GET /api/job-queue - List all jobs (with filters)
|
||||
* GET /api/job-queue/stats - Queue statistics
|
||||
* GET /api/job-queue/:id - Get single job details
|
||||
* PUT /api/job-queue/:id/priority - Update job priority
|
||||
* POST /api/job-queue/:id/cancel - Cancel a pending job
|
||||
* POST /api/job-queue/:id/retry - Retry a failed job
|
||||
* POST /api/job-queue/bulk-priority - Bulk update priorities
|
||||
* POST /api/job-queue/pause - Pause queue processing
|
||||
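* POST /api/job-queue/resume - Resume queue processing
* POST /api/job-queue/enqueue - Add a new job to the queue
* GET /api/job-queue/paused - Check if queue is paused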
|
||||
*/
|
||||
|
||||
import { Router, Request, Response } from 'express';
|
||||
import { pool } from '../db/pool';
|
||||
|
||||
const router = Router();
|
||||
|
||||
// In-memory queue pause flag: held per process and reset to false on restart (would be in Redis in production)
|
||||
let queuePaused = false;
|
||||
|
||||
/**
 * GET /api/job-queue - List jobs with filters
 *
 * Query parameters (all optional):
 *   status        - job status to match; 'all' disables the filter (default 'pending')
 *   job_type      - exact job_type to match
 *   dispensary_id - exact dispensary_id to match
 *   sort_by       - priority | created_at | scheduled_at | dispensary_name (default 'priority')
 *   sort_order    - 'asc' sorts ascending; any other value sorts descending
 *   limit         - page size; unparsable values fall back to 50, negatives clamp to 0
 *   offset        - rows to skip; unparsable values fall back to 0, negatives clamp to 0
 *
 * Responds with { success, jobs, total, limit, offset, queue_paused }, where
 * `total` is the number of rows matching the filters regardless of paging.
 */
router.get('/', async (req: Request, res: Response) => {
  try {
    const {
      status = 'pending',
      limit = '50',
      offset = '0',
      job_type,
      dispensary_id,
      sort_by = 'priority',
      sort_order = 'desc'
    } = req.query;

    // Paging values come straight from the query string. A NaN or negative
    // number would be rejected by the database and surface as a 500, so
    // normalise them here instead.
    const toNonNegativeInt = (raw: unknown, fallback: number): number => {
      const parsed = parseInt(String(raw), 10);
      return Number.isNaN(parsed) ? fallback : Math.max(parsed, 0);
    };
    const pageLimit = toNonNegativeInt(limit, 50);
    const pageOffset = toNonNegativeInt(offset, 0);

    // Build the filter clause once so the list query and the count query can
    // never disagree about which rows match.
    const filterClauses: string[] = [];
    const filterParams: unknown[] = [];
    const addFilter = (column: string, value: unknown): void => {
      filterParams.push(value);
      filterClauses.push(`${column} = $${filterParams.length}`);
    };

    if (status && status !== 'all') {
      addFilter('j.status', status);
    }
    if (job_type) {
      addFilter('j.job_type', job_type);
    }
    if (dispensary_id) {
      addFilter('j.dispensary_id', dispensary_id);
    }

    const whereSql = filterClauses.length > 0 ? `WHERE ${filterClauses.join(' AND ')}` : '';

    // Sorting: the column is interpolated into the SQL text, so it must come
    // from this allow-list and never from the raw request value.
    const sortColumns = new Map<string, string>([
      ['priority', 'j.priority'],
      ['created_at', 'j.created_at'],
      ['scheduled_at', 'j.scheduled_at'],
      ['dispensary_name', 'd.name']
    ]);
    const sortKey = typeof sort_by === 'string' && sortColumns.has(sort_by) ? sort_by : 'priority';
    const sortDir = sort_order === 'asc' ? 'ASC' : 'DESC';

    let orderSql = `ORDER BY ${sortColumns.get(sortKey)} ${sortDir} NULLS LAST`;
    // Add secondary sort by created_at for consistent ordering
    if (sortKey !== 'created_at') {
      orderSql += ', j.created_at ASC';
    }

    const listSql = `
      SELECT
        j.id,
        j.dispensary_id,
        d.name as dispensary_name,
        d.city,
        d.state,
        j.job_type,
        j.trigger_type,
        j.priority,
        j.status,
        j.scheduled_at,
        j.started_at,
        j.completed_at,
        j.duration_ms,
        j.products_found,
        j.error_message,
        j.retry_count,
        j.max_retries,
        j.worker_id,
        j.locked_by,
        j.created_at
      FROM dispensary_crawl_jobs j
      LEFT JOIN dispensaries d ON d.id = j.dispensary_id
      ${whereSql}
      ${orderSql}
      LIMIT $${filterParams.length + 1}
      OFFSET $${filterParams.length + 2}
    `;

    // Total count for pagination (same filters, no paging)
    const countSql = `
      SELECT COUNT(*) as total
      FROM dispensary_crawl_jobs j
      ${whereSql}
    `;

    // The two queries are independent, so run them concurrently.
    const [listResult, countResult] = await Promise.all([
      pool.query(listSql, [...filterParams, pageLimit, pageOffset]),
      pool.query(countSql, filterParams)
    ]);

    res.json({
      success: true,
      jobs: listResult.rows,
      total: parseInt(countResult.rows[0].total, 10),
      limit: pageLimit,
      offset: pageOffset,
      queue_paused: queuePaused
    });
  } catch (error: unknown) {
    console.error('[JobQueue] Error listing jobs:', error);
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ success: false, error: message });
  }
});
|
||||
|
||||
/**
 * GET /api/job-queue/stats - Queue statistics
 *
 * Responds with:
 *   stats        - counts per status (completed/failed limited to the last 24 hours),
 *                  average duration of jobs completed in the last 24 hours, highest
 *                  pending priority, oldest pending created_at, an estimated wait,
 *                  and the current pause flag
 *   by_type      - pending job counts grouped by job_type
 *   top_priority - the five pending jobs with the highest priority
 */
router.get('/stats', async (_req: Request, res: Response) => {
  try {
    // The three queries do not depend on each other, so issue them together
    // rather than paying three sequential round trips.
    const [summaryResult, byTypeResult, topPriorityResult] = await Promise.all([
      pool.query(`
        SELECT
          COUNT(*) FILTER (WHERE status = 'pending') as pending_count,
          COUNT(*) FILTER (WHERE status = 'running') as running_count,
          COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as completed_24h,
          COUNT(*) FILTER (WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '24 hours') as failed_24h,
          COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled_count,
          AVG(duration_ms) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as avg_duration_ms,
          MAX(priority) FILTER (WHERE status = 'pending') as max_priority,
          MIN(created_at) FILTER (WHERE status = 'pending') as oldest_pending
        FROM dispensary_crawl_jobs
      `),
      // Pending jobs by type
      pool.query(`
        SELECT job_type, COUNT(*) as count
        FROM dispensary_crawl_jobs
        WHERE status = 'pending'
        GROUP BY job_type
        ORDER BY count DESC
      `),
      // Top priority pending jobs
      pool.query(`
        SELECT
          j.id,
          j.dispensary_id,
          d.name as dispensary_name,
          j.job_type,
          j.priority,
          j.created_at
        FROM dispensary_crawl_jobs j
        LEFT JOIN dispensaries d ON d.id = j.dispensary_id
        WHERE j.status = 'pending'
        ORDER BY j.priority DESC, j.created_at ASC
        LIMIT 5
      `)
    ]);

    const stats = summaryResult.rows[0];

    // Parse each aggregate once; unparsable or NULL aggregates become 0.
    const toCount = (raw: string): number => parseInt(raw, 10) || 0;
    const pending = toCount(stats.pending_count);
    const running = toCount(stats.running_count);
    const avgDurationRaw = parseFloat(stats.avg_duration_ms);

    // Estimate wait time based on avg processing rate: pending work divided
    // across the running jobs (at least one), assuming 30s per job when no
    // job has completed in the last 24 hours.
    const avgDuration = avgDurationRaw || 30000;
    const estimatedWaitMs = (pending * avgDuration) / Math.max(running, 1);

    res.json({
      success: true,
      stats: {
        pending,
        running,
        completed_24h: toCount(stats.completed_24h),
        failed_24h: toCount(stats.failed_24h),
        cancelled: toCount(stats.cancelled_count),
        avg_duration_ms: Math.round(avgDurationRaw) || null,
        max_priority: toCount(stats.max_priority),
        oldest_pending: stats.oldest_pending,
        estimated_wait_ms: Math.round(estimatedWaitMs),
        queue_paused: queuePaused
      },
      by_type: byTypeResult.rows,
      top_priority: topPriorityResult.rows
    });
  } catch (error: unknown) {
    console.error('[JobQueue] Error getting stats:', error);
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ success: false, error: message });
  }
});
|
||||
|
||||
/**
 * GET /api/job-queue/:id - Get single job
 *
 * Responds with { success, job } where `job` is the full job row joined with
 * its dispensary's name, city, state and menu_url, or 404 when no row matches.
 *
 * This route is registered before the literal GET /paused route further down
 * in this file, so a request for /paused reaches this handler first with
 * id = "paused". Non-numeric ids are therefore passed on with next() so that
 * later routes can match, instead of being sent to the database.
 */
router.get('/:id', async (req: Request, res: Response, next: () => void) => {
  const { id } = req.params;

  // Job ids are numeric; anything else is not ours to handle.
  if (!/^\d+$/.test(id)) {
    return next();
  }

  try {
    const { rows } = await pool.query(`
      SELECT
        j.*,
        d.name as dispensary_name,
        d.city,
        d.state,
        d.menu_url
      FROM dispensary_crawl_jobs j
      LEFT JOIN dispensaries d ON d.id = j.dispensary_id
      WHERE j.id = $1
    `, [id]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Job not found' });
    }

    res.json({ success: true, job: rows[0] });
  } catch (error: unknown) {
    console.error('[JobQueue] Error getting job:', error);
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ success: false, error: message });
  }
});
|
||||
|
||||
/**
 * PUT /api/job-queue/:id/priority - Update job priority
 *
 * Body: { priority }, a number from 0 to 100 (400 otherwise).
 * Only a job whose status is 'pending' is updated; if no such row exists the
 * response is 404. On success responds with the job's id, priority and status.
 */
router.put('/:id/priority', async (req: Request, res: Response) => {
  try {
    const { id: jobId } = req.params;
    const { priority: requestedPriority } = req.body;

    const rejected =
      typeof requestedPriority !== 'number' ||
      requestedPriority < 0 ||
      requestedPriority > 100;

    if (rejected) {
      res.status(400).json({
        success: false,
        error: 'Priority must be a number between 0 and 100'
      });
      return;
    }

    // The status guard in the WHERE clause keeps non-pending jobs untouched.
    const result = await pool.query(`
      UPDATE dispensary_crawl_jobs
      SET priority = $1, updated_at = NOW()
      WHERE id = $2 AND status = 'pending'
      RETURNING id, priority, status
    `, [requestedPriority, jobId]);

    const [updatedJob] = result.rows;
    if (result.rows.length === 0) {
      res.status(404).json({
        success: false,
        error: 'Job not found or not in pending status'
      });
      return;
    }

    res.json({ success: true, job: updatedJob });
  } catch (error: any) {
    console.error('[JobQueue] Error updating priority:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
/**
 * POST /api/job-queue/:id/cancel - Cancel a pending job
 *
 * Marks the job 'cancelled' and stamps completed_at / updated_at, but only
 * when its current status is 'pending'. Responds 404 when no row was changed.
 */
router.post('/:id/cancel', async (req: Request, res: Response) => {
  try {
    const jobId = req.params.id;

    const result = await pool.query(`
      UPDATE dispensary_crawl_jobs
      SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
      WHERE id = $1 AND status = 'pending'
      RETURNING id, status
    `, [jobId]);

    const nothingCancelled = result.rows.length === 0;
    if (nothingCancelled) {
      res.status(404).json({
        success: false,
        error: 'Job not found or not in pending status'
      });
      return;
    }

    res.json({ success: true, job: result.rows[0], message: 'Job cancelled' });
  } catch (error: any) {
    console.error('[JobQueue] Error cancelling job:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
/**
 * POST /api/job-queue/:id/retry - Retry a failed job
 *
 * Body (optional): { priority }, a number from 0 to 100 to assign to the
 * retried job. When omitted or null the job keeps its current priority.
 *
 * Resets a 'failed' or 'cancelled' job back to 'pending', clears its run and
 * lock fields and increments retry_count. Responds 400 for an invalid
 * priority and 404 when the job does not exist or is in another status.
 */
router.post('/:id/retry', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    // The body is optional for this endpoint; tolerate it being absent.
    const { priority } = req.body ?? {};

    const hasPriority = priority !== undefined && priority !== null;
    // Same bounds as PUT /:id/priority, so a bad value is answered with 400
    // here rather than being passed to the database.
    if (
      hasPriority &&
      (typeof priority !== 'number' || !Number.isFinite(priority) || priority < 0 || priority > 100)
    ) {
      return res.status(400).json({
        success: false,
        error: 'Priority must be a number between 0 and 100'
      });
    }

    const { rows } = await pool.query(`
      UPDATE dispensary_crawl_jobs
      SET
        status = 'pending',
        priority = COALESCE($2, priority),
        error_message = NULL,
        started_at = NULL,
        completed_at = NULL,
        duration_ms = NULL,
        worker_id = NULL,
        locked_by = NULL,
        locked_at = NULL,
        retry_count = retry_count + 1,
        updated_at = NOW()
      WHERE id = $1 AND status IN ('failed', 'cancelled')
      RETURNING id, status, priority, retry_count
    `, [id, hasPriority ? priority : null]); // NULL lets COALESCE keep the stored priority

    if (rows.length === 0) {
      return res.status(404).json({
        success: false,
        error: 'Job not found or not in failed/cancelled status'
      });
    }

    res.json({ success: true, job: rows[0], message: 'Job queued for retry' });
  } catch (error: unknown) {
    console.error('[JobQueue] Error retrying job:', error);
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ success: false, error: message });
  }
});
|
||||
|
||||
/**
 * POST /api/job-queue/bulk-priority - Bulk update priorities
 *
 * Body: { jobs: Array<{ id: number, priority: number }> } (400 when `jobs` is
 * missing or empty).
 *
 * All updates run in one transaction and only touch jobs whose status is
 * 'pending'. Entries that are not objects, whose id is not a number, or whose
 * priority is not a number from 0 to 100 (the same bounds as
 * PUT /:id/priority) are skipped rather than failing the request.
 *
 * Responds with { success, updated, skipped, message }, where `updated` is
 * the number of rows changed and `skipped` the number of entries ignored.
 */
router.post('/bulk-priority', async (req: Request, res: Response) => {
  try {
    const { jobs } = req.body; // Array of { id, priority }

    if (!Array.isArray(jobs) || jobs.length === 0) {
      return res.status(400).json({
        success: false,
        error: 'jobs array is required'
      });
    }

    // Screen entries up front so a malformed element (including null) can
    // never throw inside the transaction.
    const validJobs = jobs.filter(
      (job: any) =>
        job !== null &&
        typeof job === 'object' &&
        typeof job.id === 'number' &&
        typeof job.priority === 'number' &&
        job.priority >= 0 &&
        job.priority <= 100
    );
    const skipped = jobs.length - validJobs.length;

    let updated = 0;
    const client = await pool.connect();
    try {
      await client.query('BEGIN');

      for (const job of validJobs) {
        const result = await client.query(`
          UPDATE dispensary_crawl_jobs
          SET priority = $1, updated_at = NOW()
          WHERE id = $2 AND status = 'pending'
        `, [job.priority, job.id]);
        updated += result.rowCount || 0;
      }

      await client.query('COMMIT');
    } catch (err) {
      // A failing ROLLBACK must not hide the error that caused it.
      try {
        await client.query('ROLLBACK');
      } catch (rollbackErr) {
        console.error('[JobQueue] Error rolling back bulk priority update:', rollbackErr);
      }
      throw err;
    } finally {
      client.release();
    }

    res.json({ success: true, updated, skipped, message: `Updated ${updated} jobs` });
  } catch (error: unknown) {
    console.error('[JobQueue] Error bulk updating priorities:', error);
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ success: false, error: message });
  }
});
|
||||
|
||||
/**
 * POST /api/job-queue/enqueue - Add a new job to the queue
 *
 * Body: { dispensary_id, job_type?, priority? }. job_type defaults to
 * 'dutchie_product_crawl' and priority to 0; dispensary_id is required (400).
 *
 * If a pending job already exists for the same dispensary and job type, no
 * new row is inserted: that job's priority is raised to the requested value
 * when it is higher, and its id is returned. Otherwise a new job is inserted
 * with trigger_type 'manual'.
 */
router.post('/enqueue', async (req: Request, res: Response) => {
  try {
    const { dispensary_id, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;

    if (!dispensary_id) {
      res.status(400).json({ success: false, error: 'dispensary_id is required' });
      return;
    }

    // Look for a job that is already waiting for this dispensary and type
    const pendingLookup = await pool.query(`
      SELECT id FROM dispensary_crawl_jobs
      WHERE dispensary_id = $1 AND job_type = $2 AND status = 'pending'
    `, [dispensary_id, job_type]);

    const alreadyQueued = pendingLookup.rows.length > 0;
    if (alreadyQueued) {
      const existingJobId = pendingLookup.rows[0].id;

      // GREATEST keeps the stored priority unless the requested one is higher
      await pool.query(`
        UPDATE dispensary_crawl_jobs
        SET priority = GREATEST(priority, $1), updated_at = NOW()
        WHERE id = $2
      `, [priority, existingJobId]);

      res.json({
        success: true,
        job_id: existingJobId,
        message: 'Job already queued, priority updated'
      });
      return;
    }

    const inserted = await pool.query(`
      INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type)
      VALUES ($1, $2, $3, 'manual')
      RETURNING id
    `, [dispensary_id, job_type, priority]);

    res.json({ success: true, job_id: inserted.rows[0].id, message: 'Job enqueued' });
  } catch (error: any) {
    console.error('[JobQueue] Error enqueuing job:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
/**
 * POST /api/job-queue/pause - Pause queue processing
 *
 * Sets the module-level pause flag and echoes the new state.
 */
router.post('/pause', async (_req: Request, res: Response) => {
  queuePaused = true;

  const payload = {
    success: true,
    queue_paused: queuePaused,
    message: 'Queue paused'
  };
  res.json(payload);
});
|
||||
|
||||
/**
 * POST /api/job-queue/resume - Resume queue processing
 *
 * Clears the module-level pause flag and echoes the new state.
 */
router.post('/resume', async (_req: Request, res: Response) => {
  queuePaused = false;

  const payload = {
    success: true,
    queue_paused: queuePaused,
    message: 'Queue resumed'
  };
  res.json(payload);
});
|
||||
|
||||
/**
 * GET /api/job-queue/paused - Check if queue is paused
 *
 * Reports the current value of the module-level pause flag.
 */
router.get('/paused', async (_req: Request, res: Response) => {
  const payload = { success: true, queue_paused: queuePaused };
  res.json(payload);
});
|
||||
|
||||
export default router;
|
||||
export { queuePaused };
|
||||
Reference in New Issue
Block a user