feat: SEO template library, discovery pipeline, and orchestrator enhancements

## SEO Template Library
- Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration)
- Add Template Library tab in SEO Orchestrator with accordion-based editors
- Add template preview, validation, and variable injection engine
- Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate

## Discovery Pipeline
- Add promotion.ts for discovery location validation and promotion
- Add discover-all-states.ts script for multi-state discovery
- Add promotion log migration (067)
- Enhance discovery routes and types

## Orchestrator & Admin
- Add crawl_enabled filter to stores page
- Add API permissions page
- Add job queue management
- Add price analytics routes
- Add markets and intelligence routes
- Enhance dashboard and worker monitoring

## Infrastructure
- Add migrations for worker definitions, SEO settings, field alignment
- Add canonical pipeline for scraper v2
- Update hydration and sync orchestrator
- Enhance multi-state query service

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-09 00:05:34 -07:00
parent 9711d594db
commit 2f483b3084
83 changed files with 16700 additions and 1277 deletions

View File

@@ -0,0 +1,467 @@
/**
* Job Queue Management API Routes
*
* Endpoints for viewing and managing the crawl job queue:
* GET /api/job-queue - List jobs (with filters; defaults to pending jobs)
* GET /api/job-queue/stats - Queue statistics
* GET /api/job-queue/paused - Check whether the queue is paused
* GET /api/job-queue/:id - Get single job details
* PUT /api/job-queue/:id/priority - Update job priority
* POST /api/job-queue/:id/cancel - Cancel a pending job
* POST /api/job-queue/:id/retry - Retry a failed or cancelled job
* POST /api/job-queue/bulk-priority - Bulk update priorities
* POST /api/job-queue/enqueue - Add a new job to the queue
* POST /api/job-queue/pause - Pause queue processing
* POST /api/job-queue/resume - Resume queue processing
*/
import { Router, Request, Response } from 'express';
import { pool } from '../db/pool';
// Express router that every job-queue endpoint in this file is registered on.
const router = Router();
// In-memory queue state (would be in Redis in production)
// Written by the pause/resume handlers below and echoed back in several responses.
// Being a plain module-level variable, it starts as false whenever the module is loaded.
let queuePaused = false;
/**
 * GET /api/job-queue - List jobs with filters
 *
 * Query parameters (all optional):
 *   status        - exact match on j.status; 'all' disables the filter (default 'pending')
 *   job_type      - exact match on j.job_type
 *   dispensary_id - exact match on j.dispensary_id
 *   sort_by       - priority | created_at | scheduled_at | dispensary_name;
 *                   anything else falls back to priority
 *   sort_order    - 'asc' sorts ascending; anything else sorts descending
 *   limit, offset - pagination window (defaults 50 and 0); values that do not
 *                   parse to a non-negative integer fall back to the defaults
 *
 * Responds with { success, jobs, total, limit, offset, queue_paused }, where
 * total counts every job matching the filters regardless of pagination and
 * limit/offset are the values that were actually applied.
 */
router.get('/', async (req: Request, res: Response) => {
  try {
    const {
      status = 'pending',
      limit = '50',
      offset = '0',
      job_type,
      dispensary_id,
      sort_by = 'priority',
      sort_order = 'desc'
    } = req.query;

    // parseInt yields NaN for junk input and Postgres rejects a negative
    // LIMIT/OFFSET, so unusable values fall back to the default instead of
    // surfacing as a 500.
    const toNonNegativeInt = (raw: unknown, fallback: number): number => {
      const parsed = parseInt(String(raw), 10);
      return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
    };
    const pageSize = toNonNegativeInt(limit, 50);
    const pageOffset = toNonNegativeInt(offset, 0);

    // Build the filter once so the list query and the count query can never
    // disagree about which rows match.
    const conditions: string[] = [];
    const filterParams: unknown[] = [];
    const addFilter = (column: string, value: unknown): void => {
      filterParams.push(value);
      conditions.push(`${column} = $${filterParams.length}`);
    };
    if (status && status !== 'all') {
      addFilter('j.status', status);
    }
    if (job_type) {
      addFilter('j.job_type', job_type);
    }
    if (dispensary_id) {
      addFilter('j.dispensary_id', dispensary_id);
    }
    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';

    // Sorting: only whitelisted keys are ever interpolated into the SQL text.
    const sortExpressions: Record<string, string> = {
      priority: 'j.priority',
      created_at: 'j.created_at',
      scheduled_at: 'j.scheduled_at',
      dispensary_name: 'd.name'
    };
    const sortKey =
      typeof sort_by === 'string' && Object.prototype.hasOwnProperty.call(sortExpressions, sort_by)
        ? sort_by
        : 'priority';
    const sortDir = sort_order === 'asc' ? 'ASC' : 'DESC';
    let orderBy = `${sortExpressions[sortKey]} ${sortDir} NULLS LAST`;
    // Add secondary sort by created_at for consistent ordering
    if (sortKey !== 'created_at') {
      orderBy += ', j.created_at ASC';
    }

    const listQuery = `
      SELECT
        j.id,
        j.dispensary_id,
        d.name as dispensary_name,
        d.city,
        d.state,
        j.job_type,
        j.trigger_type,
        j.priority,
        j.status,
        j.scheduled_at,
        j.started_at,
        j.completed_at,
        j.duration_ms,
        j.products_found,
        j.error_message,
        j.retry_count,
        j.max_retries,
        j.worker_id,
        j.locked_by,
        j.created_at
      FROM dispensary_crawl_jobs j
      LEFT JOIN dispensaries d ON d.id = j.dispensary_id
      ${whereClause}
      ORDER BY ${orderBy}
      LIMIT $${filterParams.length + 1}
      OFFSET $${filterParams.length + 2}
    `;
    const { rows } = await pool.query(listQuery, [...filterParams, pageSize, pageOffset]);

    // Get total count for pagination (same filters, no LIMIT/OFFSET)
    const countQuery = `
      SELECT COUNT(*) as total
      FROM dispensary_crawl_jobs j
      ${whereClause}
    `;
    const countResult = await pool.query(countQuery, filterParams);
    const total = parseInt(countResult.rows[0].total, 10);

    res.json({
      success: true,
      jobs: rows,
      total,
      limit: pageSize,
      offset: pageOffset,
      queue_paused: queuePaused
    });
  } catch (error: any) {
    console.error('[JobQueue] Error listing jobs:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
/**
 * GET /api/job-queue/stats - Queue statistics
 *
 * Responds with:
 *   stats        - aggregate counters over dispensary_crawl_jobs (pending, running,
 *                  completed/failed in the last 24 hours, cancelled, average duration,
 *                  highest pending priority, oldest pending job, estimated wait)
 *   by_type      - pending job counts grouped by job_type
 *   top_priority - the five pending jobs with the highest priority
 */
router.get('/stats', async (_req: Request, res: Response) => {
  // Integer parse that collapses NaN (and zero) to 0.
  const asInt = (raw: any): number => parseInt(raw) || 0;

  try {
    const summaryResult = await pool.query(`
SELECT
COUNT(*) FILTER (WHERE status = 'pending') as pending_count,
COUNT(*) FILTER (WHERE status = 'running') as running_count,
COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as completed_24h,
COUNT(*) FILTER (WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '24 hours') as failed_24h,
COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled_count,
AVG(duration_ms) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as avg_duration_ms,
MAX(priority) FILTER (WHERE status = 'pending') as max_priority,
MIN(created_at) FILTER (WHERE status = 'pending') as oldest_pending
FROM dispensary_crawl_jobs
`);
    const summary = summaryResult.rows[0];

    // Pending jobs broken down by type
    const pendingByType = await pool.query(`
SELECT job_type, COUNT(*) as count
FROM dispensary_crawl_jobs
WHERE status = 'pending'
GROUP BY job_type
ORDER BY count DESC
`);

    // The five pending jobs that would be picked up first by priority
    const highestPriority = await pool.query(`
SELECT
j.id,
j.dispensary_id,
d.name as dispensary_name,
j.job_type,
j.priority,
j.created_at
FROM dispensary_crawl_jobs j
LEFT JOIN dispensaries d ON d.id = j.dispensary_id
WHERE j.status = 'pending'
ORDER BY j.priority DESC, j.created_at ASC
LIMIT 5
`);

    // Rough wait estimate: pending jobs times the average duration, divided by
    // the number of running jobs (treated as at least one). Falls back to 30s
    // per job when no average is available.
    const pendingJobs = asInt(summary.pending_count);
    const meanDurationMs = parseFloat(summary.avg_duration_ms) || 30000;
    const runningJobs = Math.max(parseInt(summary.running_count) || 1, 1);
    const waitMs = (pendingJobs * meanDurationMs) / runningJobs;

    const statsPayload = {
      pending: asInt(summary.pending_count),
      running: asInt(summary.running_count),
      completed_24h: asInt(summary.completed_24h),
      failed_24h: asInt(summary.failed_24h),
      cancelled: asInt(summary.cancelled_count),
      avg_duration_ms: Math.round(parseFloat(summary.avg_duration_ms)) || null,
      max_priority: asInt(summary.max_priority),
      oldest_pending: summary.oldest_pending,
      estimated_wait_ms: Math.round(waitMs),
      queue_paused: queuePaused
    };

    res.json({
      success: true,
      stats: statsPayload,
      by_type: pendingByType.rows,
      top_priority: highestPriority.rows
    });
  } catch (error: any) {
    console.error('[JobQueue] Error getting stats:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
/**
 * GET /api/job-queue/:id - Get single job
 *
 * Responds with { success, job } where job is the full dispensary_crawl_jobs row
 * plus the dispensary's name, city, state and menu_url, or 404 when no row has
 * that id.
 *
 * Non-numeric ids are not handled here: the request is passed on with next().
 * This pattern matches any single path segment, so without that guard a literal
 * route registered later in this file (GET /paused) would be captured here and
 * fail inside the database on `j.id = 'paused'` with a 500.
 */
router.get('/:id', async (req: Request, res: Response, next: () => void) => {
  try {
    const { id } = req.params;

    // Job ids are treated as numeric (the bulk-priority handler only accepts
    // ids of type number); anything else belongs to another route.
    if (!/^\d+$/.test(id)) {
      next();
      return;
    }

    const { rows } = await pool.query(`
      SELECT
        j.*,
        d.name as dispensary_name,
        d.city,
        d.state,
        d.menu_url
      FROM dispensary_crawl_jobs j
      LEFT JOIN dispensaries d ON d.id = j.dispensary_id
      WHERE j.id = $1
    `, [id]);

    if (rows.length === 0) {
      res.status(404).json({ success: false, error: 'Job not found' });
      return;
    }
    res.json({ success: true, job: rows[0] });
  } catch (error: any) {
    console.error('[JobQueue] Error getting job:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
/**
 * PUT /api/job-queue/:id/priority - Update job priority
 *
 * Body: { priority } - must be a number from 0 to 100, otherwise the request is
 * rejected with 400. Only a job that is still 'pending' is updated; a missing
 * job or any other status yields 404.
 */
router.put('/:id/priority', async (req: Request, res: Response) => {
  try {
    const jobId = req.params.id;
    const requested = req.body.priority;

    const isNumber = typeof requested === 'number';
    if (!isNumber || requested < 0 || requested > 100) {
      res.status(400).json({
        success: false,
        error: 'Priority must be a number between 0 and 100'
      });
      return;
    }

    const result = await pool.query(`
UPDATE dispensary_crawl_jobs
SET priority = $1, updated_at = NOW()
WHERE id = $2 AND status = 'pending'
RETURNING id, priority, status
`, [requested, jobId]);
    const updatedJob = result.rows[0];

    // No row back means the id is unknown or the job already left 'pending'.
    if (result.rows.length === 0) {
      res.status(404).json({
        success: false,
        error: 'Job not found or not in pending status'
      });
      return;
    }

    res.json({ success: true, job: updatedJob });
  } catch (error: any) {
    console.error('[JobQueue] Error updating priority:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
/**
 * POST /api/job-queue/:id/cancel - Cancel a pending job
 *
 * Marks the job 'cancelled' and stamps completed_at/updated_at, but only while
 * it is still 'pending'. Responds 404 when the job does not exist or has
 * already moved to another status.
 */
router.post('/:id/cancel', async (req: Request, res: Response) => {
  try {
    const jobId = req.params.id;

    const result = await pool.query(`
UPDATE dispensary_crawl_jobs
SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
WHERE id = $1 AND status = 'pending'
RETURNING id, status
`, [jobId]);

    const cancelled = result.rows;
    if (cancelled.length > 0) {
      res.json({ success: true, job: cancelled[0], message: 'Job cancelled' });
      return;
    }

    res.status(404).json({
      success: false,
      error: 'Job not found or not in pending status'
    });
  } catch (error: any) {
    console.error('[JobQueue] Error cancelling job:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
/**
 * POST /api/job-queue/:id/retry - Retry a failed job
 *
 * Puts a 'failed' or 'cancelled' job back into 'pending', clearing its error,
 * timing, worker and lock columns and incrementing retry_count.
 * Body: { priority? } - when supplied it replaces the job's priority; when
 * absent the existing priority is kept (COALESCE in the UPDATE).
 * Responds 404 when the job does not exist or is in any other status.
 */
router.post('/:id/retry', async (req: Request, res: Response) => {
  try {
    const jobId = req.params.id;
    const newPriority = req.body.priority;

    // NOTE(review): the priority is passed through without the 0-100 check that
    // PUT /:id/priority applies - confirm whether that is intentional.
    const retrySql = `
UPDATE dispensary_crawl_jobs
SET
status = 'pending',
priority = COALESCE($2, priority),
error_message = NULL,
started_at = NULL,
completed_at = NULL,
duration_ms = NULL,
worker_id = NULL,
locked_by = NULL,
locked_at = NULL,
retry_count = retry_count + 1,
updated_at = NOW()
WHERE id = $1 AND status IN ('failed', 'cancelled')
RETURNING id, status, priority, retry_count
`;
    const result = await pool.query(retrySql, [jobId, newPriority]);

    const requeued = result.rows;
    if (requeued.length > 0) {
      res.json({ success: true, job: requeued[0], message: 'Job queued for retry' });
      return;
    }

    res.status(404).json({
      success: false,
      error: 'Job not found or not in failed/cancelled status'
    });
  } catch (error: any) {
    console.error('[JobQueue] Error retrying job:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
/**
 * POST /api/job-queue/bulk-priority - Bulk update priorities
 *
 * Body: { jobs: Array<{ id: number, priority: number }> } - must be a non-empty
 * array, otherwise 400.
 *
 * All updates run inside one transaction on a dedicated client. Entries that are
 * null/non-objects or whose id/priority is not a number are skipped rather than
 * failing the batch, and only jobs still in 'pending' status are changed.
 *
 * Responds with { success, updated, skipped, message } where updated is the
 * number of rows changed and skipped is the number of malformed entries.
 */
router.post('/bulk-priority', async (req: Request, res: Response) => {
  try {
    const { jobs } = req.body; // Array of { id, priority }
    if (!Array.isArray(jobs) || jobs.length === 0) {
      res.status(400).json({
        success: false,
        error: 'jobs array is required'
      });
      return;
    }

    const client = await pool.connect();
    try {
      await client.query('BEGIN');

      let updated = 0;
      let skipped = 0;
      for (const job of jobs) {
        // Guard against null/undefined entries first: reading .id on them would
        // throw and roll back every other update in the batch.
        const usable =
          job != null &&
          typeof job.id === 'number' &&
          typeof job.priority === 'number';
        if (!usable) {
          skipped += 1;
          continue;
        }

        const result = await client.query(`
          UPDATE dispensary_crawl_jobs
          SET priority = $1, updated_at = NOW()
          WHERE id = $2 AND status = 'pending'
        `, [job.priority, job.id]);
        updated += result.rowCount ?? 0;
      }

      await client.query('COMMIT');
      res.json({ success: true, updated, skipped, message: `Updated ${updated} jobs` });
    } catch (err) {
      // A failing ROLLBACK (e.g. a broken connection) must not replace the error
      // that caused it, so it is logged and the original error is rethrown.
      try {
        await client.query('ROLLBACK');
      } catch (rollbackError) {
        console.error('[JobQueue] Error rolling back bulk priority update:', rollbackError);
      }
      throw err;
    } finally {
      client.release();
    }
  } catch (error: any) {
    console.error('[JobQueue] Error bulk updating priorities:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
/**
 * POST /api/job-queue/enqueue - Add a new job to the queue
 *
 * Body: { dispensary_id, job_type?, priority? }
 *   dispensary_id - required; 400 when missing
 *   job_type      - defaults to 'dutchie_product_crawl'
 *   priority      - defaults to 0
 *
 * If a pending job of the same type already exists for the dispensary, no new
 * row is inserted: the existing job's priority is raised to the requested value
 * when that is higher, and its id is returned. Otherwise a job with
 * trigger_type 'manual' is inserted.
 */
router.post('/enqueue', async (req: Request, res: Response) => {
  try {
    const body = req.body;
    const dispensaryId = body.dispensary_id;
    const jobType = body.job_type === undefined ? 'dutchie_product_crawl' : body.job_type;
    const requestedPriority = body.priority === undefined ? 0 : body.priority;

    if (!dispensaryId) {
      res.status(400).json({ success: false, error: 'dispensary_id is required' });
      return;
    }

    // Look for a job of this type that is already waiting for the dispensary
    const pendingMatch = await pool.query(`
SELECT id FROM dispensary_crawl_jobs
WHERE dispensary_id = $1 AND job_type = $2 AND status = 'pending'
`, [dispensaryId, jobType]);

    if (pendingMatch.rows.length > 0) {
      const existingId = pendingMatch.rows[0].id;
      // GREATEST keeps the current priority unless the requested one is higher
      await pool.query(`
UPDATE dispensary_crawl_jobs
SET priority = GREATEST(priority, $1), updated_at = NOW()
WHERE id = $2
`, [requestedPriority, existingId]);
      res.json({
        success: true,
        job_id: existingId,
        message: 'Job already queued, priority updated'
      });
      return;
    }

    const inserted = await pool.query(`
INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type)
VALUES ($1, $2, $3, 'manual')
RETURNING id
`, [dispensaryId, jobType, requestedPriority]);

    res.json({ success: true, job_id: inserted.rows[0].id, message: 'Job enqueued' });
  } catch (error: any) {
    console.error('[JobQueue] Error enqueuing job:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
/**
 * POST /api/job-queue/pause - Pause queue processing
 *
 * Sets the module-level queuePaused flag to true and reports the new state.
 * NOTE(review): nothing in this file consults the flag to stop work; whatever
 * processes the queue presumably reads it elsewhere - confirm.
 */
router.post('/pause', async (_req: Request, res: Response) => {
  const paused = true;
  queuePaused = paused;
  res.json({
    success: true,
    queue_paused: paused,
    message: 'Queue paused'
  });
});
/**
 * POST /api/job-queue/resume - Resume queue processing
 *
 * Clears the module-level queuePaused flag and reports the new state.
 */
router.post('/resume', async (_req: Request, res: Response) => {
  const paused = false;
  queuePaused = paused;
  res.json({
    success: true,
    queue_paused: paused,
    message: 'Queue resumed'
  });
});
/**
 * GET /api/job-queue/paused - Check if queue is paused
 *
 * Reports the current value of the module-level queuePaused flag.
 * NOTE(review): this route is registered after GET /:id, whose pattern also
 * matches the path "/paused"; a request only reaches this handler if that
 * earlier handler passes it on.
 */
router.get('/paused', async (_req: Request, res: Response) => {
  const pausedNow = queuePaused;
  res.json({
    success: true,
    queue_paused: pausedNow
  });
});
// Default export: the router carrying every job-queue endpoint registered in this file.
export default router;
// Named export of the pause flag, a `let` that the pause/resume handlers reassign.
// NOTE(review): whether importers observe later reassignments depends on the module
// system / compile target - confirm, or expose a getter function instead.
export { queuePaused };