Major changes: - Split crawl into payload_fetch (API → disk) and product_refresh (disk → DB) - Add task chaining: store_discovery → product_discovery → payload_fetch → product_refresh - Add payload storage utilities for gzipped JSON on filesystem - Add /api/payloads endpoints for payload access and diffing - Add DB-driven TaskScheduler with schedule persistence - Track newDispensaryIds through discovery promotion for chaining - Add stealth improvements: HTTP fingerprinting, proxy rotation enhancements - Add Workers dashboard K8s scaling controls New files: - src/tasks/handlers/payload-fetch.ts - Fetches from API, saves to disk - src/services/task-scheduler.ts - DB-driven schedule management - src/utils/payload-storage.ts - Payload save/load utilities - src/routes/payloads.ts - Payload API endpoints - src/services/http-fingerprint.ts - Browser fingerprint generation - docs/TASK_WORKFLOW_2024-12-10.md - Complete workflow documentation Migrations: - 078: Proxy consecutive 403 tracking - 079: task_schedules table - 080: raw_crawl_payloads table - 081: payload column and last_fetch_at 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
833 lines
25 KiB
TypeScript
833 lines
25 KiB
TypeScript
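/**
 * Job Queue Management API Routes
 *
 * Endpoints for viewing and managing the crawl job queue:
 *   GET  /api/job-queue                - List jobs (with filters)
 *   GET  /api/job-queue/available      - List dispensaries available for crawling
 *   GET  /api/job-queue/history        - Recent job history with summary counts
 *   GET  /api/job-queue/stats          - Queue statistics
 *   GET  /api/job-queue/paused         - Check whether the queue is paused
 *   GET  /api/job-queue/:id            - Get single job details
 *   PUT  /api/job-queue/:id/priority   - Update job priority
 *   POST /api/job-queue/:id/cancel     - Cancel a pending job
 *   POST /api/job-queue/:id/retry      - Retry a failed or cancelled job
 *   POST /api/job-queue/bulk-priority  - Bulk update priorities
 *   POST /api/job-queue/enqueue        - Enqueue a task for one dispensary
 *   POST /api/job-queue/enqueue-batch  - Enqueue tasks for a list of dispensaries
 *   POST /api/job-queue/enqueue-state  - Enqueue tasks for all crawl-enabled dispensaries in a state
 *   POST /api/job-queue/clear-pending  - Cancel pending jobs (optionally filtered)
 *   POST /api/job-queue/pause          - Pause queue processing
 *   POST /api/job-queue/resume         - Resume queue processing
 */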
|
|
|
|
import { NextFunction, Request, Response, Router } from 'express';
import { pool } from '../db/pool';
|
|
|
|
// Express router that every job-queue endpoint in this file is registered on;
// it is the module's default export.
const router = Router();
|
|
|
|
// In-memory queue state (would be in Redis in production)
|
|
// Pause flag: set by POST /pause, cleared by POST /resume, and echoed in the
// list, stats and paused responses. It is a plain module variable, so it starts
// as false whenever this module is loaded. This file only sets, reports and
// exports it; whatever acts on the flag lives outside this file.
let queuePaused = false;
|
|
|
|
/**
 * GET /api/job-queue - List jobs with filters
 *
 * Query parameters (all optional):
 *   status        - job status to match; 'all' disables the filter (default 'pending')
 *   job_type      - exact job_type to match
 *   dispensary_id - exact dispensary_id to match
 *   sort_by       - 'priority' | 'created_at' | 'scheduled_at' | 'dispensary_name'
 *                   (anything else falls back to 'priority')
 *   sort_order    - 'asc' for ascending, anything else sorts descending
 *   limit, offset - pagination window (defaults 50 and 0; non-numeric or
 *                   negative values fall back to those defaults)
 *
 * Responds with { success, jobs, total, limit, offset, queue_paused }.
 * `total` counts every row matching the filters, ignoring pagination.
 */
router.get('/', async (req: Request, res: Response) => {
  try {
    const {
      status = 'pending',
      limit = '50',
      offset = '0',
      job_type,
      dispensary_id,
      sort_by = 'priority',
      sort_order = 'desc'
    } = req.query;

    // Non-numeric or negative paging values would otherwise be sent to
    // Postgres and surface as a 500, so fall back to the defaults instead.
    const parsedLimit = parseInt(limit as string, 10);
    const parsedOffset = parseInt(offset as string, 10);
    const pageSize = Number.isNaN(parsedLimit) || parsedLimit < 0 ? 50 : parsedLimit;
    const pageOffset = Number.isNaN(parsedOffset) || parsedOffset < 0 ? 0 : parsedOffset;

    // Build the filter once so the page query and the count query always agree.
    const filterParams: unknown[] = [];
    let whereSql = 'WHERE 1=1';
    const addFilter = (column: string, value: unknown): void => {
      filterParams.push(value);
      whereSql += ` AND ${column} = $${filterParams.length}`;
    };
    if (status && status !== 'all') addFilter('j.status', status);
    if (job_type) addFilter('j.job_type', job_type);
    if (dispensary_id) addFilter('j.dispensary_id', dispensary_id);

    // Only whitelisted identifiers are ever interpolated into ORDER BY.
    const validSortColumns = ['priority', 'created_at', 'scheduled_at', 'dispensary_name'];
    const sortCol = validSortColumns.includes(sort_by as string) ? (sort_by as string) : 'priority';
    const sortDir = sort_order === 'asc' ? 'ASC' : 'DESC';

    let orderSql =
      sortCol === 'dispensary_name'
        ? `ORDER BY d.name ${sortDir} NULLS LAST`
        : `ORDER BY j.${sortCol} ${sortDir} NULLS LAST`;
    // Secondary sort by created_at keeps the ordering stable across pages.
    if (sortCol !== 'created_at') {
      orderSql += ', j.created_at ASC';
    }

    const limitPosition = filterParams.length + 1;
    const { rows } = await pool.query(
      `
      SELECT
        j.id,
        j.dispensary_id,
        d.name as dispensary_name,
        d.city,
        d.state,
        j.job_type,
        j.trigger_type,
        j.priority,
        j.status,
        j.scheduled_at,
        j.started_at,
        j.completed_at,
        j.duration_ms,
        j.products_found,
        j.error_message,
        j.retry_count,
        j.max_retries,
        j.worker_id,
        j.locked_by,
        j.created_at
      FROM dispensary_crawl_jobs j
      LEFT JOIN dispensaries d ON d.id = j.dispensary_id
      ${whereSql}
      ${orderSql}
      LIMIT $${limitPosition} OFFSET $${limitPosition + 1}
      `,
      [...filterParams, pageSize, pageOffset]
    );

    // Total count for pagination; the filters only touch columns of j, so no join is needed.
    const countResult = await pool.query(
      `
      SELECT COUNT(*) as total
      FROM dispensary_crawl_jobs j
      ${whereSql}
      `,
      filterParams
    );
    const total = parseInt(countResult.rows[0].total, 10);

    res.json({
      success: true,
      jobs: rows,
      total,
      limit: pageSize,
      offset: pageOffset,
      queue_paused: queuePaused
    });
  } catch (error: any) {
    console.error('[JobQueue] Error listing jobs:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|
|
|
/**
 * GET /api/job-queue/available - List dispensaries available for crawling
 * Query: { state_code?: string, limit?: number }
 * NOTE: Must be defined BEFORE /:id route to avoid conflict
 *
 * A dispensary is listed when crawl_enabled is true and it has a
 * platform_dispensary_id. Each row also carries the created_at of its most
 * recent completed job (`last_crawl`) and whether a pending or running job
 * exists (`has_pending_job`). `by_state` gives counts per state code across
 * all such dispensaries, regardless of the state_code filter.
 *
 * Responds 400 when state_code is supplied more than once or is otherwise not
 * a plain string. A non-numeric or negative limit falls back to 100.
 */
router.get('/available', async (req: Request, res: Response) => {
  try {
    const { state_code, limit = '100' } = req.query;

    // A repeated query key arrives as an array; calling toUpperCase() on it
    // would throw and surface as a 500.
    if (state_code !== undefined && typeof state_code !== 'string') {
      return res.status(400).json({ success: false, error: 'state_code must be a single string' });
    }

    const parsedLimit = parseInt(limit as string, 10);
    const maxRows = Number.isNaN(parsedLimit) || parsedLimit < 0 ? 100 : parsedLimit;

    const params: unknown[] = [];
    let query = `
      SELECT
        d.id,
        d.name,
        d.city,
        s.code as state_code,
        d.platform_dispensary_id,
        d.crawl_enabled,
        (SELECT MAX(created_at) FROM dispensary_crawl_jobs WHERE dispensary_id = d.id AND status = 'completed') as last_crawl,
        EXISTS (
          SELECT 1 FROM dispensary_crawl_jobs
          WHERE dispensary_id = d.id AND status IN ('pending', 'running')
        ) as has_pending_job
      FROM dispensaries d
      LEFT JOIN states s ON s.id = d.state_id
      WHERE d.crawl_enabled = true
        AND d.platform_dispensary_id IS NOT NULL
    `;

    if (state_code) {
      params.push(state_code.toUpperCase());
      query += ` AND s.code = $${params.length}`;
    }

    params.push(maxRows);
    query += ` ORDER BY d.name LIMIT $${params.length}`;

    const { rows } = await pool.query(query, params);

    // Counts by state over every available dispensary (not just this page).
    const { rows: stateCounts } = await pool.query(`
      SELECT s.code, COUNT(*) as count
      FROM dispensaries d
      JOIN states s ON s.id = d.state_id
      WHERE d.crawl_enabled = true
        AND d.platform_dispensary_id IS NOT NULL
      GROUP BY s.code
      ORDER BY count DESC
    `);

    res.json({
      success: true,
      dispensaries: rows,
      total: rows.length,
      by_state: stateCounts
    });
  } catch (error: any) {
    console.error('[JobQueue] Error listing available:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|
|
|
/**
 * GET /api/job-queue/history - Get recent job history with results
 * Query: { state_code?: string, status?: string, limit?: number, hours?: number }
 * NOTE: Must be defined BEFORE /:id route to avoid conflict
 *
 * Returns jobs created within the last `hours` hours (default 24), newest
 * first, optionally filtered by status ('all' disables the filter) and state
 * code. `summary` aggregates every job in the same time window and is not
 * narrowed by the status/state_code filters.
 *
 * Non-numeric or negative `hours` / `limit` fall back to 24 / 50. Responds 400
 * when state_code is not a plain string.
 */
router.get('/history', async (req: Request, res: Response) => {
  try {
    const {
      state_code,
      status,
      limit = '50',
      hours = '24'
    } = req.query;

    if (state_code !== undefined && typeof state_code !== 'string') {
      return res.status(400).json({ success: false, error: 'state_code must be a single string' });
    }

    // Sanitize the numeric inputs up front. Previously a non-numeric `hours`
    // was interpolated as INTERVAL 'NaN hours' and failed inside Postgres.
    const parsedHours = parseInt(hours as string, 10);
    const parsedLimit = parseInt(limit as string, 10);
    const windowHours = Number.isNaN(parsedHours) || parsedHours < 0 ? 24 : parsedHours;
    const maxRows = Number.isNaN(parsedLimit) || parsedLimit < 0 ? 50 : parsedLimit;

    // The time window is bound as a parameter rather than spliced into the SQL text.
    const params: unknown[] = [windowHours];
    let query = `
      SELECT
        j.id,
        j.dispensary_id,
        d.name as dispensary_name,
        s.code as state_code,
        j.job_type,
        j.status,
        j.products_found,
        j.error_message,
        j.started_at,
        j.completed_at,
        j.duration_ms,
        j.created_at
      FROM dispensary_crawl_jobs j
      LEFT JOIN dispensaries d ON d.id = j.dispensary_id
      LEFT JOIN states s ON s.id = d.state_id
      WHERE j.created_at > NOW() - ($1::int * INTERVAL '1 hour')
    `;

    if (status && status !== 'all') {
      params.push(status);
      query += ` AND j.status = $${params.length}`;
    }

    if (state_code) {
      params.push(state_code.toUpperCase());
      query += ` AND s.code = $${params.length}`;
    }

    params.push(maxRows);
    query += ` ORDER BY j.created_at DESC LIMIT $${params.length}`;

    const { rows } = await pool.query(query, params);

    // Summary stats over the whole time window.
    const { rows: stats } = await pool.query(
      `
      SELECT
        COUNT(*) FILTER (WHERE status = 'completed') as completed,
        COUNT(*) FILTER (WHERE status = 'failed') as failed,
        COUNT(*) FILTER (WHERE status = 'running') as running,
        COUNT(*) FILTER (WHERE status = 'pending') as pending,
        SUM(products_found) FILTER (WHERE status = 'completed') as total_products,
        AVG(duration_ms) FILTER (WHERE status = 'completed') as avg_duration_ms
      FROM dispensary_crawl_jobs
      WHERE created_at > NOW() - ($1::int * INTERVAL '1 hour')
      `,
      [windowHours]
    );

    // The aggregates are run through parseInt/parseFloat; NaN (e.g. from a
    // NULL aggregate) collapses to 0, or to null for the average.
    const summaryRow = stats[0];
    res.json({
      success: true,
      jobs: rows,
      summary: {
        completed: parseInt(summaryRow.completed, 10) || 0,
        failed: parseInt(summaryRow.failed, 10) || 0,
        running: parseInt(summaryRow.running, 10) || 0,
        pending: parseInt(summaryRow.pending, 10) || 0,
        total_products: parseInt(summaryRow.total_products, 10) || 0,
        avg_duration_ms: Math.round(parseFloat(summaryRow.avg_duration_ms)) || null
      },
      hours: windowHours
    });
  } catch (error: any) {
    console.error('[JobQueue] Error getting history:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|
|
|
/**
 * GET /api/job-queue/stats - Queue statistics
 *
 * Reports aggregate counters over dispensary_crawl_jobs, the pending jobs
 * grouped by job_type, the five highest-priority pending jobs, and a rough
 * wait estimate for the pending backlog.
 */
router.get('/stats', async (_req: Request, res: Response) => {
  try {
    const aggregate = await pool.query(`
      SELECT
        COUNT(*) FILTER (WHERE status = 'pending') as pending_count,
        COUNT(*) FILTER (WHERE status = 'running') as running_count,
        COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as completed_24h,
        COUNT(*) FILTER (WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '24 hours') as failed_24h,
        COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled_count,
        AVG(duration_ms) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as avg_duration_ms,
        MAX(priority) FILTER (WHERE status = 'pending') as max_priority,
        MIN(created_at) FILTER (WHERE status = 'pending') as oldest_pending
      FROM dispensary_crawl_jobs
    `);
    const totals = aggregate.rows[0];

    // Pending jobs grouped by type, most common first.
    const pendingByType = await pool.query(`
      SELECT job_type, COUNT(*) as count
      FROM dispensary_crawl_jobs
      WHERE status = 'pending'
      GROUP BY job_type
      ORDER BY count DESC
    `);

    // Head of the queue: highest priority first, oldest first within a priority.
    const queueHead = await pool.query(`
      SELECT
        j.id,
        j.dispensary_id,
        d.name as dispensary_name,
        j.job_type,
        j.priority,
        j.created_at
      FROM dispensary_crawl_jobs j
      LEFT JOIN dispensaries d ON d.id = j.dispensary_id
      WHERE j.status = 'pending'
      ORDER BY j.priority DESC, j.created_at ASC
      LIMIT 5
    `);

    // parseInt with a 0 fallback for anything that does not parse to a non-zero number.
    const asCount = (raw: string): number => parseInt(raw) || 0;

    // Wait estimate: backlog size times the average duration (30s when no
    // average is available), divided by the number of running jobs (at least one).
    const pending = asCount(totals.pending_count);
    const averageMs = parseFloat(totals.avg_duration_ms);
    const perJobMs = averageMs || 30000;
    const activeJobs = Math.max(parseInt(totals.running_count) || 1, 1);
    const estimatedWaitMs = (pending * perJobMs) / activeJobs;

    const stats = {
      pending,
      running: asCount(totals.running_count),
      completed_24h: asCount(totals.completed_24h),
      failed_24h: asCount(totals.failed_24h),
      cancelled: asCount(totals.cancelled_count),
      avg_duration_ms: Math.round(averageMs) || null,
      max_priority: asCount(totals.max_priority),
      oldest_pending: totals.oldest_pending,
      estimated_wait_ms: Math.round(estimatedWaitMs),
      queue_paused: queuePaused
    };

    res.json({
      success: true,
      stats,
      by_type: pendingByType.rows,
      top_priority: queueHead.rows
    });
  } catch (error: any) {
    console.error('[JobQueue] Error getting stats:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|
|
|
/**
 * GET /api/job-queue/:id - Get single job
 *
 * Responds with { success, job } where `job` is the full dispensary_crawl_jobs
 * row plus the dispensary's name, city, state and menu_url, or 404 when no job
 * has that id.
 *
 * Only all-digit ids are handled here. Any other path segment is passed on
 * with next(), so static GET routes registered later on this router (such as
 * /paused) are still reachable, and a malformed id ends in Express's normal
 * 404 handling instead of a database cast error.
 */
router.get('/:id', async (req: Request, res: Response, next: NextFunction) => {
  const { id } = req.params;

  // bulk-priority treats job ids as numbers, so a non-numeric segment cannot
  // be a job id; let another route claim it.
  if (!/^\d+$/.test(id)) {
    return next();
  }

  try {
    const { rows } = await pool.query(`
      SELECT
        j.*,
        d.name as dispensary_name,
        d.city,
        d.state,
        d.menu_url
      FROM dispensary_crawl_jobs j
      LEFT JOIN dispensaries d ON d.id = j.dispensary_id
      WHERE j.id = $1
    `, [id]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Job not found' });
    }

    res.json({ success: true, job: rows[0] });
  } catch (error: any) {
    console.error('[JobQueue] Error getting job:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|
|
|
/**
 * PUT /api/job-queue/:id/priority - Update job priority
 *
 * Body: { priority: number } with 0 <= priority <= 100.
 * Only jobs whose status is 'pending' are updated; anything else yields 404.
 */
router.put('/:id/priority', async (req: Request, res: Response) => {
  try {
    const jobId = req.params.id;
    const { priority: requested } = req.body;

    if (typeof requested !== 'number' || requested < 0 || requested > 100) {
      res.status(400);
      return res.json({
        success: false,
        error: 'Priority must be a number between 0 and 100'
      });
    }

    const updateSql = `
      UPDATE dispensary_crawl_jobs
      SET priority = $1, updated_at = NOW()
      WHERE id = $2 AND status = 'pending'
      RETURNING id, priority, status
    `;
    const outcome = await pool.query(updateSql, [requested, jobId]);
    const [job] = outcome.rows;

    // No returned row: the id does not exist or the job is no longer pending.
    if (!job) {
      res.status(404);
      return res.json({
        success: false,
        error: 'Job not found or not in pending status'
      });
    }

    res.json({ success: true, job });
  } catch (error: any) {
    console.error('[JobQueue] Error updating priority:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|
|
|
/**
 * POST /api/job-queue/:id/cancel - Cancel a pending job
 *
 * Marks the job 'cancelled' and stamps completed_at/updated_at. Only jobs
 * whose status is 'pending' are affected; anything else yields 404.
 */
router.post('/:id/cancel', async (req: Request, res: Response) => {
  try {
    const jobId = req.params.id;

    const cancelSql = `
      UPDATE dispensary_crawl_jobs
      SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
      WHERE id = $1 AND status = 'pending'
      RETURNING id, status
    `;
    const outcome = await pool.query(cancelSql, [jobId]);
    const [job] = outcome.rows;

    // No returned row: the id does not exist or the job is no longer pending.
    if (!job) {
      res.status(404);
      return res.json({
        success: false,
        error: 'Job not found or not in pending status'
      });
    }

    res.json({ success: true, job, message: 'Job cancelled' });
  } catch (error: any) {
    console.error('[JobQueue] Error cancelling job:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|
|
|
/**
 * POST /api/job-queue/:id/retry - Retry a failed job
 *
 * Body (optional): { priority?: number } with 0 <= priority <= 100. When
 * omitted, the job keeps its current priority.
 *
 * Resets a 'failed' or 'cancelled' job to 'pending', clears its error, timing
 * and lock/worker fields, and increments retry_count. Jobs in any other status
 * yield 404; an out-of-range or non-numeric priority yields 400.
 */
router.post('/:id/retry', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    // A retry needs no payload, so tolerate a request without a parsed body.
    const { priority } = req.body ?? {};

    // Same bounds as PUT /:id/priority, applied only when a priority is given.
    const hasPriority = priority !== undefined && priority !== null;
    if (hasPriority && (typeof priority !== 'number' || priority < 0 || priority > 100)) {
      return res.status(400).json({
        success: false,
        error: 'Priority must be a number between 0 and 100'
      });
    }

    const { rows } = await pool.query(`
      UPDATE dispensary_crawl_jobs
      SET
        status = 'pending',
        priority = COALESCE($2, priority),
        error_message = NULL,
        started_at = NULL,
        completed_at = NULL,
        duration_ms = NULL,
        worker_id = NULL,
        locked_by = NULL,
        locked_at = NULL,
        retry_count = retry_count + 1,
        updated_at = NOW()
      WHERE id = $1 AND status IN ('failed', 'cancelled')
      RETURNING id, status, priority, retry_count
    `, [id, hasPriority ? priority : null]); // NULL makes COALESCE keep the stored priority

    if (rows.length === 0) {
      return res.status(404).json({
        success: false,
        error: 'Job not found or not in failed/cancelled status'
      });
    }

    res.json({ success: true, job: rows[0], message: 'Job queued for retry' });
  } catch (error: any) {
    console.error('[JobQueue] Error retrying job:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|
|
|
/**
 * POST /api/job-queue/bulk-priority - Bulk update priorities
 *
 * Body: { jobs: Array<{ id: number, priority: number }> }
 *
 * All updates run in one transaction and only touch jobs whose status is
 * 'pending'. Entries that are malformed (missing/non-numeric id or priority)
 * or whose priority is outside 0-100 (the same bounds PUT /:id/priority
 * enforces) are skipped rather than failing the batch.
 *
 * Responds with { success, updated, skipped, message }: `updated` is the
 * number of rows changed and `skipped` the number of entries ignored.
 */
router.post('/bulk-priority', async (req: Request, res: Response) => {
  try {
    const { jobs } = req.body; // Array of { id, priority }

    if (!Array.isArray(jobs) || jobs.length === 0) {
      return res.status(400).json({
        success: false,
        error: 'jobs array is required'
      });
    }

    const isValidPriority = (value: unknown): value is number =>
      typeof value === 'number' && value >= 0 && value <= 100;

    const client = await pool.connect();
    try {
      await client.query('BEGIN');

      let updated = 0;
      let skipped = 0;
      for (const job of jobs) {
        // Optional chaining keeps a null/primitive entry from throwing and
        // rolling back the whole batch.
        if (typeof job?.id !== 'number' || !isValidPriority(job?.priority)) {
          skipped += 1;
          continue;
        }

        const result = await client.query(`
          UPDATE dispensary_crawl_jobs
          SET priority = $1, updated_at = NOW()
          WHERE id = $2 AND status = 'pending'
        `, [job.priority, job.id]);
        updated += result.rowCount || 0;
      }

      await client.query('COMMIT');
      res.json({ success: true, updated, skipped, message: `Updated ${updated} jobs` });
    } catch (err) {
      await client.query('ROLLBACK');
      throw err;
    } finally {
      // Always hand the connection back to the pool.
      client.release();
    }
  } catch (error: any) {
    console.error('[JobQueue] Error bulk updating priorities:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|
|
|
/**
 * POST /api/job-queue/enqueue - Add a new job to the queue
 *
 * 2024-12-10: Rewired to use worker_tasks via taskService.
 * Legacy dispensary_crawl_jobs code commented out below.
 *
 * Body: { dispensary_id, job_type?: string, priority?: number }
 *
 * The legacy job_type is mapped to a task role; unrecognised values fall back
 * to 'product_refresh'. If a task with that role is already pending, claimed
 * or running for the dispensary, its id is returned and no new task is
 * created. Responds 400 when dispensary_id is missing.
 */
router.post('/enqueue', async (req: Request, res: Response) => {
  try {
    const { dispensary_id, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;

    if (!dispensary_id) {
      return res.status(400).json({ success: false, error: 'dispensary_id is required' });
    }

    // 2024-12-10: Map legacy job_type to new task role.
    // A Map is used instead of a plain object so that names inherited from
    // Object.prototype (e.g. job_type "constructor") cannot resolve to a
    // function and instead take the default role.
    const roleByJobType = new Map<string, string>([
      ['dutchie_product_crawl', 'product_refresh'],
      ['menu_detection', 'entry_point_discovery'],
      ['menu_detection_single', 'entry_point_discovery'],
      ['product_discovery', 'product_discovery'],
      ['store_discovery', 'store_discovery'],
    ]);
    const role = roleByJobType.get(job_type) ?? 'product_refresh';

    // 2024-12-10: Use taskService to create task in worker_tasks table
    const { taskService } = await import('../tasks/task-service');

    // Check if a task is already queued or in flight for this dispensary
    const existingTasks = await taskService.listTasks({
      dispensary_id,
      role: role as any,
      status: ['pending', 'claimed', 'running'],
      limit: 1,
    });

    if (existingTasks.length > 0) {
      return res.json({
        success: true,
        task_id: existingTasks[0].id,
        message: 'Task already queued'
      });
    }

    const task = await taskService.createTask({
      role: role as any,
      dispensary_id,
      priority,
    });

    res.json({ success: true, task_id: task.id, message: 'Task enqueued' });
  } catch (error: any) {
    console.error('[JobQueue] Error enqueuing task:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|
|
|
/*
|
|
* LEGACY CODE - 2024-12-10: Commented out, was using orphaned dispensary_crawl_jobs table
|
|
*
|
|
router.post('/enqueue', async (req: Request, res: Response) => {
|
|
try {
|
|
const { dispensary_id, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;
|
|
|
|
if (!dispensary_id) {
|
|
return res.status(400).json({ success: false, error: 'dispensary_id is required' });
|
|
}
|
|
|
|
// Check if job already pending for this dispensary
|
|
const existing = await pool.query(`
|
|
SELECT id FROM dispensary_crawl_jobs
|
|
WHERE dispensary_id = $1 AND job_type = $2 AND status = 'pending'
|
|
`, [dispensary_id, job_type]);
|
|
|
|
if (existing.rows.length > 0) {
|
|
// Update priority if higher
|
|
await pool.query(`
|
|
UPDATE dispensary_crawl_jobs
|
|
SET priority = GREATEST(priority, $1), updated_at = NOW()
|
|
WHERE id = $2
|
|
`, [priority, existing.rows[0].id]);
|
|
|
|
return res.json({
|
|
success: true,
|
|
job_id: existing.rows[0].id,
|
|
message: 'Job already queued, priority updated'
|
|
});
|
|
}
|
|
|
|
const { rows } = await pool.query(`
|
|
INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type)
|
|
VALUES ($1, $2, $3, 'manual')
|
|
RETURNING id
|
|
`, [dispensary_id, job_type, priority]);
|
|
|
|
res.json({ success: true, job_id: rows[0].id, message: 'Job enqueued' });
|
|
} catch (error: any) {
|
|
console.error('[JobQueue] Error enqueuing job:', error);
|
|
res.status(500).json({ success: false, error: error.message });
|
|
}
|
|
});
|
|
*/
|
|
|
|
/**
 * POST /api/job-queue/pause - Pause queue processing
 *
 * Sets the module-level pause flag and reports the new state.
 */
router.post('/pause', async (_req: Request, res: Response) => {
  queuePaused = true;

  const payload = { success: true, queue_paused: true, message: 'Queue paused' };
  res.json(payload);
});
|
|
|
|
/**
 * POST /api/job-queue/resume - Resume queue processing
 *
 * Clears the module-level pause flag and reports the new state.
 */
router.post('/resume', async (_req: Request, res: Response) => {
  queuePaused = false;

  const payload = { success: true, queue_paused: false, message: 'Queue resumed' };
  res.json(payload);
});
|
|
|
|
/**
 * GET /api/job-queue/paused - Check if queue is paused
 *
 * NOTE(review): this route is registered after GET /:id, so it is only reached
 * when that handler passes non-numeric ids on instead of answering them itself.
 */
router.get('/paused', async (_req: Request, res: Response) => {
  const payload = { success: true, queue_paused: queuePaused };
  res.json(payload);
});
|
|
|
|
/**
 * POST /api/job-queue/enqueue-batch - Queue multiple dispensaries at once
 * Body: { dispensary_ids: number[], job_type?: string, priority?: number }
 *
 * 2024-12-10: Rewired to use worker_tasks via taskService.
 *
 * The legacy job_type is mapped to a task role with the same table the single
 * /enqueue endpoint uses; unrecognised values fall back to 'product_refresh'.
 * Repeated ids within one request are collapsed so each dispensary gets at
 * most one task from this call.
 *
 * Responds 400 when dispensary_ids is missing, empty, or longer than 500.
 * Otherwise responds with { success, queued, requested, message }, where
 * `requested` is the length of the submitted array and `queued` is whatever
 * taskService.createTasks reports.
 */
router.post('/enqueue-batch', async (req: Request, res: Response) => {
  try {
    const { dispensary_ids, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;

    if (!Array.isArray(dispensary_ids) || dispensary_ids.length === 0) {
      return res.status(400).json({ success: false, error: 'dispensary_ids array is required' });
    }

    if (dispensary_ids.length > 500) {
      return res.status(400).json({ success: false, error: 'Maximum 500 dispensaries per batch' });
    }

    // 2024-12-10: Map legacy job_type to new task role.
    // A Map (not a plain object) so inherited names such as "constructor"
    // cannot resolve to a function; entries mirror the /enqueue endpoint.
    const roleByJobType = new Map<string, string>([
      ['dutchie_product_crawl', 'product_refresh'],
      ['menu_detection', 'entry_point_discovery'],
      ['menu_detection_single', 'entry_point_discovery'],
      ['product_discovery', 'product_discovery'],
      ['store_discovery', 'store_discovery'],
    ]);
    const role = roleByJobType.get(job_type) ?? 'product_refresh';

    // 2024-12-10: Use taskService to create tasks in worker_tasks table
    const { taskService } = await import('../tasks/task-service');

    // Drop repeated ids so one request cannot enqueue the same dispensary twice.
    const uniqueIds = [...new Set(dispensary_ids)];
    const tasks = uniqueIds.map(dispensary_id => ({
      role: role as any,
      dispensary_id,
      priority,
    }));

    const createdCount = await taskService.createTasks(tasks);

    res.json({
      success: true,
      queued: createdCount,
      requested: dispensary_ids.length,
      message: `Queued ${createdCount} of ${dispensary_ids.length} dispensaries`
    });
  } catch (error: any) {
    console.error('[JobQueue] Error batch enqueuing:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|
|
|
/**
 * POST /api/job-queue/enqueue-state - Queue all crawl-enabled dispensaries for a state
 * Body: { state_code: string, job_type?: string, priority?: number, limit?: number }
 *
 * 2024-12-10: Rewired to use worker_tasks via taskService.
 *
 * Selects up to `limit` (default 200) dispensaries in the state that have
 * crawl_enabled = true and a platform_dispensary_id, lowest id first, and
 * creates one task per dispensary. The legacy job_type is mapped to a task
 * role with the same table the single /enqueue endpoint uses; unrecognised
 * values fall back to 'product_refresh'.
 *
 * Responds 400 when state_code is missing or not a string. A limit that is not
 * a non-negative integer falls back to 200; an explicit null keeps its
 * existing meaning of "no limit".
 *
 * Responds with { success, queued, total_available, state, role, message }.
 */
router.post('/enqueue-state', async (req: Request, res: Response) => {
  try {
    const { state_code, job_type = 'dutchie_product_crawl', priority = 0, limit = 200 } = req.body;

    // A non-string state_code would throw on toUpperCase() and surface as a 500.
    if (typeof state_code !== 'string' || state_code.length === 0) {
      return res.status(400).json({ success: false, error: 'state_code is required (e.g., "AZ")' });
    }
    const stateCode = state_code.toUpperCase();

    // Postgres treats LIMIT NULL as "no limit"; keep that for callers sending null.
    let rowLimit: number | null = null;
    if (limit !== null) {
      const parsedLimit = typeof limit === 'number' ? limit : parseInt(String(limit), 10);
      rowLimit = Number.isInteger(parsedLimit) && parsedLimit >= 0 ? parsedLimit : 200;
    }

    // 2024-12-10: Map legacy job_type to new task role.
    // A Map (not a plain object) so inherited names such as "constructor"
    // cannot resolve to a function; entries mirror the /enqueue endpoint.
    const roleByJobType = new Map<string, string>([
      ['dutchie_product_crawl', 'product_refresh'],
      ['menu_detection', 'entry_point_discovery'],
      ['menu_detection_single', 'entry_point_discovery'],
      ['product_discovery', 'product_discovery'],
      ['store_discovery', 'store_discovery'],
    ]);
    const role = roleByJobType.get(job_type) ?? 'product_refresh';

    // Get dispensary IDs for the state. ORDER BY makes the LIMITed subset
    // deterministic instead of whatever order the planner happens to produce.
    const dispensaryResult = await pool.query(`
      SELECT d.id
      FROM dispensaries d
      JOIN states s ON s.id = d.state_id
      WHERE s.code = $1
        AND d.crawl_enabled = true
        AND d.platform_dispensary_id IS NOT NULL
      ORDER BY d.id
      LIMIT $2
    `, [stateCode, rowLimit]);

    const dispensary_ids = dispensaryResult.rows.map((r: any) => r.id);

    // 2024-12-10: Use taskService to create tasks in worker_tasks table
    const { taskService } = await import('../tasks/task-service');

    const tasks = dispensary_ids.map((dispensary_id: number) => ({
      role: role as any,
      dispensary_id,
      priority,
    }));

    const createdCount = await taskService.createTasks(tasks);

    // Total eligible dispensaries in the state, ignoring the limit.
    const countResult = await pool.query(`
      SELECT COUNT(*) as total
      FROM dispensaries d
      JOIN states s ON s.id = d.state_id
      WHERE s.code = $1
        AND d.crawl_enabled = true
        AND d.platform_dispensary_id IS NOT NULL
    `, [stateCode]);

    res.json({
      success: true,
      queued: createdCount,
      total_available: parseInt(countResult.rows[0].total, 10),
      state: stateCode,
      role,
      message: `Queued ${createdCount} dispensaries for ${stateCode}`
    });
  } catch (error: any) {
    console.error('[JobQueue] Error enqueuing state:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|
|
|
/**
 * POST /api/job-queue/clear-pending - Clear all pending jobs (optionally filtered)
 * Body: { state_code?: string, job_type?: string }
 *
 * Marks every matching 'pending' job in dispensary_crawl_jobs as 'cancelled'.
 * With no filters, all pending jobs are cancelled. Responds with the number of
 * rows affected.
 */
router.post('/clear-pending', async (req: Request, res: Response) => {
  try {
    const { state_code, job_type } = req.body;

    // Collect the WHERE conditions and their bound values side by side.
    const conditions: string[] = [`status = 'pending'`];
    const values: any[] = [];

    if (job_type) {
      values.push(job_type);
      conditions.push(`job_type = $${values.length}`);
    }

    if (state_code) {
      values.push((state_code as string).toUpperCase());
      conditions.push(`dispensary_id IN (
        SELECT d.id FROM dispensaries d
        JOIN states s ON s.id = d.state_id
        WHERE s.code = $${values.length}
      )`);
    }

    const cancelSql = `
      UPDATE dispensary_crawl_jobs
      SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
      WHERE ${conditions.join(' AND ')}
    `;
    const { rowCount } = await pool.query(cancelSql, values);

    res.json({
      success: true,
      cleared: rowCount,
      message: `Cancelled ${rowCount} pending jobs`
    });
  } catch (error: any) {
    console.error('[JobQueue] Error clearing pending:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|
|
|
// Default export: the router carrying every job-queue endpoint defined above.
export default router;
|
|
// Also exposes the pause flag to other modules.
// NOTE(review): this exports a reassignable `let` binding; whether importers
// observe later changes depends on the module format in use - confirm.
export { queuePaused };
|