cannaiq/backend/src/routes/tasks.ts
Kelly 832ef1cf83 feat(scheduler): Immutable schedules and HTTP-only pipeline
## Changes
- **Migration 089**: Add is_immutable and method columns to task_schedules
  - Per-state product_discovery schedules (4h default)
  - Store discovery weekly (168h)
  - All schedules use HTTP transport (Puppeteer/browser)
- **Task Scheduler**: HTTP-only product discovery with per-state scheduling
  - Each state has its own immutable schedule
  - Schedules can be edited (interval/priority) but not deleted
- **TasksDashboard UI**: Full immutability support
  - Lock icon for immutable schedules
  - State and Method columns in schedules table
  - Disabled delete for immutable, restricted edit fields
- **Store Discovery HTTP**: Auto-queue product_discovery for new stores
- **Migration 088**: Discovery payloads storage schema
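
A minimal sketch of what migration 089 plausibly does, inferred from the `is_immutable` and `method` columns that the route queries below read. The actual migration file is not part of this diff, so the column types and the `up()` wrapper are assumptions for illustration only:

```ts
import { pool } from '../db/pool';

// Sketch only: adds the two task_schedules columns the routes in tasks.ts rely on.
// Column types and the migration harness are assumed, not copied from migration 089.
export async function up(): Promise<void> {
  await pool.query(`
    ALTER TABLE task_schedules
      ADD COLUMN IF NOT EXISTS is_immutable BOOLEAN NOT NULL DEFAULT false,
      ADD COLUMN IF NOT EXISTS method TEXT
  `);
}
```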

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 09:24:08 -07:00


/**
* Task Queue API Routes
*
* Endpoints for managing worker tasks, viewing capacity metrics,
* and generating batch tasks.
*
* SCHEDULE MANAGEMENT (added 2025-12-12):
* This file now contains the canonical schedule management endpoints.
* The job_schedules table has been deprecated and all schedule management
* is now consolidated into task_schedules:
*
* Schedule endpoints:
* GET /api/tasks/schedules - List all schedules
* POST /api/tasks/schedules - Create new schedule
* GET /api/tasks/schedules/:id - Get schedule by ID
* PUT /api/tasks/schedules/:id - Update schedule
* DELETE /api/tasks/schedules/:id - Delete schedule
* DELETE /api/tasks/schedules - Bulk delete schedules
* POST /api/tasks/schedules/:id/run-now - Trigger schedule immediately
* POST /api/tasks/schedules/:id/toggle - Toggle schedule enabled/disabled
*
* Note: Schedule routes are defined BEFORE /:id to avoid route conflicts
* (Express matches routes in order, and "schedules" would match /:id otherwise)
*/
import { Router, Request, Response } from 'express';
import {
taskService,
TaskRole,
TaskStatus,
TaskFilter,
} from '../tasks/task-service';
import { pool } from '../db/pool';
import {
isTaskPoolPaused,
pauseTaskPool,
resumeTaskPool,
getTaskPoolStatus,
} from '../tasks/task-pool-state';
const router = Router();
/**
* GET /api/tasks
* List tasks with optional filters
*
* Query params:
* - role: Filter by role
* - status: Filter by status (comma-separated for multiple)
* - dispensary_id: Filter by dispensary
* - worker_id: Filter by worker
* - limit: Max results (default 100)
* - offset: Pagination offset
*/
router.get('/', async (req: Request, res: Response) => {
try {
const filter: TaskFilter = {};
if (req.query.role) {
filter.role = req.query.role as TaskRole;
}
if (req.query.status) {
const statuses = (req.query.status as string).split(',') as TaskStatus[];
filter.status = statuses.length === 1 ? statuses[0] : statuses;
}
if (req.query.dispensary_id) {
filter.dispensary_id = parseInt(req.query.dispensary_id as string, 10);
}
if (req.query.worker_id) {
filter.worker_id = req.query.worker_id as string;
}
if (req.query.limit) {
filter.limit = parseInt(req.query.limit as string, 10);
}
if (req.query.offset) {
filter.offset = parseInt(req.query.offset as string, 10);
}
const tasks = await taskService.listTasks(filter);
res.json({ tasks, count: tasks.length });
} catch (error: unknown) {
console.error('Error listing tasks:', error);
res.status(500).json({ error: 'Failed to list tasks' });
}
});
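/*
 * Example (sketch): listing pending and failed tasks from a TypeScript client.
 * The base URL is an assumption; it is not defined in this file.
 *
 *   const url = new URL('http://localhost:3000/api/tasks');
 *   url.searchParams.set('status', 'pending,failed');
 *   url.searchParams.set('limit', '50');
 *   const { tasks, count } = await (await fetch(url)).json();
 *   console.log(`fetched ${count} tasks`, tasks);
 */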
/**
* GET /api/tasks/counts
* Get task counts by status
*/
router.get('/counts', async (_req: Request, res: Response) => {
try {
const counts = await taskService.getTaskCounts();
res.json(counts);
} catch (error: unknown) {
console.error('Error getting task counts:', error);
res.status(500).json({ error: 'Failed to get task counts' });
}
});
/**
* GET /api/tasks/capacity
* Get capacity metrics for all roles
*/
router.get('/capacity', async (_req: Request, res: Response) => {
try {
const metrics = await taskService.getCapacityMetrics();
res.json({ metrics });
} catch (error: unknown) {
console.error('Error getting capacity metrics:', error);
res.status(500).json({ error: 'Failed to get capacity metrics' });
}
});
/**
* GET /api/tasks/capacity/:role
* Get capacity metrics for a specific role
*/
router.get('/capacity/:role', async (req: Request, res: Response) => {
try {
const role = req.params.role as TaskRole;
const capacity = await taskService.getRoleCapacity(role);
if (!capacity) {
return res.status(404).json({ error: 'Role not found or no data' });
}
// Calculate workers needed for different SLAs
const workersFor1Hour = await taskService.calculateWorkersNeeded(role, 1);
const workersFor4Hours = await taskService.calculateWorkersNeeded(role, 4);
const workersFor8Hours = await taskService.calculateWorkersNeeded(role, 8);
res.json({
...capacity,
workers_needed: {
for_1_hour: workersFor1Hour,
for_4_hours: workersFor4Hours,
for_8_hours: workersFor8Hours,
},
});
} catch (error: unknown) {
console.error('Error getting role capacity:', error);
res.status(500).json({ error: 'Failed to get role capacity' });
}
});
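/*
 * Example (sketch): checking how many workers the product_refresh role would need
 * to clear its backlog within 4 hours. The base URL is an assumption.
 *
 *   const res = await fetch('http://localhost:3000/api/tasks/capacity/product_refresh');
 *   if (res.ok) {
 *     const capacity = await res.json();
 *     console.log('workers needed for a 4-hour SLA:', capacity.workers_needed.for_4_hours);
 *   }
 */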
// ============================================================
// SCHEDULE MANAGEMENT ROUTES
// (Must be before /:id to avoid route conflicts)
// ============================================================
/**
* GET /api/tasks/schedules
* List all task schedules
*
* Returns schedules with an is_immutable flag. Immutable schedules can have their
* interval_hours, priority, and enabled fields updated, but they cannot be deleted.
*/
router.get('/schedules', async (req: Request, res: Response) => {
try {
const enabledOnly = req.query.enabled === 'true';
let query = `
SELECT id, name, role, description, enabled, interval_hours,
priority, state_code, platform, method,
COALESCE(is_immutable, false) as is_immutable,
last_run_at, next_run_at,
last_task_count, last_error, created_at, updated_at
FROM task_schedules
`;
if (enabledOnly) {
query += ` WHERE enabled = true`;
}
query += ` ORDER BY
CASE role
WHEN 'store_discovery' THEN 1
WHEN 'product_discovery' THEN 2
WHEN 'analytics_refresh' THEN 3
ELSE 4
END,
state_code NULLS FIRST,
name`;
const result = await pool.query(query);
res.json({ schedules: result.rows });
} catch (error: unknown) {
console.error('Error listing schedules:', error);
res.status(500).json({ error: 'Failed to list schedules' });
}
});
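/*
 * Example (sketch): fetching only enabled schedules and separating the immutable
 * ones (which the TasksDashboard renders with a lock icon). Base URL is an assumption.
 *
 *   const res = await fetch('http://localhost:3000/api/tasks/schedules?enabled=true');
 *   const { schedules } = await res.json();
 *   const immutable = schedules.filter((s: { is_immutable: boolean }) => s.is_immutable);
 *   console.log(`${immutable.length} of ${schedules.length} schedules are immutable`);
 */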
/**
* DELETE /api/tasks/schedules
* Bulk delete schedules
*
* Immutable schedules are automatically skipped (not deleted).
*
* Body:
* - ids: number[] (required) - array of schedule IDs to delete
* - all: boolean (optional) - if true, delete all non-immutable schedules (ids ignored)
*/
router.delete('/schedules', async (req: Request, res: Response) => {
try {
const { ids, all } = req.body;
let result;
let skippedImmutable: { id: number; name: string }[] = [];
if (all === true) {
// First, find immutable schedules that will be skipped
const immutableResult = await pool.query(`
SELECT id, name FROM task_schedules WHERE is_immutable = true
`);
skippedImmutable = immutableResult.rows;
// Delete all non-immutable schedules
result = await pool.query(`
DELETE FROM task_schedules
WHERE COALESCE(is_immutable, false) = false
RETURNING id, name
`);
} else if (Array.isArray(ids) && ids.length > 0) {
// First, find which of the requested IDs are immutable
const immutableResult = await pool.query(`
SELECT id, name FROM task_schedules
WHERE id = ANY($1) AND is_immutable = true
`, [ids]);
skippedImmutable = immutableResult.rows;
// Delete only non-immutable schedules from the requested IDs
result = await pool.query(`
DELETE FROM task_schedules
WHERE id = ANY($1) AND COALESCE(is_immutable, false) = false
RETURNING id, name
`, [ids]);
} else {
return res.status(400).json({
error: 'Either provide ids array or set all=true',
});
}
res.json({
success: true,
deleted_count: result.rowCount,
deleted: result.rows,
skipped_immutable_count: skippedImmutable.length,
skipped_immutable: skippedImmutable,
message: skippedImmutable.length > 0
? `Deleted ${result.rowCount} schedule(s), skipped ${skippedImmutable.length} immutable schedule(s)`
: `Deleted ${result.rowCount} schedule(s)`,
});
} catch (error: unknown) {
console.error('Error bulk deleting schedules:', error);
res.status(500).json({ error: 'Failed to delete schedules' });
}
});
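/*
 * Example (sketch): bulk-deleting a specific set of schedules. Any immutable IDs in
 * the list are skipped and reported back instead of deleted. The base URL and IDs
 * are illustrative.
 *
 *   const res = await fetch('http://localhost:3000/api/tasks/schedules', {
 *     method: 'DELETE',
 *     headers: { 'Content-Type': 'application/json' },
 *     body: JSON.stringify({ ids: [12, 13, 14] }),
 *   });
 *   const { deleted_count, skipped_immutable } = await res.json();
 *   console.log(`${deleted_count} deleted, ${skipped_immutable.length} skipped as immutable`);
 */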
/**
* POST /api/tasks/schedules
* Create a new schedule
*
* Body:
* - name: string (required, unique)
* - role: TaskRole (required)
* - description: string (optional)
* - enabled: boolean (default true)
* - interval_hours: number (required)
* - priority: number (default 0)
* - state_code: string (optional)
* - platform: string (optional)
*/
router.post('/schedules', async (req: Request, res: Response) => {
try {
const {
name,
role,
description,
enabled = true,
interval_hours,
priority = 0,
state_code,
platform,
} = req.body;
if (!name || !role || !interval_hours) {
return res.status(400).json({
error: 'name, role, and interval_hours are required',
});
}
// Calculate next_run_at based on interval
const nextRunAt = new Date(Date.now() + interval_hours * 60 * 60 * 1000);
const result = await pool.query(`
INSERT INTO task_schedules
(name, role, description, enabled, interval_hours, priority, state_code, platform, next_run_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
RETURNING id, name, role, description, enabled, interval_hours,
priority, state_code, platform, method,
COALESCE(is_immutable, false) as is_immutable,
last_run_at, next_run_at,
last_task_count, last_error, created_at, updated_at
`, [name, role, description, enabled, interval_hours, priority, state_code, platform, nextRunAt]);
res.status(201).json(result.rows[0]);
} catch (error: any) {
if (error.code === '23505') {
// Unique constraint violation
return res.status(409).json({ error: 'A schedule with this name already exists' });
}
console.error('Error creating schedule:', error);
res.status(500).json({ error: 'Failed to create schedule' });
}
});
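/*
 * Example (sketch): creating a weekly store_discovery schedule. Only name, role,
 * and interval_hours are required; the name and base URL here are illustrative.
 *
 *   const res = await fetch('http://localhost:3000/api/tasks/schedules', {
 *     method: 'POST',
 *     headers: { 'Content-Type': 'application/json' },
 *     body: JSON.stringify({
 *       name: 'store_discovery_weekly',
 *       role: 'store_discovery',
 *       interval_hours: 168,
 *       platform: 'dutchie',
 *     }),
 *   });
 *   if (res.status === 409) console.warn('a schedule with this name already exists');
 */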
/**
* GET /api/tasks/schedules/:id
* Get a specific schedule by ID
*/
router.get('/schedules/:id', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
const result = await pool.query(`
SELECT id, name, role, description, enabled, interval_hours,
priority, state_code, platform, method,
COALESCE(is_immutable, false) as is_immutable,
last_run_at, next_run_at,
last_task_count, last_error, created_at, updated_at
FROM task_schedules
WHERE id = $1
`, [scheduleId]);
if (result.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
res.json(result.rows[0]);
} catch (error: unknown) {
console.error('Error getting schedule:', error);
res.status(500).json({ error: 'Failed to get schedule' });
}
});
/**
* PUT /api/tasks/schedules/:id
* Update an existing schedule
*
* For IMMUTABLE schedules, only these fields can be updated:
* - enabled (turn on/off)
* - interval_hours (change frequency)
* - priority (change priority)
*
* For regular schedules, all fields can be updated.
*/
router.put('/schedules/:id', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
const {
name,
role,
description,
enabled,
interval_hours,
priority,
state_code,
platform,
} = req.body;
// First check if schedule exists and if it's immutable
const checkResult = await pool.query(`
SELECT id, name, COALESCE(is_immutable, false) as is_immutable
FROM task_schedules WHERE id = $1
`, [scheduleId]);
if (checkResult.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
const schedule = checkResult.rows[0];
const isImmutable = schedule.is_immutable;
// For immutable schedules, reject attempts to change protected fields
if (isImmutable) {
const protectedFields: string[] = [];
if (name !== undefined) protectedFields.push('name');
if (role !== undefined) protectedFields.push('role');
if (description !== undefined) protectedFields.push('description');
if (state_code !== undefined) protectedFields.push('state_code');
if (platform !== undefined) protectedFields.push('platform');
if (protectedFields.length > 0) {
return res.status(403).json({
error: 'Cannot modify protected fields on immutable schedule',
message: `Schedule "${schedule.name}" is immutable. Only enabled, interval_hours, and priority can be changed.`,
protected_fields: protectedFields,
allowed_fields: ['enabled', 'interval_hours', 'priority'],
});
}
}
// Build dynamic update query
const updates: string[] = [];
const values: any[] = [];
let paramIndex = 1;
// These fields can only be updated on non-immutable schedules
if (!isImmutable) {
if (name !== undefined) {
updates.push(`name = $${paramIndex++}`);
values.push(name);
}
if (role !== undefined) {
updates.push(`role = $${paramIndex++}`);
values.push(role);
}
if (description !== undefined) {
updates.push(`description = $${paramIndex++}`);
values.push(description);
}
if (state_code !== undefined) {
updates.push(`state_code = $${paramIndex++}`);
values.push(state_code || null);
}
if (platform !== undefined) {
updates.push(`platform = $${paramIndex++}`);
values.push(platform || null);
}
}
// These fields can be updated on ALL schedules (including immutable)
if (enabled !== undefined) {
updates.push(`enabled = $${paramIndex++}`);
values.push(enabled);
}
if (interval_hours !== undefined) {
updates.push(`interval_hours = $${paramIndex++}`);
values.push(interval_hours);
// Recalculate next_run_at if interval changed
const nextRunAt = new Date(Date.now() + interval_hours * 60 * 60 * 1000);
updates.push(`next_run_at = $${paramIndex++}`);
values.push(nextRunAt);
}
if (priority !== undefined) {
updates.push(`priority = $${paramIndex++}`);
values.push(priority);
}
if (updates.length === 0) {
return res.status(400).json({ error: 'No fields to update' });
}
updates.push('updated_at = NOW()');
values.push(scheduleId);
const result = await pool.query(`
UPDATE task_schedules
SET ${updates.join(', ')}
WHERE id = $${paramIndex}
RETURNING id, name, role, description, enabled, interval_hours,
priority, state_code, platform, method,
COALESCE(is_immutable, false) as is_immutable,
last_run_at, next_run_at,
last_task_count, last_error, created_at, updated_at
`, values);
res.json(result.rows[0]);
} catch (error: any) {
if (error.code === '23505') {
return res.status(409).json({ error: 'A schedule with this name already exists' });
}
console.error('Error updating schedule:', error);
res.status(500).json({ error: 'Failed to update schedule' });
}
});
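/*
 * Example (sketch): updating an immutable schedule. Only enabled, interval_hours,
 * and priority are accepted; including a field such as name would return 403 with
 * the list of protected fields. The schedule ID and base URL are illustrative.
 *
 *   const res = await fetch('http://localhost:3000/api/tasks/schedules/42', {
 *     method: 'PUT',
 *     headers: { 'Content-Type': 'application/json' },
 *     body: JSON.stringify({ interval_hours: 8, priority: 5 }),
 *   });
 *   const updated = await res.json();
 *   console.log('next run at', updated.next_run_at);
 */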
/**
* DELETE /api/tasks/schedules/:id
* Delete a schedule
*
* Immutable schedules cannot be deleted - they can only be disabled.
*/
router.delete('/schedules/:id', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
// First check if schedule exists and is immutable
const checkResult = await pool.query(`
SELECT id, name, COALESCE(is_immutable, false) as is_immutable
FROM task_schedules WHERE id = $1
`, [scheduleId]);
if (checkResult.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
const schedule = checkResult.rows[0];
// Prevent deletion of immutable schedules
if (schedule.is_immutable) {
return res.status(403).json({
error: 'Cannot delete immutable schedule',
message: `Schedule "${schedule.name}" is immutable and cannot be deleted. You can disable it instead.`,
schedule_id: scheduleId,
is_immutable: true,
});
}
// Delete the schedule
await pool.query(`DELETE FROM task_schedules WHERE id = $1`, [scheduleId]);
res.json({
success: true,
message: `Schedule "${schedule.name}" deleted`,
});
} catch (error: unknown) {
console.error('Error deleting schedule:', error);
res.status(500).json({ error: 'Failed to delete schedule' });
}
});
/**
* POST /api/tasks/schedules/:id/run-now
* Manually trigger a scheduled task to run immediately
*/
router.post('/schedules/:id/run-now', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
// Get the schedule
const scheduleResult = await pool.query(`
SELECT id, name, role, state_code, platform, priority
FROM task_schedules WHERE id = $1
`, [scheduleId]);
if (scheduleResult.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
const schedule = scheduleResult.rows[0];
// Create a task based on the schedule
const task = await taskService.createTask({
role: schedule.role,
platform: schedule.platform,
priority: schedule.priority + 10, // Boost priority for manual runs
});
// Update last_run_at on the schedule
await pool.query(`
UPDATE task_schedules
SET last_run_at = NOW(),
next_run_at = NOW() + (interval_hours || ' hours')::interval,
updated_at = NOW()
WHERE id = $1
`, [scheduleId]);
res.json({
success: true,
message: `Schedule "${schedule.name}" triggered`,
task,
});
} catch (error: unknown) {
console.error('Error running schedule:', error);
res.status(500).json({ error: 'Failed to run schedule' });
}
});
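/*
 * Example (sketch): triggering a schedule immediately. The created task gets a +10
 * priority boost and next_run_at is pushed out by one interval. The schedule ID and
 * base URL are illustrative.
 *
 *   const res = await fetch('http://localhost:3000/api/tasks/schedules/42/run-now', {
 *     method: 'POST',
 *   });
 *   const { task } = await res.json();
 *   console.log('queued task', task.id);
 */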
/**
* POST /api/tasks/schedules/:id/toggle
* Toggle a schedule's enabled status
*/
router.post('/schedules/:id/toggle', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
const result = await pool.query(`
UPDATE task_schedules
SET enabled = NOT enabled,
updated_at = NOW()
WHERE id = $1
RETURNING id, name, enabled
`, [scheduleId]);
if (result.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
res.json({
success: true,
schedule: result.rows[0],
message: result.rows[0].enabled
? `Schedule "${result.rows[0].name}" enabled`
: `Schedule "${result.rows[0].name}" disabled`,
});
} catch (error: unknown) {
console.error('Error toggling schedule:', error);
res.status(500).json({ error: 'Failed to toggle schedule' });
}
});
// ============================================================
// TASK-SPECIFIC ROUTES (with :id parameter)
// ============================================================
/**
* GET /api/tasks/:id
* Get a specific task by ID
*/
router.get('/:id', async (req: Request, res: Response) => {
try {
const taskId = parseInt(req.params.id, 10);
const task = await taskService.getTask(taskId);
if (!task) {
return res.status(404).json({ error: 'Task not found' });
}
res.json(task);
} catch (error: unknown) {
console.error('Error getting task:', error);
res.status(500).json({ error: 'Failed to get task' });
}
});
/**
* DELETE /api/tasks/:id
* Delete a specific task by ID
* Only allows deletion of failed, completed, or pending tasks (not running)
*/
router.delete('/:id', async (req: Request, res: Response) => {
try {
const taskId = parseInt(req.params.id, 10);
// First check if task exists and its status
const task = await taskService.getTask(taskId);
if (!task) {
return res.status(404).json({ error: 'Task not found' });
}
// Don't allow deleting running tasks
if (task.status === 'running' || task.status === 'claimed') {
return res.status(400).json({ error: 'Cannot delete a running or claimed task' });
}
// Delete the task
await pool.query('DELETE FROM worker_tasks WHERE id = $1', [taskId]);
res.json({ success: true, message: `Task ${taskId} deleted` });
} catch (error: unknown) {
console.error('Error deleting task:', error);
res.status(500).json({ error: 'Failed to delete task' });
}
});
/**
* POST /api/tasks
* Create a new task
*
* Body:
* - role: TaskRole (required)
* - dispensary_id: number (optional)
* - platform: string (optional)
* - priority: number (optional, default 0)
* - scheduled_for: ISO date string (optional)
*/
router.post('/', async (req: Request, res: Response) => {
try {
const { role, dispensary_id, platform, priority, scheduled_for } = req.body;
if (!role) {
return res.status(400).json({ error: 'Role is required' });
}
// Check if store already has an active task
if (dispensary_id) {
const hasActive = await taskService.hasActiveTask(dispensary_id);
if (hasActive) {
return res.status(409).json({
error: 'Store already has an active task',
dispensary_id,
});
}
}
const task = await taskService.createTask({
role,
dispensary_id,
platform,
priority,
scheduled_for: scheduled_for ? new Date(scheduled_for) : undefined,
});
res.status(201).json(task);
} catch (error: unknown) {
console.error('Error creating task:', error);
res.status(500).json({ error: 'Failed to create task' });
}
});
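/*
 * Example (sketch): queueing a product_refresh task for a single store. A 409 means
 * the store already has a pending or running task. The dispensary_id and base URL
 * are illustrative.
 *
 *   const res = await fetch('http://localhost:3000/api/tasks', {
 *     method: 'POST',
 *     headers: { 'Content-Type': 'application/json' },
 *     body: JSON.stringify({ role: 'product_refresh', dispensary_id: 123, priority: 5 }),
 *   });
 *   if (res.status === 409) console.warn('store already has an active task');
 */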
/**
* POST /api/tasks/generate/resync
* Generate daily resync tasks for all active stores
*
* Body:
* - batches_per_day: number (optional, default 6 = every 4 hours)
* - date: ISO date string (optional, default today)
*/
router.post('/generate/resync', async (req: Request, res: Response) => {
try {
const { batches_per_day, date } = req.body;
const batchesPerDay = batches_per_day ?? 6;
const targetDate = date ? new Date(date) : new Date();
const createdCount = await taskService.generateDailyResyncTasks(
batchesPerDay,
targetDate
);
res.json({
success: true,
tasks_created: createdCount,
batches_per_day: batchesPerDay,
date: targetDate.toISOString().split('T')[0],
});
} catch (error: unknown) {
console.error('Error generating resync tasks:', error);
res.status(500).json({ error: 'Failed to generate resync tasks' });
}
});
/**
* POST /api/tasks/generate/discovery
* Generate store discovery tasks for a platform
*
* Body:
* - platform: string (required, e.g., 'dutchie')
* - state_code: string (optional, e.g., 'AZ')
* - priority: number (optional)
*/
router.post('/generate/discovery', async (req: Request, res: Response) => {
try {
const { platform, state_code, priority } = req.body;
if (!platform) {
return res.status(400).json({ error: 'Platform is required' });
}
const task = await taskService.createStoreDiscoveryTask(
platform,
state_code,
priority ?? 0
);
res.status(201).json(task);
} catch (error: unknown) {
console.error('Error creating discovery task:', error);
res.status(500).json({ error: 'Failed to create discovery task' });
}
});
/**
* POST /api/tasks/recover-stale
* Recover stale tasks from dead workers
*
* Body:
* - threshold_minutes: number (optional, default 10)
*/
router.post('/recover-stale', async (req: Request, res: Response) => {
try {
const { threshold_minutes } = req.body;
const recovered = await taskService.recoverStaleTasks(threshold_minutes ?? 10);
res.json({
success: true,
tasks_recovered: recovered,
});
} catch (error: unknown) {
console.error('Error recovering stale tasks:', error);
res.status(500).json({ error: 'Failed to recover stale tasks' });
}
});
/**
* GET /api/tasks/role/:role/last-completion
* Get the last completion time for a role
*/
router.get('/role/:role/last-completion', async (req: Request, res: Response) => {
try {
const role = req.params.role as TaskRole;
const lastCompletion = await taskService.getLastCompletion(role);
res.json({
role,
last_completion: lastCompletion?.toISOString() ?? null,
time_since: lastCompletion
? Math.floor((Date.now() - lastCompletion.getTime()) / 1000)
: null,
});
} catch (error: unknown) {
console.error('Error getting last completion:', error);
res.status(500).json({ error: 'Failed to get last completion' });
}
});
/**
* GET /api/tasks/role/:role/recent
* Get recent completions for a role
*/
router.get('/role/:role/recent', async (req: Request, res: Response) => {
try {
const role = req.params.role as TaskRole;
const limit = parseInt(req.query.limit as string, 10) || 10;
const tasks = await taskService.getRecentCompletions(role, limit);
res.json({ tasks });
} catch (error: unknown) {
console.error('Error getting recent completions:', error);
res.status(500).json({ error: 'Failed to get recent completions' });
}
});
/**
* GET /api/tasks/store/:dispensaryId/active
* Check if a store has an active task
*/
router.get('/store/:dispensaryId/active', async (req: Request, res: Response) => {
try {
const dispensaryId = parseInt(req.params.dispensaryId, 10);
const hasActive = await taskService.hasActiveTask(dispensaryId);
res.json({
dispensary_id: dispensaryId,
has_active_task: hasActive,
});
} catch (error: unknown) {
console.error('Error checking active task:', error);
res.status(500).json({ error: 'Failed to check active task' });
}
});
// ============================================================
// MIGRATION ROUTES - Disable old job systems
// ============================================================
/**
* GET /api/tasks/migration/status
* Get status of old job systems vs new task queue
*/
router.get('/migration/status', async (_req: Request, res: Response) => {
try {
// Get old job system counts
const [schedules, crawlJobs, rawPayloads, taskCounts] = await Promise.all([
pool.query(`
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE enabled = true) as enabled
FROM job_schedules
`),
pool.query(`
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE status = 'pending') as pending,
COUNT(*) FILTER (WHERE status = 'running') as running
FROM dispensary_crawl_jobs
`),
pool.query(`
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE processed = false) as unprocessed
FROM raw_payloads
`),
taskService.getTaskCounts(),
]);
res.json({
old_systems: {
job_schedules: {
total: parseInt(schedules.rows[0].total) || 0,
enabled: parseInt(schedules.rows[0].enabled) || 0,
},
dispensary_crawl_jobs: {
total: parseInt(crawlJobs.rows[0].total) || 0,
pending: parseInt(crawlJobs.rows[0].pending) || 0,
running: parseInt(crawlJobs.rows[0].running) || 0,
},
raw_payloads: {
total: parseInt(rawPayloads.rows[0].total) || 0,
unprocessed: parseInt(rawPayloads.rows[0].unprocessed) || 0,
},
},
new_task_queue: taskCounts,
recommendation: parseInt(schedules.rows[0].enabled, 10) > 0
? 'Disable old job schedules before switching to new task queue'
: 'Ready to use new task queue',
});
} catch (error: unknown) {
console.error('Error getting migration status:', error);
res.status(500).json({ error: 'Failed to get migration status' });
}
});
/**
* POST /api/tasks/migration/disable-old-schedules
* Disable all old job schedules to prepare for new task queue
*/
router.post('/migration/disable-old-schedules', async (_req: Request, res: Response) => {
try {
const result = await pool.query(`
UPDATE job_schedules
SET enabled = false,
updated_at = NOW()
WHERE enabled = true
RETURNING id, job_name
`);
res.json({
success: true,
disabled_count: result.rowCount,
disabled_schedules: result.rows.map(r => ({ id: r.id, job_name: r.job_name })),
});
} catch (error: unknown) {
console.error('Error disabling old schedules:', error);
res.status(500).json({ error: 'Failed to disable old schedules' });
}
});
/**
* POST /api/tasks/migration/cancel-pending-crawl-jobs
* Cancel all pending crawl jobs from the old system
*/
router.post('/migration/cancel-pending-crawl-jobs', async (_req: Request, res: Response) => {
try {
const result = await pool.query(`
UPDATE dispensary_crawl_jobs
SET status = 'cancelled',
completed_at = NOW(),
updated_at = NOW()
WHERE status = 'pending'
RETURNING id
`);
res.json({
success: true,
cancelled_count: result.rowCount,
});
} catch (error: unknown) {
console.error('Error cancelling pending crawl jobs:', error);
res.status(500).json({ error: 'Failed to cancel pending crawl jobs' });
}
});
/**
* POST /api/tasks/migration/create-resync-tasks
* Create product_refresh tasks for all crawl-enabled dispensaries
*/
router.post('/migration/create-resync-tasks', async (req: Request, res: Response) => {
try {
const { priority = 0, state_code } = req.body;
let query = `
SELECT id, name FROM dispensaries
WHERE crawl_enabled = true
AND platform_dispensary_id IS NOT NULL
`;
const params: any[] = [];
if (state_code) {
query += `
AND state_id = (SELECT id FROM states WHERE code = $1)
`;
params.push(state_code.toUpperCase());
}
query += ` ORDER BY id`;
const dispensaries = await pool.query(query, params);
let created = 0;
for (const disp of dispensaries.rows) {
// Check if already has pending/running task
const hasActive = await taskService.hasActiveTask(disp.id);
if (!hasActive) {
await taskService.createTask({
role: 'product_refresh',
dispensary_id: disp.id,
platform: 'dutchie',
priority,
});
created++;
}
}
res.json({
success: true,
tasks_created: created,
dispensaries_checked: dispensaries.rows.length,
state_filter: state_code || 'all',
});
} catch (error: unknown) {
console.error('Error creating resync tasks:', error);
res.status(500).json({ error: 'Failed to create resync tasks' });
}
});
/**
* POST /api/tasks/migration/full-migrate
* One-click migration: disable old systems, create new tasks
*/
router.post('/migration/full-migrate', async (req: Request, res: Response) => {
try {
const results: any = {
success: true,
steps: [],
};
// Step 1: Disable old job schedules
const disableResult = await pool.query(`
UPDATE job_schedules
SET enabled = false, updated_at = NOW()
WHERE enabled = true
RETURNING id
`);
results.steps.push({
step: 'disable_job_schedules',
count: disableResult.rowCount,
});
// Step 2: Cancel pending crawl jobs
const cancelResult = await pool.query(`
UPDATE dispensary_crawl_jobs
SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
WHERE status = 'pending'
RETURNING id
`);
results.steps.push({
step: 'cancel_pending_crawl_jobs',
count: cancelResult.rowCount,
});
// Step 3: Generate initial resync tasks
const resyncCount = await taskService.generateDailyResyncTasks(6);
results.steps.push({
step: 'generate_resync_tasks',
count: resyncCount,
});
// Step 4: Create store discovery task
const discoveryTask = await taskService.createStoreDiscoveryTask('dutchie', undefined, 0);
results.steps.push({
step: 'create_discovery_task',
task_id: discoveryTask.id,
});
// Step 5: Create analytics refresh task
const analyticsTask = await taskService.createTask({
role: 'analytics_refresh',
priority: 0,
});
results.steps.push({
step: 'create_analytics_task',
task_id: analyticsTask.id,
});
results.message = 'Migration complete. New task workers will pick up tasks.';
res.json(results);
} catch (error: unknown) {
console.error('Error during full migration:', error);
res.status(500).json({ error: 'Failed to complete migration' });
}
});
// ============================================================
// STAGGERED BATCH TASK CREATION
// ============================================================
/**
* POST /api/tasks/batch/staggered
* Create multiple tasks with staggered start times
*
* This endpoint prevents resource contention when creating many tasks by
* staggering their scheduled_for timestamps. Each task becomes eligible
* for claiming only after its scheduled time.
*
* WORKFLOW:
* 1. Tasks created with scheduled_for = NOW() + (index * stagger_seconds)
* 2. Worker claims task only when scheduled_for <= NOW()
* 3. Worker runs preflight on EVERY task claim
* 4. If preflight passes, worker executes task
* 5. If preflight fails, task released back to pending for another worker
*
* Body:
* - dispensary_ids: number[] (required) - Array of dispensary IDs
* - role: TaskRole (required) - 'product_refresh' | 'product_discovery'
* - stagger_seconds: number (default: 15) - Seconds between each task start
* - platform: string (default: 'dutchie')
* - method: 'curl' | 'http' | null (default: null)
*/
router.post('/batch/staggered', async (req: Request, res: Response) => {
try {
const {
dispensary_ids,
role,
stagger_seconds = 15,
platform = 'dutchie',
method = null,
} = req.body;
if (!dispensary_ids || !Array.isArray(dispensary_ids) || dispensary_ids.length === 0) {
return res.status(400).json({ error: 'dispensary_ids array is required' });
}
if (!role) {
return res.status(400).json({ error: 'role is required' });
}
const result = await taskService.createStaggeredTasks(
dispensary_ids,
role as TaskRole,
stagger_seconds,
platform,
method
);
const totalDuration = (dispensary_ids.length - 1) * stagger_seconds;
const estimatedEndTime = new Date(Date.now() + totalDuration * 1000);
res.status(201).json({
success: true,
created: result.created,
task_ids: result.taskIds,
stagger_seconds,
total_duration_seconds: totalDuration,
estimated_completion: estimatedEndTime.toISOString(),
message: `Created ${result.created} staggered ${role} tasks (${stagger_seconds}s apart, ~${Math.ceil(totalDuration / 60)} min total)`,
});
} catch (error: unknown) {
console.error('Error creating staggered tasks:', error);
res.status(500).json({ error: 'Failed to create staggered tasks' });
}
});
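/*
 * Example (sketch): staggering 20 product_refresh tasks 15 seconds apart. With these
 * numbers the last task becomes claimable (20 - 1) * 15 = 285 seconds (~5 minutes)
 * after the call. The dispensary IDs and base URL are illustrative.
 *
 *   const dispensary_ids = Array.from({ length: 20 }, (_, i) => 100 + i);
 *   const res = await fetch('http://localhost:3000/api/tasks/batch/staggered', {
 *     method: 'POST',
 *     headers: { 'Content-Type': 'application/json' },
 *     body: JSON.stringify({ dispensary_ids, role: 'product_refresh', stagger_seconds: 15 }),
 *   });
 *   const { total_duration_seconds, estimated_completion } = await res.json();
 *   console.log(`eligible over ${total_duration_seconds}s, last at ${estimated_completion}`);
 */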
/**
* POST /api/tasks/batch/az-stores
* Convenience endpoint to create staggered tasks for Arizona stores
*
* Body:
* - total_tasks: number (default: 24) - Total tasks to create
* - stagger_seconds: number (default: 15) - Seconds between each task
* - split_roles: boolean (default: true) - Split between product_refresh and product_discovery
*/
router.post('/batch/az-stores', async (req: Request, res: Response) => {
try {
const {
total_tasks = 24,
stagger_seconds = 15,
split_roles = true,
} = req.body;
const result = await taskService.createAZStoreTasks(
total_tasks,
stagger_seconds,
split_roles
);
const totalDuration = (result.total - 1) * stagger_seconds;
const estimatedEndTime = new Date(Date.now() + totalDuration * 1000);
res.status(201).json({
success: true,
total: result.total,
product_refresh: result.product_refresh,
product_discovery: result.product_discovery,
task_ids: result.taskIds,
stagger_seconds,
total_duration_seconds: totalDuration,
estimated_completion: estimatedEndTime.toISOString(),
message: `Created ${result.total} staggered tasks for AZ stores (${result.product_refresh} refresh, ${result.product_discovery} discovery)`,
});
} catch (error: unknown) {
console.error('Error creating AZ store tasks:', error);
res.status(500).json({ error: 'Failed to create AZ store tasks' });
}
});
// ============================================================
// TASK POOL MANAGEMENT
// ============================================================
/**
* GET /api/tasks/pool/status
* Check if task pool is paused
*/
router.get('/pool/status', async (_req: Request, res: Response) => {
const status = getTaskPoolStatus();
res.json({
success: true,
...status,
});
});
/**
* POST /api/tasks/pool/pause
* Pause the task pool - workers won't pick up new tasks
*/
router.post('/pool/pause', async (_req: Request, res: Response) => {
pauseTaskPool();
res.json({
success: true,
paused: true,
message: 'Task pool paused - workers will not pick up new tasks',
});
});
/**
* POST /api/tasks/pool/resume
* Resume the task pool - workers will pick up tasks again
*/
router.post('/pool/resume', async (_req: Request, res: Response) => {
resumeTaskPool();
res.json({
success: true,
paused: false,
message: 'Task pool resumed - workers will pick up new tasks',
});
});
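/*
 * Example (sketch): pausing the pool before a deploy, then resuming and checking the
 * state. The base URL is an assumption, and the `paused` field on the status response
 * is inferred from the pause/resume responses; getTaskPoolStatus() may expose
 * different keys.
 *
 *   const base = 'http://localhost:3000/api/tasks/pool';
 *   await fetch(`${base}/pause`, { method: 'POST' });
 *   // ...deploy or maintenance window...
 *   await fetch(`${base}/resume`, { method: 'POST' });
 *   const status = await (await fetch(`${base}/status`)).json();
 *   console.log('paused?', status.paused);
 */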
export default router;