- Add task completion verification with DB and output layers - Add reconciliation loop to sync worker memory with DB state - Implement IP-per-store-per-platform conflict detection - Add task ID hash to MinIO payload filenames for traceability - Fix schedule edit modal with dispensary info in API responses - Add task ID display after dispensary name in worker dashboard - Add migrations for proxy_ip and source tracking columns 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1849 lines
58 KiB
TypeScript
1849 lines
58 KiB
TypeScript
/**
|
|
* Task Queue API Routes
|
|
*
|
|
* Endpoints for managing worker tasks, viewing capacity metrics,
|
|
* and generating batch tasks.
|
|
*
|
|
* SCHEDULE MANAGEMENT (added 2025-12-12):
|
|
* This file now contains the canonical schedule management endpoints.
|
|
* The job_schedules table has been deprecated and all schedule management
|
|
* is now consolidated into task_schedules:
|
|
*
|
|
* Schedule endpoints:
|
|
* GET /api/tasks/schedules - List all schedules
|
|
* POST /api/tasks/schedules - Create new schedule
|
|
* GET /api/tasks/schedules/:id - Get schedule by ID
|
|
* PUT /api/tasks/schedules/:id - Update schedule
|
|
* DELETE /api/tasks/schedules/:id - Delete schedule
|
|
* DELETE /api/tasks/schedules - Bulk delete schedules
|
|
* POST /api/tasks/schedules/:id/toggle - Toggle schedule enabled/disabled
|
|
*
|
|
* Note: "Run Now" was removed - use task priority instead.
|
|
* Higher priority tasks get picked up first (ORDER BY priority DESC).
|
|
*
|
|
* Note: Schedule routes are defined BEFORE /:id to avoid route conflicts
|
|
* (Express matches routes in order, and "schedules" would match /:id otherwise)
|
|
*/
|
|
|
|
import { Router, Request, Response } from 'express';
|
|
import {
|
|
taskService,
|
|
TaskRole,
|
|
TaskStatus,
|
|
TaskFilter,
|
|
TaskSource,
|
|
} from '../tasks/task-service';
|
|
|
|
/**
|
|
* Extract request metadata for source tracking
|
|
*/
|
|
function getRequestMetadata(req: Request): Record<string, unknown> {
|
|
return {
|
|
ip: req.ip || req.socket?.remoteAddress || 'unknown',
|
|
userAgent: req.get('user-agent') || 'unknown',
|
|
endpoint: req.originalUrl,
|
|
method: req.method,
|
|
timestamp: new Date().toISOString(),
|
|
};
|
|
}
|
|
import { pool } from '../db/pool';
|
|
import {
|
|
isTaskPoolPaused,
|
|
isTaskPoolOpen,
|
|
pauseTaskPool,
|
|
resumeTaskPool,
|
|
closeTaskPool,
|
|
openTaskPool,
|
|
getTaskPoolStatus,
|
|
} from '../tasks/task-pool-state';
|
|
|
|
// Router for all /api/tasks endpoints; mounted by the server entry point.
const router = Router();
|
|
|
|
/**
|
|
* GET /api/tasks
|
|
* List tasks with optional filters
|
|
*
|
|
* Query params:
|
|
* - role: Filter by role
|
|
* - status: Filter by status (comma-separated for multiple)
|
|
* - dispensary_id: Filter by dispensary
|
|
* - worker_id: Filter by worker
|
|
* - limit: Max results (default 100)
|
|
* - offset: Pagination offset
|
|
*/
|
|
router.get('/', async (req: Request, res: Response) => {
|
|
try {
|
|
const filter: TaskFilter = {};
|
|
|
|
if (req.query.role) {
|
|
filter.role = req.query.role as TaskRole;
|
|
}
|
|
|
|
if (req.query.status) {
|
|
const statuses = (req.query.status as string).split(',') as TaskStatus[];
|
|
filter.status = statuses.length === 1 ? statuses[0] : statuses;
|
|
}
|
|
|
|
if (req.query.dispensary_id) {
|
|
filter.dispensary_id = parseInt(req.query.dispensary_id as string, 10);
|
|
}
|
|
|
|
if (req.query.worker_id) {
|
|
filter.worker_id = req.query.worker_id as string;
|
|
}
|
|
|
|
if (req.query.pool_id) {
|
|
filter.pool_id = parseInt(req.query.pool_id as string, 10);
|
|
}
|
|
|
|
if (req.query.limit) {
|
|
filter.limit = parseInt(req.query.limit as string, 10);
|
|
}
|
|
|
|
if (req.query.offset) {
|
|
filter.offset = parseInt(req.query.offset as string, 10);
|
|
}
|
|
|
|
const tasks = await taskService.listTasks(filter);
|
|
res.json({ tasks, count: tasks.length });
|
|
} catch (error: unknown) {
|
|
console.error('Error listing tasks:', error);
|
|
res.status(500).json({ error: 'Failed to list tasks' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* GET /api/tasks/counts
|
|
* Get task counts by status
|
|
*/
|
|
router.get('/counts', async (_req: Request, res: Response) => {
|
|
try {
|
|
const counts = await taskService.getTaskCounts();
|
|
res.json(counts);
|
|
} catch (error: unknown) {
|
|
console.error('Error getting task counts:', error);
|
|
res.status(500).json({ error: 'Failed to get task counts' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* GET /api/tasks/counts/by-state
|
|
* Get pending task counts grouped by state
|
|
*/
|
|
router.get('/counts/by-state', async (_req: Request, res: Response) => {
|
|
try {
|
|
const result = await pool.query(`
|
|
SELECT
|
|
d.state as state_code,
|
|
COUNT(*) FILTER (WHERE t.status = 'pending') as pending,
|
|
COUNT(*) FILTER (WHERE t.status IN ('claimed', 'running')) as active,
|
|
COUNT(*) as total
|
|
FROM worker_tasks t
|
|
JOIN dispensaries d ON t.dispensary_id = d.id
|
|
WHERE t.status IN ('pending', 'claimed', 'running')
|
|
GROUP BY d.state
|
|
ORDER BY COUNT(*) DESC
|
|
`);
|
|
res.json({ states: result.rows });
|
|
} catch (error: unknown) {
|
|
console.error('Error getting task counts by state:', error);
|
|
res.status(500).json({ error: 'Failed to get task counts by state' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* GET /api/tasks/capacity
|
|
* Get capacity metrics for all roles
|
|
*/
|
|
router.get('/capacity', async (_req: Request, res: Response) => {
|
|
try {
|
|
const metrics = await taskService.getCapacityMetrics();
|
|
res.json({ metrics });
|
|
} catch (error: unknown) {
|
|
console.error('Error getting capacity metrics:', error);
|
|
res.status(500).json({ error: 'Failed to get capacity metrics' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* GET /api/tasks/capacity/:role
|
|
* Get capacity metrics for a specific role
|
|
*/
|
|
router.get('/capacity/:role', async (req: Request, res: Response) => {
|
|
try {
|
|
const role = req.params.role as TaskRole;
|
|
const capacity = await taskService.getRoleCapacity(role);
|
|
|
|
if (!capacity) {
|
|
return res.status(404).json({ error: 'Role not found or no data' });
|
|
}
|
|
|
|
// Calculate workers needed for different SLAs
|
|
const workersFor1Hour = await taskService.calculateWorkersNeeded(role, 1);
|
|
const workersFor4Hours = await taskService.calculateWorkersNeeded(role, 4);
|
|
const workersFor8Hours = await taskService.calculateWorkersNeeded(role, 8);
|
|
|
|
res.json({
|
|
...capacity,
|
|
workers_needed: {
|
|
for_1_hour: workersFor1Hour,
|
|
for_4_hours: workersFor4Hours,
|
|
for_8_hours: workersFor8Hours,
|
|
},
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error getting role capacity:', error);
|
|
res.status(500).json({ error: 'Failed to get role capacity' });
|
|
}
|
|
});
|
|
|
|
// ============================================================
|
|
// SCHEDULE MANAGEMENT ROUTES
|
|
// (Must be before /:id to avoid route conflicts)
|
|
// ============================================================
|
|
|
|
/**
 * GET /api/tasks/schedules
 * List all task schedules
 *
 * Query params:
 * - enabled: 'true' to return only enabled schedules
 *
 * Returns schedules with is_immutable flag - immutable schedules can only
 * have their interval_hours, priority, and enabled fields updated (not deleted).
 */
router.get('/schedules', async (req: Request, res: Response) => {
  try {
    const enabledOnly = req.query.enabled === 'true';

    // Check if pool_id column exists (migration 114)
    // NOTE(review): this information_schema probe runs on every request;
    // consider caching the result once migration 114 is known to be applied.
    const colCheck = await pool.query(`
      SELECT column_name FROM information_schema.columns
      WHERE table_name = 'task_schedules' AND column_name = 'pool_id'
    `);
    const hasPoolId = colCheck.rows.length > 0;

    let query: string;
    if (hasPoolId) {
      // Full query: joins task_pools for the pool display name and
      // dispensaries for a human-readable dispensary_name.
      query = `
        SELECT ts.id, ts.name, ts.role, ts.description, ts.enabled, ts.interval_hours,
               ts.priority, ts.state_code, ts.pool_id, tp.display_name as pool_name,
               ts.dispensary_id, d.name as dispensary_name,
               ts.platform, ts.method,
               COALESCE(ts.is_immutable, false) as is_immutable,
               ts.last_run_at, ts.next_run_at,
               ts.last_task_count, ts.last_error, ts.created_at, ts.updated_at
        FROM task_schedules ts
        LEFT JOIN task_pools tp ON tp.id = ts.pool_id
        LEFT JOIN dispensaries d ON d.id = ts.dispensary_id
      `;
    } else {
      // Fallback query without pool_id (migration 114 not yet run);
      // emits NULL pool columns so the response shape stays identical.
      query = `
        SELECT ts.id, ts.name, ts.role, ts.description, ts.enabled, ts.interval_hours,
               ts.priority, ts.state_code, NULL::integer as pool_id, NULL::text as pool_name,
               ts.dispensary_id, d.name as dispensary_name,
               ts.platform, ts.method,
               COALESCE(ts.is_immutable, false) as is_immutable,
               ts.last_run_at, ts.next_run_at,
               ts.last_task_count, ts.last_error, ts.created_at, ts.updated_at
        FROM task_schedules ts
        LEFT JOIN dispensaries d ON d.id = ts.dispensary_id
      `;
    }

    if (enabledOnly) {
      query += ` WHERE ts.enabled = true`;
    }

    // Order: discovery roles first, then statewide (NULL state) before
    // state-specific schedules, then alphabetically by name.
    query += ` ORDER BY
      CASE ts.role
        WHEN 'store_discovery' THEN 1
        WHEN 'product_discovery' THEN 2
        WHEN 'analytics_refresh' THEN 3
        ELSE 4
      END,
      ts.state_code NULLS FIRST,
      ts.name`;

    const result = await pool.query(query);
    res.json({ schedules: result.rows });
  } catch (error: unknown) {
    console.error('Error listing schedules:', error);
    res.status(500).json({ error: 'Failed to list schedules' });
  }
});
|
|
|
|
/**
|
|
* DELETE /api/tasks/schedules
|
|
* Bulk delete schedules
|
|
*
|
|
* Immutable schedules are automatically skipped (not deleted).
|
|
*
|
|
* Body:
|
|
* - ids: number[] (required) - array of schedule IDs to delete
|
|
* - all: boolean (optional) - if true, delete all non-immutable schedules (ids ignored)
|
|
*/
|
|
router.delete('/schedules', async (req: Request, res: Response) => {
|
|
try {
|
|
const { ids, all } = req.body;
|
|
|
|
let result;
|
|
let skippedImmutable: { id: number; name: string }[] = [];
|
|
|
|
if (all === true) {
|
|
// First, find immutable schedules that will be skipped
|
|
const immutableResult = await pool.query(`
|
|
SELECT id, name FROM task_schedules WHERE is_immutable = true
|
|
`);
|
|
skippedImmutable = immutableResult.rows;
|
|
|
|
// Delete all non-immutable schedules
|
|
result = await pool.query(`
|
|
DELETE FROM task_schedules
|
|
WHERE COALESCE(is_immutable, false) = false
|
|
RETURNING id, name
|
|
`);
|
|
} else if (Array.isArray(ids) && ids.length > 0) {
|
|
// First, find which of the requested IDs are immutable
|
|
const immutableResult = await pool.query(`
|
|
SELECT id, name FROM task_schedules
|
|
WHERE id = ANY($1) AND is_immutable = true
|
|
`, [ids]);
|
|
skippedImmutable = immutableResult.rows;
|
|
|
|
// Delete only non-immutable schedules from the requested IDs
|
|
result = await pool.query(`
|
|
DELETE FROM task_schedules
|
|
WHERE id = ANY($1) AND COALESCE(is_immutable, false) = false
|
|
RETURNING id, name
|
|
`, [ids]);
|
|
} else {
|
|
return res.status(400).json({
|
|
error: 'Either provide ids array or set all=true',
|
|
});
|
|
}
|
|
|
|
res.json({
|
|
success: true,
|
|
deleted_count: result.rowCount,
|
|
deleted: result.rows,
|
|
skipped_immutable_count: skippedImmutable.length,
|
|
skipped_immutable: skippedImmutable,
|
|
message: skippedImmutable.length > 0
|
|
? `Deleted ${result.rowCount} schedule(s), skipped ${skippedImmutable.length} immutable schedule(s)`
|
|
: `Deleted ${result.rowCount} schedule(s)`,
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error bulk deleting schedules:', error);
|
|
res.status(500).json({ error: 'Failed to delete schedules' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks/schedules
|
|
* Create a new schedule
|
|
*
|
|
* Body:
|
|
* - name: string (required, unique)
|
|
* - role: TaskRole (required)
|
|
* - description: string (optional)
|
|
* - enabled: boolean (default true)
|
|
* - interval_hours: number (required)
|
|
* - priority: number (default 0)
|
|
* - state_code: string (optional)
|
|
* - platform: string (optional)
|
|
*/
|
|
router.post('/schedules', async (req: Request, res: Response) => {
|
|
try {
|
|
const {
|
|
name,
|
|
role,
|
|
description,
|
|
enabled = true,
|
|
interval_hours,
|
|
priority = 0,
|
|
state_code,
|
|
dispensary_id,
|
|
platform,
|
|
} = req.body;
|
|
|
|
if (!name || !role || !interval_hours) {
|
|
return res.status(400).json({
|
|
error: 'name, role, and interval_hours are required',
|
|
});
|
|
}
|
|
|
|
// Calculate next_run_at based on interval
|
|
const nextRunAt = new Date(Date.now() + interval_hours * 60 * 60 * 1000);
|
|
|
|
const result = await pool.query(`
|
|
INSERT INTO task_schedules
|
|
(name, role, description, enabled, interval_hours, priority, state_code, dispensary_id, platform, next_run_at)
|
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
|
|
RETURNING id, name, role, description, enabled, interval_hours,
|
|
priority, state_code, dispensary_id, platform, last_run_at, next_run_at,
|
|
last_task_count, last_error, created_at, updated_at
|
|
`, [name, role, description, enabled, interval_hours, priority, state_code, dispensary_id, platform, nextRunAt]);
|
|
|
|
res.status(201).json(result.rows[0]);
|
|
} catch (error: any) {
|
|
if (error.code === '23505') {
|
|
// Unique constraint violation
|
|
return res.status(409).json({ error: 'A schedule with this name already exists' });
|
|
}
|
|
console.error('Error creating schedule:', error);
|
|
res.status(500).json({ error: 'Failed to create schedule' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* GET /api/tasks/schedules/:id
|
|
* Get a specific schedule by ID
|
|
*/
|
|
router.get('/schedules/:id', async (req: Request, res: Response) => {
|
|
try {
|
|
const scheduleId = parseInt(req.params.id, 10);
|
|
|
|
const result = await pool.query(`
|
|
SELECT id, name, role, description, enabled, interval_hours,
|
|
priority, state_code, platform, last_run_at, next_run_at,
|
|
last_task_count, last_error, created_at, updated_at
|
|
FROM task_schedules
|
|
WHERE id = $1
|
|
`, [scheduleId]);
|
|
|
|
if (result.rows.length === 0) {
|
|
return res.status(404).json({ error: 'Schedule not found' });
|
|
}
|
|
|
|
res.json(result.rows[0]);
|
|
} catch (error: unknown) {
|
|
console.error('Error getting schedule:', error);
|
|
res.status(500).json({ error: 'Failed to get schedule' });
|
|
}
|
|
});
|
|
|
|
/**
 * PUT /api/tasks/schedules/:id
 * Update an existing schedule
 *
 * For IMMUTABLE schedules, only these fields can be updated:
 * - enabled (turn on/off)
 * - interval_hours (change frequency)
 * - priority (change priority)
 *
 * For regular schedules, all fields can be updated.
 *
 * Returns the updated row, with dispensary_name resolved when a
 * dispensary_id is set.
 */
router.put('/schedules/:id', async (req: Request, res: Response) => {
  try {
    const scheduleId = parseInt(req.params.id, 10);
    const {
      name,
      role,
      description,
      enabled,
      interval_hours,
      priority,
      state_code,
      platform,
    } = req.body;

    // First check if schedule exists and if it's immutable
    const checkResult = await pool.query(`
      SELECT id, name, COALESCE(is_immutable, false) as is_immutable
      FROM task_schedules WHERE id = $1
    `, [scheduleId]);

    if (checkResult.rows.length === 0) {
      return res.status(404).json({ error: 'Schedule not found' });
    }

    const schedule = checkResult.rows[0];
    const isImmutable = schedule.is_immutable;

    // For immutable schedules, reject attempts to change protected fields.
    // Presence of the key in the body (even with a null value) counts as an
    // attempted change.
    if (isImmutable) {
      const protectedFields: string[] = [];
      if (name !== undefined) protectedFields.push('name');
      if (role !== undefined) protectedFields.push('role');
      if (description !== undefined) protectedFields.push('description');
      if (state_code !== undefined) protectedFields.push('state_code');
      if (platform !== undefined) protectedFields.push('platform');

      if (protectedFields.length > 0) {
        return res.status(403).json({
          error: 'Cannot modify protected fields on immutable schedule',
          message: `Schedule "${schedule.name}" is immutable. Only enabled, interval_hours, and priority can be changed.`,
          protected_fields: protectedFields,
          allowed_fields: ['enabled', 'interval_hours', 'priority'],
        });
      }
    }

    // Build dynamic update query.
    // IMPORTANT: updates[] and values[] must stay aligned — each pushed SET
    // clause consumes the next $n placeholder in order.
    const updates: string[] = [];
    const values: any[] = [];
    let paramIndex = 1;

    // These fields can only be updated on non-immutable schedules
    if (!isImmutable) {
      if (name !== undefined) {
        updates.push(`name = $${paramIndex++}`);
        values.push(name);
      }
      if (role !== undefined) {
        updates.push(`role = $${paramIndex++}`);
        values.push(role);
      }
      if (description !== undefined) {
        updates.push(`description = $${paramIndex++}`);
        values.push(description);
      }
      if (state_code !== undefined) {
        // Empty string is normalized to NULL (clears the state filter).
        updates.push(`state_code = $${paramIndex++}`);
        values.push(state_code || null);
      }
      if (platform !== undefined) {
        // Empty string is normalized to NULL (clears the platform filter).
        updates.push(`platform = $${paramIndex++}`);
        values.push(platform || null);
      }
    }

    // These fields can be updated on ALL schedules (including immutable)
    if (enabled !== undefined) {
      updates.push(`enabled = $${paramIndex++}`);
      values.push(enabled);
    }
    if (interval_hours !== undefined) {
      updates.push(`interval_hours = $${paramIndex++}`);
      values.push(interval_hours);

      // Recalculate next_run_at if interval changed
      const nextRunAt = new Date(Date.now() + interval_hours * 60 * 60 * 1000);
      updates.push(`next_run_at = $${paramIndex++}`);
      values.push(nextRunAt);
    }
    if (priority !== undefined) {
      updates.push(`priority = $${paramIndex++}`);
      values.push(priority);
    }

    if (updates.length === 0) {
      return res.status(400).json({ error: 'No fields to update' });
    }

    updates.push('updated_at = NOW()');
    // The final value is the WHERE id parameter; paramIndex now points at it.
    values.push(scheduleId);

    const result = await pool.query(`
      UPDATE task_schedules
      SET ${updates.join(', ')}
      WHERE id = $${paramIndex}
      RETURNING id, name, role, description, enabled, interval_hours,
                priority, state_code, platform, method, dispensary_id, pool_id,
                COALESCE(is_immutable, false) as is_immutable,
                last_run_at, next_run_at,
                last_task_count, last_error, created_at, updated_at
    `, values);

    // Add dispensary_name if dispensary_id is set (so clients get the same
    // enriched shape as the list endpoint).
    const updatedSchedule = result.rows[0];
    if (updatedSchedule.dispensary_id) {
      const dispResult = await pool.query(
        'SELECT name FROM dispensaries WHERE id = $1',
        [updatedSchedule.dispensary_id]
      );
      updatedSchedule.dispensary_name = dispResult.rows[0]?.name || null;
    }

    res.json(updatedSchedule);
  } catch (error: any) {
    if (error.code === '23505') {
      // Unique constraint violation on the schedule name
      return res.status(409).json({ error: 'A schedule with this name already exists' });
    }
    console.error('Error updating schedule:', error);
    res.status(500).json({ error: 'Failed to update schedule' });
  }
});
|
|
|
|
/**
|
|
* DELETE /api/tasks/schedules/:id
|
|
* Delete a schedule
|
|
*
|
|
* Immutable schedules cannot be deleted - they can only be disabled.
|
|
*/
|
|
router.delete('/schedules/:id', async (req: Request, res: Response) => {
|
|
try {
|
|
const scheduleId = parseInt(req.params.id, 10);
|
|
|
|
// First check if schedule exists and is immutable
|
|
const checkResult = await pool.query(`
|
|
SELECT id, name, COALESCE(is_immutable, false) as is_immutable
|
|
FROM task_schedules WHERE id = $1
|
|
`, [scheduleId]);
|
|
|
|
if (checkResult.rows.length === 0) {
|
|
return res.status(404).json({ error: 'Schedule not found' });
|
|
}
|
|
|
|
const schedule = checkResult.rows[0];
|
|
|
|
// Prevent deletion of immutable schedules
|
|
if (schedule.is_immutable) {
|
|
return res.status(403).json({
|
|
error: 'Cannot delete immutable schedule',
|
|
message: `Schedule "${schedule.name}" is immutable and cannot be deleted. You can disable it instead.`,
|
|
schedule_id: scheduleId,
|
|
is_immutable: true,
|
|
});
|
|
}
|
|
|
|
// Delete the schedule
|
|
await pool.query(`DELETE FROM task_schedules WHERE id = $1`, [scheduleId]);
|
|
|
|
res.json({
|
|
success: true,
|
|
message: `Schedule "${schedule.name}" deleted`,
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error deleting schedule:', error);
|
|
res.status(500).json({ error: 'Failed to delete schedule' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks/schedules/:id/toggle
|
|
* Toggle a schedule's enabled status
|
|
*/
|
|
router.post('/schedules/:id/toggle', async (req: Request, res: Response) => {
|
|
try {
|
|
const scheduleId = parseInt(req.params.id, 10);
|
|
|
|
const result = await pool.query(`
|
|
UPDATE task_schedules
|
|
SET enabled = NOT enabled,
|
|
updated_at = NOW()
|
|
WHERE id = $1
|
|
RETURNING id, name, enabled
|
|
`, [scheduleId]);
|
|
|
|
if (result.rows.length === 0) {
|
|
return res.status(404).json({ error: 'Schedule not found' });
|
|
}
|
|
|
|
res.json({
|
|
success: true,
|
|
schedule: result.rows[0],
|
|
message: result.rows[0].enabled
|
|
? `Schedule "${result.rows[0].name}" enabled`
|
|
: `Schedule "${result.rows[0].name}" disabled`,
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error toggling schedule:', error);
|
|
res.status(500).json({ error: 'Failed to toggle schedule' });
|
|
}
|
|
});
|
|
|
|
// ============================================================
|
|
// TASK-SPECIFIC ROUTES (with :id parameter)
|
|
// ============================================================
|
|
|
|
/**
|
|
* GET /api/tasks/:id
|
|
* Get a specific task by ID
|
|
*/
|
|
router.get('/:id', async (req: Request, res: Response) => {
|
|
try {
|
|
const taskId = parseInt(req.params.id, 10);
|
|
const task = await taskService.getTask(taskId);
|
|
|
|
if (!task) {
|
|
return res.status(404).json({ error: 'Task not found' });
|
|
}
|
|
|
|
res.json(task);
|
|
} catch (error: unknown) {
|
|
console.error('Error getting task:', error);
|
|
res.status(500).json({ error: 'Failed to get task' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* DELETE /api/tasks/:id
|
|
* Delete a specific task by ID
|
|
* Only allows deletion of failed, completed, or pending tasks (not running)
|
|
*/
|
|
router.delete('/:id', async (req: Request, res: Response) => {
|
|
try {
|
|
const taskId = parseInt(req.params.id, 10);
|
|
|
|
// First check if task exists and its status
|
|
const task = await taskService.getTask(taskId);
|
|
if (!task) {
|
|
return res.status(404).json({ error: 'Task not found' });
|
|
}
|
|
|
|
// Don't allow deleting running tasks
|
|
if (task.status === 'running' || task.status === 'claimed') {
|
|
return res.status(400).json({ error: 'Cannot delete a running or claimed task' });
|
|
}
|
|
|
|
// Delete the task
|
|
await pool.query('DELETE FROM worker_tasks WHERE id = $1', [taskId]);
|
|
|
|
res.json({ success: true, message: `Task ${taskId} deleted` });
|
|
} catch (error: unknown) {
|
|
console.error('Error deleting task:', error);
|
|
res.status(500).json({ error: 'Failed to delete task' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks
|
|
* Create a new task
|
|
*
|
|
* Body:
|
|
* - role: TaskRole (required)
|
|
* - dispensary_id: number (optional)
|
|
* - platform: string (optional)
|
|
* - priority: number (optional, default 0)
|
|
* - scheduled_for: ISO date string (optional)
|
|
*/
|
|
router.post('/', async (req: Request, res: Response) => {
|
|
try {
|
|
const { role, dispensary_id, platform, priority, scheduled_for } = req.body;
|
|
|
|
if (!role) {
|
|
return res.status(400).json({ error: 'Role is required' });
|
|
}
|
|
|
|
// Check if store already has an active task
|
|
if (dispensary_id) {
|
|
const hasActive = await taskService.hasActiveTask(dispensary_id);
|
|
if (hasActive) {
|
|
return res.status(409).json({
|
|
error: 'Store already has an active task',
|
|
dispensary_id,
|
|
});
|
|
}
|
|
}
|
|
|
|
const task = await taskService.createTask({
|
|
role,
|
|
dispensary_id,
|
|
platform,
|
|
priority,
|
|
scheduled_for: scheduled_for ? new Date(scheduled_for) : undefined,
|
|
});
|
|
|
|
res.status(201).json(task);
|
|
} catch (error: unknown) {
|
|
console.error('Error creating task:', error);
|
|
res.status(500).json({ error: 'Failed to create task' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks/generate/resync
|
|
* Generate daily resync tasks for all active stores
|
|
*
|
|
* Body:
|
|
* - batches_per_day: number (optional, default 6 = every 4 hours)
|
|
* - date: ISO date string (optional, default today)
|
|
*/
|
|
router.post('/generate/resync', async (req: Request, res: Response) => {
|
|
try {
|
|
const { batches_per_day, date } = req.body;
|
|
const batchesPerDay = batches_per_day ?? 6;
|
|
const targetDate = date ? new Date(date) : new Date();
|
|
|
|
const createdCount = await taskService.generateDailyResyncTasks(
|
|
batchesPerDay,
|
|
targetDate
|
|
);
|
|
|
|
res.json({
|
|
success: true,
|
|
tasks_created: createdCount,
|
|
batches_per_day: batchesPerDay,
|
|
date: targetDate.toISOString().split('T')[0],
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error generating resync tasks:', error);
|
|
res.status(500).json({ error: 'Failed to generate resync tasks' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks/generate/discovery
|
|
* Generate store discovery tasks for a platform
|
|
*
|
|
* Body:
|
|
* - platform: string (required, e.g., 'dutchie')
|
|
* - state_code: string (optional, e.g., 'AZ')
|
|
* - priority: number (optional)
|
|
*/
|
|
router.post('/generate/discovery', async (req: Request, res: Response) => {
|
|
try {
|
|
const { platform, state_code, priority } = req.body;
|
|
|
|
if (!platform) {
|
|
return res.status(400).json({ error: 'Platform is required' });
|
|
}
|
|
|
|
const task = await taskService.createStoreDiscoveryTask(
|
|
platform,
|
|
state_code,
|
|
priority ?? 0
|
|
);
|
|
|
|
res.status(201).json(task);
|
|
} catch (error: unknown) {
|
|
console.error('Error creating discovery task:', error);
|
|
res.status(500).json({ error: 'Failed to create discovery task' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks/recover-stale
|
|
* Recover stale tasks from dead workers
|
|
*
|
|
* Body:
|
|
* - threshold_minutes: number (optional, default 10)
|
|
*/
|
|
router.post('/recover-stale', async (req: Request, res: Response) => {
|
|
try {
|
|
const { threshold_minutes } = req.body;
|
|
const recovered = await taskService.recoverStaleTasks(threshold_minutes ?? 10);
|
|
|
|
res.json({
|
|
success: true,
|
|
tasks_recovered: recovered,
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error recovering stale tasks:', error);
|
|
res.status(500).json({ error: 'Failed to recover stale tasks' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks/retry-failed
|
|
* Reset failed tasks back to pending for retry
|
|
*
|
|
* Body:
|
|
* - role: string (optional, filter by role)
|
|
* - max_age_hours: number (optional, default 24 - only retry tasks from last N hours)
|
|
* - limit: number (optional, default 100)
|
|
*/
|
|
router.post('/retry-failed', async (req: Request, res: Response) => {
|
|
try {
|
|
const { role, max_age_hours = 24, limit = 100 } = req.body;
|
|
|
|
let query = `
|
|
UPDATE worker_tasks
|
|
SET status = 'pending',
|
|
worker_id = NULL,
|
|
claimed_at = NULL,
|
|
started_at = NULL,
|
|
completed_at = NULL,
|
|
error_message = NULL,
|
|
retry_count = retry_count + 1,
|
|
updated_at = NOW()
|
|
WHERE status = 'failed'
|
|
AND created_at > NOW() - INTERVAL '${parseInt(max_age_hours)} hours'
|
|
`;
|
|
const params: any[] = [];
|
|
|
|
if (role) {
|
|
query += ` AND role = $1`;
|
|
params.push(role);
|
|
}
|
|
|
|
query += ` RETURNING id, role, dispensary_id`;
|
|
|
|
const { rows } = await pool.query(query, params);
|
|
|
|
console.log(`[Tasks] Retried ${rows.length} failed tasks`);
|
|
|
|
res.json({
|
|
success: true,
|
|
tasks_retried: rows.length,
|
|
tasks: rows.slice(0, 20), // Return first 20 for visibility
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error retrying failed tasks:', error);
|
|
res.status(500).json({ error: 'Failed to retry tasks' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks/fix-null-methods
|
|
* Update tasks with method=null to method='http'
|
|
* Ensures only HTTP-preflight workers can claim crawl tasks
|
|
*/
|
|
router.post('/fix-null-methods', async (_req: Request, res: Response) => {
|
|
try {
|
|
const { rows } = await pool.query(`
|
|
UPDATE worker_tasks
|
|
SET method = 'http', updated_at = NOW()
|
|
WHERE method IS NULL
|
|
AND role IN ('product_discovery', 'product_refresh', 'entry_point_discovery', 'store_discovery', 'payload_fetch')
|
|
AND status IN ('pending', 'failed')
|
|
RETURNING id, role, status
|
|
`);
|
|
|
|
console.log(`[Tasks] Fixed ${rows.length} tasks with null method → http`);
|
|
|
|
res.json({
|
|
success: true,
|
|
tasks_fixed: rows.length,
|
|
tasks: rows.slice(0, 20),
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error fixing null methods:', error);
|
|
res.status(500).json({ error: 'Failed to fix null methods' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* GET /api/tasks/role/:role/last-completion
|
|
* Get the last completion time for a role
|
|
*/
|
|
router.get('/role/:role/last-completion', async (req: Request, res: Response) => {
|
|
try {
|
|
const role = req.params.role as TaskRole;
|
|
const lastCompletion = await taskService.getLastCompletion(role);
|
|
|
|
res.json({
|
|
role,
|
|
last_completion: lastCompletion?.toISOString() ?? null,
|
|
time_since: lastCompletion
|
|
? Math.floor((Date.now() - lastCompletion.getTime()) / 1000)
|
|
: null,
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error getting last completion:', error);
|
|
res.status(500).json({ error: 'Failed to get last completion' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* GET /api/tasks/role/:role/recent
|
|
* Get recent completions for a role
|
|
*/
|
|
router.get('/role/:role/recent', async (req: Request, res: Response) => {
|
|
try {
|
|
const role = req.params.role as TaskRole;
|
|
const limit = parseInt(req.query.limit as string, 10) || 10;
|
|
|
|
const tasks = await taskService.getRecentCompletions(role, limit);
|
|
res.json({ tasks });
|
|
} catch (error: unknown) {
|
|
console.error('Error getting recent completions:', error);
|
|
res.status(500).json({ error: 'Failed to get recent completions' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* GET /api/tasks/store/:dispensaryId/active
|
|
* Check if a store has an active task
|
|
*/
|
|
router.get('/store/:dispensaryId/active', async (req: Request, res: Response) => {
|
|
try {
|
|
const dispensaryId = parseInt(req.params.dispensaryId, 10);
|
|
const hasActive = await taskService.hasActiveTask(dispensaryId);
|
|
|
|
res.json({
|
|
dispensary_id: dispensaryId,
|
|
has_active_task: hasActive,
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error checking active task:', error);
|
|
res.status(500).json({ error: 'Failed to check active task' });
|
|
}
|
|
});
|
|
|
|
// ============================================================
|
|
// MIGRATION ROUTES - Disable old job systems
|
|
// ============================================================
|
|
|
|
/**
 * GET /api/tasks/migration/status
 * Get status of old job systems vs new task queue
 *
 * Compares counts in the legacy tables (job_schedules,
 * dispensary_crawl_jobs, raw_payloads) against the new task queue and
 * emits a human-readable migration recommendation.
 */
router.get('/migration/status', async (_req: Request, res: Response) => {
  try {
    // Get old job system counts — the three legacy-table queries and the
    // task-counts call are independent, so they run in parallel.
    const [schedules, crawlJobs, rawPayloads, taskCounts] = await Promise.all([
      pool.query(`
        SELECT
          COUNT(*) as total,
          COUNT(*) FILTER (WHERE enabled = true) as enabled
        FROM job_schedules
      `),
      pool.query(`
        SELECT
          COUNT(*) as total,
          COUNT(*) FILTER (WHERE status = 'pending') as pending,
          COUNT(*) FILTER (WHERE status = 'running') as running
        FROM dispensary_crawl_jobs
      `),
      pool.query(`
        SELECT
          COUNT(*) as total,
          COUNT(*) FILTER (WHERE processed = false) as unprocessed
        FROM raw_payloads
      `),
      taskService.getTaskCounts(),
    ]);

    res.json({
      old_systems: {
        // node-postgres returns COUNT(...) as strings, hence the parseInt
        // on every count below.
        job_schedules: {
          total: parseInt(schedules.rows[0].total) || 0,
          enabled: parseInt(schedules.rows[0].enabled) || 0,
        },
        dispensary_crawl_jobs: {
          total: parseInt(crawlJobs.rows[0].total) || 0,
          pending: parseInt(crawlJobs.rows[0].pending) || 0,
          running: parseInt(crawlJobs.rows[0].running) || 0,
        },
        raw_payloads: {
          total: parseInt(rawPayloads.rows[0].total) || 0,
          unprocessed: parseInt(rawPayloads.rows[0].unprocessed) || 0,
        },
      },
      new_task_queue: taskCounts,
      // NOTE(review): 'enabled' here is the raw string count from pg, so
      // this relies on JS string>number coercion — works, but confirm and
      // consider parseInt for clarity.
      recommendation: schedules.rows[0].enabled > 0
        ? 'Disable old job schedules before switching to new task queue'
        : 'Ready to use new task queue',
    });
  } catch (error: unknown) {
    console.error('Error getting migration status:', error);
    res.status(500).json({ error: 'Failed to get migration status' });
  }
});
|
|
|
|
/**
|
|
* POST /api/tasks/migration/disable-old-schedules
|
|
* Disable all old job schedules to prepare for new task queue
|
|
*/
|
|
router.post('/migration/disable-old-schedules', async (_req: Request, res: Response) => {
|
|
try {
|
|
const result = await pool.query(`
|
|
UPDATE job_schedules
|
|
SET enabled = false,
|
|
updated_at = NOW()
|
|
WHERE enabled = true
|
|
RETURNING id, job_name
|
|
`);
|
|
|
|
res.json({
|
|
success: true,
|
|
disabled_count: result.rowCount,
|
|
disabled_schedules: result.rows.map(r => ({ id: r.id, job_name: r.job_name })),
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error disabling old schedules:', error);
|
|
res.status(500).json({ error: 'Failed to disable old schedules' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks/migration/cancel-pending-crawl-jobs
|
|
* Cancel all pending crawl jobs from the old system
|
|
*/
|
|
router.post('/migration/cancel-pending-crawl-jobs', async (_req: Request, res: Response) => {
|
|
try {
|
|
const result = await pool.query(`
|
|
UPDATE dispensary_crawl_jobs
|
|
SET status = 'cancelled',
|
|
completed_at = NOW(),
|
|
updated_at = NOW()
|
|
WHERE status = 'pending'
|
|
RETURNING id
|
|
`);
|
|
|
|
res.json({
|
|
success: true,
|
|
cancelled_count: result.rowCount,
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error cancelling pending crawl jobs:', error);
|
|
res.status(500).json({ error: 'Failed to cancel pending crawl jobs' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks/migration/create-resync-tasks
|
|
* Create product_refresh tasks for all crawl-enabled dispensaries
|
|
*/
|
|
router.post('/migration/create-resync-tasks', async (req: Request, res: Response) => {
|
|
try {
|
|
const { priority = 0, state_code } = req.body;
|
|
|
|
let query = `
|
|
SELECT id, name FROM dispensaries
|
|
WHERE crawl_enabled = true
|
|
AND platform_dispensary_id IS NOT NULL
|
|
`;
|
|
const params: any[] = [];
|
|
|
|
if (state_code) {
|
|
query += `
|
|
AND state_id = (SELECT id FROM states WHERE code = $1)
|
|
`;
|
|
params.push(state_code.toUpperCase());
|
|
}
|
|
|
|
query += ` ORDER BY id`;
|
|
|
|
const dispensaries = await pool.query(query, params);
|
|
let created = 0;
|
|
|
|
for (const disp of dispensaries.rows) {
|
|
// Check if already has pending/running task
|
|
const hasActive = await taskService.hasActiveTask(disp.id);
|
|
if (!hasActive) {
|
|
await taskService.createTask({
|
|
role: 'product_refresh',
|
|
dispensary_id: disp.id,
|
|
platform: 'dutchie',
|
|
priority,
|
|
});
|
|
created++;
|
|
}
|
|
}
|
|
|
|
res.json({
|
|
success: true,
|
|
tasks_created: created,
|
|
dispensaries_checked: dispensaries.rows.length,
|
|
state_filter: state_code || 'all',
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error creating resync tasks:', error);
|
|
res.status(500).json({ error: 'Failed to create resync tasks' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks/migration/full-migrate
|
|
* One-click migration: disable old systems, create new tasks
|
|
*/
|
|
router.post('/migration/full-migrate', async (req: Request, res: Response) => {
|
|
try {
|
|
const results: any = {
|
|
success: true,
|
|
steps: [],
|
|
};
|
|
|
|
// Step 1: Disable old job schedules
|
|
const disableResult = await pool.query(`
|
|
UPDATE job_schedules
|
|
SET enabled = false, updated_at = NOW()
|
|
WHERE enabled = true
|
|
RETURNING id
|
|
`);
|
|
results.steps.push({
|
|
step: 'disable_job_schedules',
|
|
count: disableResult.rowCount,
|
|
});
|
|
|
|
// Step 2: Cancel pending crawl jobs
|
|
const cancelResult = await pool.query(`
|
|
UPDATE dispensary_crawl_jobs
|
|
SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
|
|
WHERE status = 'pending'
|
|
RETURNING id
|
|
`);
|
|
results.steps.push({
|
|
step: 'cancel_pending_crawl_jobs',
|
|
count: cancelResult.rowCount,
|
|
});
|
|
|
|
// Step 3: Generate initial resync tasks
|
|
const resyncCount = await taskService.generateDailyResyncTasks(6);
|
|
results.steps.push({
|
|
step: 'generate_resync_tasks',
|
|
count: resyncCount,
|
|
});
|
|
|
|
// Step 4: Create store discovery task
|
|
const discoveryTask = await taskService.createStoreDiscoveryTask('dutchie', undefined, 0);
|
|
results.steps.push({
|
|
step: 'create_discovery_task',
|
|
task_id: discoveryTask.id,
|
|
});
|
|
|
|
// Step 5: Create analytics refresh task
|
|
const analyticsTask = await taskService.createTask({
|
|
role: 'analytics_refresh',
|
|
priority: 0,
|
|
});
|
|
results.steps.push({
|
|
step: 'create_analytics_task',
|
|
task_id: analyticsTask.id,
|
|
});
|
|
|
|
results.message = 'Migration complete. New task workers will pick up tasks.';
|
|
res.json(results);
|
|
} catch (error: unknown) {
|
|
console.error('Error during full migration:', error);
|
|
res.status(500).json({ error: 'Failed to complete migration' });
|
|
}
|
|
});
|
|
|
|
// ============================================================
|
|
// STAGGERED BATCH TASK CREATION
|
|
// ============================================================
|
|
|
|
/**
|
|
* POST /api/tasks/batch/staggered
|
|
* Create multiple tasks with staggered start times
|
|
*
|
|
* This endpoint prevents resource contention when creating many tasks by
|
|
* staggering their scheduled_for timestamps. Each task becomes eligible
|
|
* for claiming only after its scheduled time.
|
|
*
|
|
* WORKFLOW:
|
|
* 1. Tasks created with scheduled_for = NOW() + (index * stagger_seconds)
|
|
* 2. Worker claims task only when scheduled_for <= NOW()
|
|
* 3. Worker runs preflight on EVERY task claim
|
|
* 4. If preflight passes, worker executes task
|
|
* 5. If preflight fails, task released back to pending for another worker
|
|
*
|
|
* Body:
|
|
* - dispensary_ids: number[] (required) - Array of dispensary IDs
|
|
* - role: TaskRole (required) - 'product_refresh' | 'product_discovery'
|
|
* - stagger_seconds: number (default: 15) - Seconds between each task start
|
|
* - platform: string (default: 'dutchie')
|
|
* - method: 'curl' | 'http' | null (default: null)
|
|
*/
|
|
router.post('/batch/staggered', async (req: Request, res: Response) => {
|
|
try {
|
|
const requestMetadata = getRequestMetadata(req);
|
|
|
|
// Log the request for tracking phantom tasks
|
|
console.log(`[TaskAPI] POST /batch/staggered from ${requestMetadata.ip} (${requestMetadata.userAgent})`);
|
|
|
|
const {
|
|
dispensary_ids,
|
|
role,
|
|
stagger_seconds = 0, // Default to 0 (no stagger) - worker controls pacing
|
|
platform = 'dutchie',
|
|
method = null,
|
|
} = req.body;
|
|
|
|
if (!dispensary_ids || !Array.isArray(dispensary_ids) || dispensary_ids.length === 0) {
|
|
return res.status(400).json({ error: 'dispensary_ids array is required' });
|
|
}
|
|
|
|
if (!role) {
|
|
return res.status(400).json({ error: 'role is required' });
|
|
}
|
|
|
|
console.log(`[TaskAPI] Creating ${dispensary_ids.length} ${role} tasks for dispensaries: ${dispensary_ids.slice(0, 5).join(',')}...`);
|
|
|
|
const result = await taskService.createStaggeredTasks(
|
|
dispensary_ids,
|
|
role as TaskRole,
|
|
stagger_seconds,
|
|
platform,
|
|
method,
|
|
{
|
|
source: 'api_batch_staggered',
|
|
source_metadata: requestMetadata,
|
|
}
|
|
);
|
|
|
|
const totalDuration = (result.created - 1) * stagger_seconds;
|
|
const estimatedEndTime = new Date(Date.now() + totalDuration * 1000);
|
|
|
|
res.status(201).json({
|
|
success: true,
|
|
created: result.created,
|
|
skipped: result.skipped,
|
|
task_ids: result.taskIds,
|
|
stagger_seconds,
|
|
total_duration_seconds: totalDuration,
|
|
estimated_completion: estimatedEndTime.toISOString(),
|
|
message: result.skipped > 0
|
|
? `Created ${result.created} staggered ${role} tasks, skipped ${result.skipped} (duplicate/recently completed)`
|
|
: `Created ${result.created} staggered ${role} tasks (${stagger_seconds}s apart, ~${Math.ceil(totalDuration / 60)} min total)`,
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error creating staggered tasks:', error);
|
|
res.status(500).json({ error: 'Failed to create staggered tasks' });
|
|
}
|
|
});
|
|
|
|
/**
 * POST /api/tasks/batch/entry-point-discovery
 * Create entry_point_discovery tasks for stores missing platform_dispensary_id
 *
 * This is idempotent - stores that already have platform_dispensary_id are skipped.
 * Only creates tasks for stores with menu_url set and crawl_enabled = true.
 *
 * Body (optional):
 * - state_code: string (optional) - Filter by state code
 * - stagger_seconds: number (default: 5) - Seconds between tasks
 * - force: boolean (default: false) - Re-run even for previously failed stores
 */
router.post('/batch/entry-point-discovery', async (req: Request, res: Response) => {
  try {
    const {
      state_code,
      stagger_seconds = 5,
      force = false,
    } = req.body;

    // Find stores that need entry point discovery.
    // Eligibility: crawl-enabled, has a menu_url, no platform_dispensary_id yet.
    // Unless `force` is set, stores with a non-pending id_resolution_status
    // (i.e. a previous resolution attempt already recorded an outcome) are
    // excluded. Stores with an in-flight entry_point_discovery task are
    // always excluded, which is what makes the endpoint idempotent.
    // NOTE: the state_code / force clauses are interpolated into the SQL
    // text (not bound) — they only toggle fixed clause strings, and the
    // state value itself is still bound as $1.
    const storeResult = await pool.query(`
      SELECT d.id, d.name, d.menu_url
      FROM dispensaries d
      JOIN states s ON d.state_id = s.id
      WHERE d.crawl_enabled = true
        AND d.menu_url IS NOT NULL
        AND d.platform_dispensary_id IS NULL
        ${state_code ? 'AND s.code = $1' : ''}
        ${!force ? "AND (d.id_resolution_status IS NULL OR d.id_resolution_status = 'pending')" : ''}
        -- No pending/running entry_point_discovery task already
        AND NOT EXISTS (
          SELECT 1 FROM worker_tasks t
          WHERE t.dispensary_id = d.id
          AND t.role = 'entry_point_discovery'
          AND t.status IN ('pending', 'claimed', 'running')
        )
      ORDER BY d.id
    `, state_code ? [state_code.toUpperCase()] : []);

    const dispensaryIds = storeResult.rows.map((r: { id: number }) => r.id);

    // Nothing to do — respond successfully with a zero count.
    if (dispensaryIds.length === 0) {
      return res.json({
        success: true,
        message: state_code
          ? `No ${state_code.toUpperCase()} stores need entry point discovery`
          : 'No stores need entry point discovery',
        tasks_created: 0,
      });
    }

    // Create staggered tasks: each task's scheduled_for is offset by
    // i * stagger_seconds so workers pick them up gradually.
    // Priority 10 and method 'http' are fixed for this task role.
    const taskIds: number[] = [];
    for (let i = 0; i < dispensaryIds.length; i++) {
      const scheduledFor = new Date(Date.now() + i * stagger_seconds * 1000);
      const result = await pool.query(`
        INSERT INTO worker_tasks (role, dispensary_id, priority, scheduled_for, method)
        VALUES ('entry_point_discovery', $1, 10, $2, 'http')
        RETURNING id
      `, [dispensaryIds[i], scheduledFor]);
      taskIds.push(result.rows[0].id);
    }

    // NOTE(review): this uses n * stagger_seconds while the last task is
    // scheduled at (n - 1) * stagger_seconds, so the estimate overshoots by
    // one interval; other batch endpoints here use (created - 1). The field
    // is informational only.
    const totalDuration = dispensaryIds.length * stagger_seconds;
    const estimatedEndTime = new Date(Date.now() + totalDuration * 1000);

    res.json({
      success: true,
      tasks_created: taskIds.length,
      task_ids: taskIds,
      stores: storeResult.rows.map((r: { id: number; name: string }) => ({ id: r.id, name: r.name })),
      stagger_seconds,
      total_duration_seconds: totalDuration,
      estimated_completion: estimatedEndTime.toISOString(),
      message: `Created ${taskIds.length} entry_point_discovery tasks${state_code ? ` for ${state_code.toUpperCase()}` : ''}`,
    });
  } catch (error: unknown) {
    console.error('Error creating entry point discovery tasks:', error);
    res.status(500).json({ error: 'Failed to create entry point discovery tasks' });
  }
});
|
|
|
|
/**
 * POST /api/tasks/batch/store-discovery
 * Create parallelized store_discovery_state tasks for all active states
 *
 * Instead of one monolithic store_discovery task that takes hours,
 * this creates individual tasks for each state that can run in parallel.
 *
 * Body (optional):
 * - stagger_seconds: number (default: 10) - Seconds between each state task
 * - priority: number (default: 5) - Task priority
 * - states: string[] (optional) - Specific state codes to discover (default: all active)
 */
router.post('/batch/store-discovery', async (req: Request, res: Response) => {
  try {
    const {
      stagger_seconds = 10,
      priority = 5,
      states: specificStates,
    } = req.body;

    // Get active states, optionally narrowed to the caller-supplied codes
    // (uppercased before binding).
    let statesQuery = `
      SELECT code, name FROM states WHERE is_active = true
    `;
    const params: any[] = [];

    if (specificStates && Array.isArray(specificStates) && specificStates.length > 0) {
      statesQuery += ` AND code = ANY($1)`;
      params.push(specificStates.map((s: string) => s.toUpperCase()));
    }

    statesQuery += ` ORDER BY code`;

    const statesResult = await pool.query(statesQuery, params);

    if (statesResult.rows.length === 0) {
      return res.json({
        success: true,
        message: 'No active states to discover',
        tasks_created: 0,
      });
    }

    // Check for existing pending/running store_discovery_state tasks.
    // The target state lives in the task payload (payload->>'state_code'),
    // not in a dedicated column.
    const existingResult = await pool.query(`
      SELECT payload->>'state_code' as state_code
      FROM worker_tasks
      WHERE role = 'store_discovery_state'
      AND status IN ('pending', 'claimed', 'running')
    `);
    const existingStates = new Set(existingResult.rows.map((r: any) => r.state_code));

    // Filter out states that already have pending tasks (idempotency).
    const statesToCreate = statesResult.rows.filter(
      (s: { code: string }) => !existingStates.has(s.code)
    );

    if (statesToCreate.length === 0) {
      return res.json({
        success: true,
        message: 'All states already have pending store_discovery_state tasks',
        tasks_created: 0,
        skipped: statesResult.rows.length,
      });
    }

    // Create staggered tasks for each state: scheduled_for is offset by
    // i * stagger_seconds so the per-state discoveries ramp up gradually.
    const taskIds: number[] = [];
    for (let i = 0; i < statesToCreate.length; i++) {
      const state = statesToCreate[i];
      const scheduledFor = new Date(Date.now() + i * stagger_seconds * 1000);

      const result = await pool.query(`
        INSERT INTO worker_tasks (role, priority, scheduled_for, method, payload)
        VALUES ('store_discovery_state', $1, $2, 'http', $3)
        RETURNING id
      `, [priority, scheduledFor, JSON.stringify({ state_code: state.code })]);

      taskIds.push(result.rows[0].id);
    }

    // NOTE(review): n * stagger_seconds while the last task is scheduled at
    // (n - 1) * stagger_seconds — estimate overshoots by one interval.
    // Informational only (field name says "start_completion": when the last
    // task STARTS, not when discovery finishes).
    const totalDuration = statesToCreate.length * stagger_seconds;
    const estimatedEndTime = new Date(Date.now() + totalDuration * 1000);

    res.status(201).json({
      success: true,
      tasks_created: taskIds.length,
      task_ids: taskIds,
      states: statesToCreate.map((s: { code: string; name: string }) => s.code),
      skipped: statesResult.rows.length - statesToCreate.length,
      stagger_seconds,
      total_duration_seconds: totalDuration,
      estimated_start_completion: estimatedEndTime.toISOString(),
      message: `Created ${taskIds.length} store_discovery_state tasks for parallel execution`,
    });
  } catch (error: unknown) {
    console.error('Error creating store discovery tasks:', error);
    res.status(500).json({ error: 'Failed to create store discovery tasks' });
  }
});
|
|
|
|
// ============================================================
|
|
// STATE-BASED CRAWL ENDPOINTS
|
|
// ============================================================
|
|
|
|
/**
|
|
* POST /api/tasks/crawl-state/:stateCode
|
|
* Create product_discovery tasks for all stores in a state
|
|
*
|
|
* This is the primary endpoint for triggering crawls by state.
|
|
* Creates tasks for all crawl-enabled stores in the specified state.
|
|
*
|
|
* Params:
|
|
* - stateCode: State code (e.g., 'AZ', 'CA', 'CO')
|
|
*
|
|
* Body (optional):
|
|
* - stagger_seconds: number (default: 0) - Seconds between each task (0 = worker controls pacing)
|
|
* - priority: number (default: 10) - Task priority
|
|
* - method: 'curl' | 'http' | null (default: 'http')
|
|
*
|
|
* Returns:
|
|
* - tasks_created: Number of tasks created
|
|
* - stores_in_state: Total stores found for the state
|
|
* - skipped: Number skipped (already have active tasks)
|
|
*/
|
|
router.post('/crawl-state/:stateCode', async (req: Request, res: Response) => {
|
|
try {
|
|
const stateCode = req.params.stateCode.toUpperCase();
|
|
const requestMetadata = getRequestMetadata(req);
|
|
|
|
// Log the request for tracking phantom tasks
|
|
console.log(`[TaskAPI] POST /crawl-state/${stateCode} from ${requestMetadata.ip} (${requestMetadata.userAgent})`);
|
|
|
|
const {
|
|
stagger_seconds = 0, // Default to 0 (no stagger) - worker controls pacing
|
|
priority = 10,
|
|
method = 'http',
|
|
} = req.body;
|
|
|
|
// Verify state exists
|
|
const stateResult = await pool.query(`
|
|
SELECT id, code, name FROM states WHERE code = $1
|
|
`, [stateCode]);
|
|
|
|
if (stateResult.rows.length === 0) {
|
|
return res.status(404).json({
|
|
error: 'State not found',
|
|
state_code: stateCode,
|
|
});
|
|
}
|
|
|
|
const state = stateResult.rows[0];
|
|
|
|
// Get all crawl-enabled dispensaries in this state
|
|
const dispensariesResult = await pool.query(`
|
|
SELECT d.id, d.name
|
|
FROM dispensaries d
|
|
WHERE d.state_id = $1
|
|
AND d.crawl_enabled = true
|
|
AND d.platform_dispensary_id IS NOT NULL
|
|
ORDER BY d.last_fetch_at NULLS FIRST, d.id
|
|
`, [state.id]);
|
|
|
|
if (dispensariesResult.rows.length === 0) {
|
|
return res.status(200).json({
|
|
success: true,
|
|
message: `No crawl-enabled stores found in ${state.name}`,
|
|
state_code: stateCode,
|
|
state_name: state.name,
|
|
tasks_created: 0,
|
|
stores_in_state: 0,
|
|
});
|
|
}
|
|
|
|
const dispensaryIds = dispensariesResult.rows.map((d: { id: number }) => d.id);
|
|
|
|
console.log(`[TaskAPI] Creating ${dispensaryIds.length} product_discovery tasks for ${stateCode}`);
|
|
|
|
// Create tasks with source tracking
|
|
const result = await taskService.createStaggeredTasks(
|
|
dispensaryIds,
|
|
'product_discovery',
|
|
stagger_seconds,
|
|
'dutchie',
|
|
method,
|
|
{
|
|
source: 'api_crawl_state',
|
|
source_metadata: { ...requestMetadata, stateCode },
|
|
}
|
|
);
|
|
|
|
const totalDuration = (result.created - 1) * stagger_seconds;
|
|
const estimatedEndTime = new Date(Date.now() + totalDuration * 1000);
|
|
|
|
res.status(201).json({
|
|
success: true,
|
|
state_code: stateCode,
|
|
state_name: state.name,
|
|
tasks_created: result.created,
|
|
stores_in_state: dispensariesResult.rows.length,
|
|
skipped: result.skipped,
|
|
stagger_seconds,
|
|
total_duration_seconds: totalDuration,
|
|
estimated_completion: estimatedEndTime.toISOString(),
|
|
message: result.skipped > 0
|
|
? `Created ${result.created} product_discovery tasks for ${state.name}, skipped ${result.skipped} (duplicate/recently completed)`
|
|
: `Created ${result.created} product_discovery tasks for ${state.name} (${stagger_seconds}s apart, ~${Math.ceil(totalDuration / 60)} min total)`,
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error creating state crawl tasks:', error);
|
|
res.status(500).json({ error: 'Failed to create state crawl tasks' });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* GET /api/tasks/states
|
|
* List all states with their store counts and crawl status
|
|
*/
|
|
router.get('/states', async (_req: Request, res: Response) => {
|
|
try {
|
|
const result = await pool.query(`
|
|
SELECT
|
|
s.code,
|
|
s.name,
|
|
COUNT(d.id)::int as total_stores,
|
|
COUNT(d.id) FILTER (WHERE d.crawl_enabled = true AND d.platform_dispensary_id IS NOT NULL)::int as crawl_enabled_stores,
|
|
COUNT(d.id) FILTER (WHERE d.crawl_enabled = true AND d.platform_dispensary_id IS NULL)::int as missing_platform_id,
|
|
MAX(d.last_fetch_at) as last_crawl_at,
|
|
(SELECT COUNT(*) FROM worker_tasks t
|
|
JOIN dispensaries d2 ON t.dispensary_id = d2.id
|
|
WHERE d2.state_id = s.id
|
|
AND t.role = 'product_discovery'
|
|
AND t.status IN ('pending', 'claimed', 'running'))::int as active_tasks
|
|
FROM states s
|
|
LEFT JOIN dispensaries d ON d.state_id = s.id
|
|
GROUP BY s.id, s.code, s.name
|
|
HAVING COUNT(d.id) > 0
|
|
ORDER BY COUNT(d.id) DESC
|
|
`);
|
|
|
|
res.json({
|
|
states: result.rows,
|
|
total_states: result.rows.length,
|
|
});
|
|
} catch (error: unknown) {
|
|
console.error('Error listing states:', error);
|
|
res.status(500).json({ error: 'Failed to list states' });
|
|
}
|
|
});
|
|
|
|
// ============================================================
|
|
// TASK POOL MANAGEMENT
|
|
// ============================================================
|
|
|
|
/**
|
|
* GET /api/tasks/pool/status
|
|
* Check if task pool is open or closed
|
|
*/
|
|
router.get('/pool/status', async (_req: Request, res: Response) => {
|
|
try {
|
|
const status = await getTaskPoolStatus();
|
|
res.json({
|
|
success: true,
|
|
...status,
|
|
});
|
|
} catch (err: any) {
|
|
res.status(500).json({ success: false, error: err.message });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks/pool/close
|
|
* Close the task pool - workers won't pick up new tasks
|
|
*/
|
|
router.post('/pool/close', async (_req: Request, res: Response) => {
|
|
try {
|
|
await closeTaskPool();
|
|
res.json({
|
|
success: true,
|
|
open: false,
|
|
message: 'Pool is Closed - workers will not pick up new tasks',
|
|
});
|
|
} catch (err: any) {
|
|
res.status(500).json({ success: false, error: err.message });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks/pool/open
|
|
* Open the task pool - workers will pick up tasks again
|
|
*/
|
|
router.post('/pool/open', async (_req: Request, res: Response) => {
|
|
try {
|
|
await openTaskPool();
|
|
res.json({
|
|
success: true,
|
|
open: true,
|
|
message: 'Pool is Open - workers are picking up tasks',
|
|
});
|
|
} catch (err: any) {
|
|
res.status(500).json({ success: false, error: err.message });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* POST /api/tasks/pool/toggle
|
|
* Toggle the task pool state
|
|
*/
|
|
router.post('/pool/toggle', async (_req: Request, res: Response) => {
|
|
try {
|
|
const isOpen = await isTaskPoolOpen();
|
|
if (isOpen) {
|
|
await closeTaskPool();
|
|
} else {
|
|
await openTaskPool();
|
|
}
|
|
const status = await getTaskPoolStatus();
|
|
res.json({
|
|
success: true,
|
|
...status,
|
|
});
|
|
} catch (err: any) {
|
|
res.status(500).json({ success: false, error: err.message });
|
|
}
|
|
});
|
|
|
|
// Legacy endpoints for compatibility
|
|
router.post('/pool/pause', async (_req: Request, res: Response) => {
|
|
try {
|
|
await closeTaskPool();
|
|
res.json({ success: true, paused: true, message: 'Task pool closed' });
|
|
} catch (err: any) {
|
|
res.status(500).json({ success: false, error: err.message });
|
|
}
|
|
});
|
|
|
|
router.post('/pool/resume', async (_req: Request, res: Response) => {
|
|
try {
|
|
await openTaskPool();
|
|
res.json({ success: true, paused: false, message: 'Task pool opened' });
|
|
} catch (err: any) {
|
|
res.status(500).json({ success: false, error: err.message });
|
|
}
|
|
});
|
|
|
|
// =============================================================================
|
|
// GEO TASK POOLS - View pools and their contents
|
|
// =============================================================================
|
|
|
|
/**
|
|
* GET /api/tasks/pools/summary
|
|
* Quick summary of all pools for dashboard
|
|
* NOTE: Must be defined BEFORE /pools/:id to avoid route conflict
|
|
*/
|
|
router.get('/pools/summary', async (_req: Request, res: Response) => {
|
|
try {
|
|
const { rows } = await pool.query(`
|
|
SELECT
|
|
COUNT(DISTINCT tp.id) as total_pools,
|
|
COUNT(DISTINCT tp.id) FILTER (WHERE tp.is_active) as active_pools,
|
|
COUNT(DISTINCT d.id) as total_stores,
|
|
COUNT(DISTINCT d.id) FILTER (WHERE d.pool_id IS NOT NULL) as assigned_stores,
|
|
COUNT(DISTINCT t.id) FILTER (WHERE t.status = 'pending') as pending_tasks,
|
|
COUNT(DISTINCT t.id) FILTER (WHERE t.status = 'running') as running_tasks
|
|
FROM task_pools tp
|
|
LEFT JOIN dispensaries d ON d.pool_id = tp.id
|
|
LEFT JOIN worker_tasks t ON t.dispensary_id = d.id
|
|
`);
|
|
|
|
const poolStatus = await getTaskPoolStatus();
|
|
|
|
res.json({
|
|
success: true,
|
|
...rows[0],
|
|
pool_open: poolStatus.open,
|
|
});
|
|
} catch (err: any) {
|
|
res.status(500).json({ success: false, error: err.message });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* GET /api/tasks/pools
|
|
* List all geo task pools with their stats
|
|
*/
|
|
router.get('/pools', async (_req: Request, res: Response) => {
|
|
try {
|
|
const { rows } = await pool.query(`
|
|
SELECT
|
|
tp.id,
|
|
tp.name,
|
|
tp.display_name,
|
|
tp.state_code,
|
|
tp.city,
|
|
tp.timezone,
|
|
tp.radius_miles,
|
|
tp.is_active,
|
|
COUNT(DISTINCT d.id) as store_count,
|
|
COUNT(DISTINCT t.id) FILTER (WHERE t.status = 'pending') as pending_tasks,
|
|
COUNT(DISTINCT t.id) FILTER (WHERE t.status = 'running') as running_tasks,
|
|
COUNT(DISTINCT t.id) FILTER (WHERE t.status = 'completed') as completed_tasks,
|
|
COUNT(DISTINCT wr.worker_id) FILTER (WHERE wr.current_pool_id = tp.id) as active_workers
|
|
FROM task_pools tp
|
|
LEFT JOIN dispensaries d ON d.pool_id = tp.id
|
|
LEFT JOIN worker_tasks t ON t.dispensary_id = d.id
|
|
LEFT JOIN worker_registry wr ON wr.current_pool_id = tp.id
|
|
GROUP BY tp.id
|
|
ORDER BY COUNT(DISTINCT t.id) FILTER (WHERE t.status = 'pending') DESC, tp.display_name
|
|
`);
|
|
|
|
res.json({
|
|
success: true,
|
|
pools: rows,
|
|
total: rows.length,
|
|
});
|
|
} catch (err: any) {
|
|
res.status(500).json({ success: false, error: err.message });
|
|
}
|
|
});
|
|
|
|
/**
|
|
* GET /api/tasks/pools/:id
|
|
* Get a single pool with its stores and tasks
|
|
*/
|
|
router.get('/pools/:id', async (req: Request, res: Response) => {
|
|
try {
|
|
const poolId = parseInt(req.params.id);
|
|
|
|
// Get pool info
|
|
const { rows: poolRows } = await pool.query(`
|
|
SELECT * FROM task_pools WHERE id = $1
|
|
`, [poolId]);
|
|
|
|
if (poolRows.length === 0) {
|
|
return res.status(404).json({ success: false, error: 'Pool not found' });
|
|
}
|
|
|
|
// Get stores in this pool
|
|
const { rows: stores } = await pool.query(`
|
|
SELECT
|
|
d.id,
|
|
d.name,
|
|
d.city,
|
|
d.state,
|
|
d.latitude,
|
|
d.longitude,
|
|
COUNT(t.id) FILTER (WHERE t.status = 'pending') as pending_tasks,
|
|
COUNT(t.id) FILTER (WHERE t.status = 'running') as running_tasks
|
|
FROM dispensaries d
|
|
LEFT JOIN worker_tasks t ON t.dispensary_id = d.id
|
|
WHERE d.pool_id = $1
|
|
GROUP BY d.id
|
|
ORDER BY COUNT(t.id) FILTER (WHERE t.status = 'pending') DESC, d.name
|
|
`, [poolId]);
|
|
|
|
// Get active workers for this pool
|
|
const { rows: workers } = await pool.query(`
|
|
SELECT
|
|
worker_id,
|
|
friendly_name,
|
|
current_state,
|
|
current_city,
|
|
http_ip as proxy_ip,
|
|
pool_stores_visited,
|
|
pool_max_stores
|
|
FROM worker_registry
|
|
WHERE current_pool_id = $1
|
|
`, [poolId]);
|
|
|
|
res.json({
|
|
success: true,
|
|
pool: poolRows[0],
|
|
stores,
|
|
workers,
|
|
stats: {
|
|
store_count: stores.length,
|
|
worker_count: workers.length,
|
|
pending_tasks: stores.reduce((sum, s) => sum + parseInt(s.pending_tasks || '0'), 0),
|
|
running_tasks: stores.reduce((sum, s) => sum + parseInt(s.running_tasks || '0'), 0),
|
|
},
|
|
});
|
|
} catch (err: any) {
|
|
res.status(500).json({ success: false, error: err.message });
|
|
}
|
|
});
|
|
|
|
export default router;
|