diff --git a/backend/src/routes/tasks.ts b/backend/src/routes/tasks.ts index b2c5a812..d5bf729c 100644 --- a/backend/src/routes/tasks.ts +++ b/backend/src/routes/tasks.ts @@ -1243,6 +1243,89 @@ router.post('/batch/az-stores', async (req: Request, res: Response) => { } }); +/** + * POST /api/tasks/batch/entry-point-discovery + * Create entry_point_discovery tasks for stores missing platform_dispensary_id + * + * This is idempotent - stores that already have platform_dispensary_id are skipped. + * Only creates tasks for stores with menu_url set and crawl_enabled = true. + * + * Body (optional): + * - state_code: string (optional) - Filter by state code + * - stagger_seconds: number (default: 5) - Seconds between tasks + * - force: boolean (default: false) - Re-run even for previously failed stores + */ +router.post('/batch/entry-point-discovery', async (req: Request, res: Response) => { + try { + const { + state_code, + stagger_seconds = 5, + force = false, + } = req.body; + + // Find stores that need entry point discovery + const storeResult = await pool.query(` + SELECT d.id, d.name, d.menu_url + FROM dispensaries d + JOIN states s ON d.state_id = s.id + WHERE d.crawl_enabled = true + AND d.menu_url IS NOT NULL + AND d.platform_dispensary_id IS NULL + ${state_code ? 'AND s.code = $1' : ''} + ${!force ? "AND (d.id_resolution_status IS NULL OR d.id_resolution_status = 'pending')" : ''} + -- No pending/running entry_point_discovery task already + AND NOT EXISTS ( + SELECT 1 FROM worker_tasks t + WHERE t.dispensary_id = d.id + AND t.role = 'entry_point_discovery' + AND t.status IN ('pending', 'claimed', 'running') + ) + ORDER BY d.id + `, state_code ? [state_code.toUpperCase()] : []); + + const dispensaryIds = storeResult.rows.map((r: { id: number }) => r.id); + + if (dispensaryIds.length === 0) { + return res.json({ + success: true, + message: state_code + ? `No ${state_code.toUpperCase()} stores need entry point discovery` + : 'No stores need entry point discovery', + tasks_created: 0, + }); + } + + // Create staggered tasks + const taskIds: number[] = []; + for (let i = 0; i < dispensaryIds.length; i++) { + const scheduledFor = new Date(Date.now() + i * stagger_seconds * 1000); + const result = await pool.query(` + INSERT INTO worker_tasks (role, dispensary_id, priority, scheduled_for, method) + VALUES ('entry_point_discovery', $1, 10, $2, 'http') + RETURNING id + `, [dispensaryIds[i], scheduledFor]); + taskIds.push(result.rows[0].id); + } + + const totalDuration = dispensaryIds.length * stagger_seconds; + const estimatedEndTime = new Date(Date.now() + totalDuration * 1000); + + res.json({ + success: true, + tasks_created: taskIds.length, + task_ids: taskIds, + stores: storeResult.rows.map((r: { id: number; name: string }) => ({ id: r.id, name: r.name })), + stagger_seconds, + total_duration_seconds: totalDuration, + estimated_completion: estimatedEndTime.toISOString(), + message: `Created ${taskIds.length} entry_point_discovery tasks${state_code ? ` for ${state_code.toUpperCase()}` : ''}`, + }); + } catch (error: unknown) { + console.error('Error creating entry point discovery tasks:', error); + res.status(500).json({ error: 'Failed to create entry point discovery tasks' }); + } +}); + // ============================================================ // STATE-BASED CRAWL ENDPOINTS // ============================================================ diff --git a/backend/src/tasks/handlers/entry-point-discovery.ts b/backend/src/tasks/handlers/entry-point-discovery.ts index b94c1d4b..008b280a 100644 --- a/backend/src/tasks/handlers/entry-point-discovery.ts +++ b/backend/src/tasks/handlers/entry-point-discovery.ts @@ -41,9 +41,16 @@ export async function handleEntryPointDiscovery(ctx: TaskContext): Promise