feat: Idempotent entry_point_discovery with bulk endpoint

- Track id_resolution_status, attempts, and errors in handler
- Add POST /api/tasks/batch/entry-point-discovery endpoint
- Skip already-resolved stores, retry failed with force flag
This commit is contained in:
Kelly
2025-12-12 20:27:36 -07:00
parent dfd36dacf8
commit 822d2b0609
2 changed files with 106 additions and 3 deletions

View File

@@ -1243,6 +1243,89 @@ router.post('/batch/az-stores', async (req: Request, res: Response) => {
}
});
/**
* POST /api/tasks/batch/entry-point-discovery
* Create entry_point_discovery tasks for stores missing platform_dispensary_id
*
* This is idempotent - stores that already have platform_dispensary_id are skipped.
* Only creates tasks for stores with menu_url set and crawl_enabled = true.
*
* Body (optional):
* - state_code: string (optional) - Filter by state code
* - stagger_seconds: number (default: 5) - Seconds between tasks
* - force: boolean (default: false) - Re-run even for previously failed stores
*/
router.post('/batch/entry-point-discovery', async (req: Request, res: Response) => {
try {
const {
state_code,
stagger_seconds = 5,
force = false,
} = req.body;
// Find stores that need entry point discovery
const storeResult = await pool.query(`
SELECT d.id, d.name, d.menu_url
FROM dispensaries d
JOIN states s ON d.state_id = s.id
WHERE d.crawl_enabled = true
AND d.menu_url IS NOT NULL
AND d.platform_dispensary_id IS NULL
${state_code ? 'AND s.code = $1' : ''}
${!force ? "AND (d.id_resolution_status IS NULL OR d.id_resolution_status = 'pending')" : ''}
-- No pending/running entry_point_discovery task already
AND NOT EXISTS (
SELECT 1 FROM worker_tasks t
WHERE t.dispensary_id = d.id
AND t.role = 'entry_point_discovery'
AND t.status IN ('pending', 'claimed', 'running')
)
ORDER BY d.id
`, state_code ? [state_code.toUpperCase()] : []);
const dispensaryIds = storeResult.rows.map((r: { id: number }) => r.id);
if (dispensaryIds.length === 0) {
return res.json({
success: true,
message: state_code
? `No ${state_code.toUpperCase()} stores need entry point discovery`
: 'No stores need entry point discovery',
tasks_created: 0,
});
}
// Create staggered tasks
const taskIds: number[] = [];
for (let i = 0; i < dispensaryIds.length; i++) {
const scheduledFor = new Date(Date.now() + i * stagger_seconds * 1000);
const result = await pool.query(`
INSERT INTO worker_tasks (role, dispensary_id, priority, scheduled_for, method)
VALUES ('entry_point_discovery', $1, 10, $2, 'http')
RETURNING id
`, [dispensaryIds[i], scheduledFor]);
taskIds.push(result.rows[0].id);
}
const totalDuration = dispensaryIds.length * stagger_seconds;
const estimatedEndTime = new Date(Date.now() + totalDuration * 1000);
res.json({
success: true,
tasks_created: taskIds.length,
task_ids: taskIds,
stores: storeResult.rows.map((r: { id: number; name: string }) => ({ id: r.id, name: r.name })),
stagger_seconds,
total_duration_seconds: totalDuration,
estimated_completion: estimatedEndTime.toISOString(),
message: `Created ${taskIds.length} entry_point_discovery tasks${state_code ? ` for ${state_code.toUpperCase()}` : ''}`,
});
} catch (error: unknown) {
console.error('Error creating entry point discovery tasks:', error);
res.status(500).json({ error: 'Failed to create entry point discovery tasks' });
}
});
// ============================================================
// STATE-BASED CRAWL ENDPOINTS
// ============================================================