feat(discovery): Add self-healing and rename schedule
- Rename 'store_discovery_dutchie' to 'Store Discovery' (platform badge via platform field)
- Add self-healing: scan for stores missing payloads and queue product_discovery
- Catches stores added before chaining was implemented
- Limits to 50 stores per run to avoid overwhelming the system

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -99,7 +99,7 @@ class TaskScheduler {
|
||||
// Core schedules - all use HTTP transport for browser-based scraping
|
||||
const defaults = [
|
||||
{
|
||||
name: 'store_discovery_dutchie',
|
||||
name: 'Store Discovery',
|
||||
role: 'store_discovery' as TaskRole,
|
||||
interval_hours: 168, // Weekly
|
||||
priority: 5,
|
||||
|
||||
@@ -438,7 +438,50 @@ export async function handleStoreDiscoveryHttp(ctx: TaskContext): Promise<TaskRe
|
||||
await browser.close();
|
||||
browser = null;
|
||||
|
||||
console.log(`[StoreDiscoveryHTTP] Complete: ${totalDiscovered} new, ${totalUpserted} upserted, ${allNewStoreIds.length} promoted`);
|
||||
// ============================================================
|
||||
// SELF-HEALING: Find existing stores missing payloads
|
||||
// This catches stores that were added before chaining was implemented,
|
||||
// or stores where product_discovery previously failed.
|
||||
// ============================================================
|
||||
let healedStoreIds: number[] = [];
|
||||
try {
|
||||
const healResult = await pool.query(`
|
||||
SELECT d.id, d.name
|
||||
FROM dispensaries d
|
||||
WHERE d.platform = 'dutchie'
|
||||
AND d.crawl_enabled = true
|
||||
AND (d.stage IS NULL OR d.stage NOT IN ('deprecated', 'failing'))
|
||||
AND d.platform_dispensary_id IS NOT NULL
|
||||
AND d.last_payload_at IS NULL
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM task_queue t
|
||||
WHERE t.dispensary_id = d.id
|
||||
AND t.role = 'product_discovery'
|
||||
AND t.status IN ('pending', 'running')
|
||||
)
|
||||
ORDER BY d.id
|
||||
LIMIT 50
|
||||
`);
|
||||
|
||||
if (healResult.rows.length > 0) {
|
||||
console.log(`[StoreDiscoveryHTTP] Self-healing: Found ${healResult.rows.length} stores missing payloads`);
|
||||
|
||||
for (const store of healResult.rows) {
|
||||
await pool.query(`
|
||||
INSERT INTO task_queue (role, dispensary_id, priority, scheduled_for, method, platform)
|
||||
VALUES ('product_discovery', $1, 5, NOW(), 'http', 'dutchie')
|
||||
ON CONFLICT DO NOTHING
|
||||
`, [store.id]);
|
||||
healedStoreIds.push(store.id);
|
||||
}
|
||||
|
||||
console.log(`[StoreDiscoveryHTTP] Self-healing: Queued ${healedStoreIds.length} product_discovery tasks`);
|
||||
}
|
||||
} catch (healErr: any) {
|
||||
console.error(`[StoreDiscoveryHTTP] Self-healing error:`, healErr.message);
|
||||
}
|
||||
|
||||
console.log(`[StoreDiscoveryHTTP] Complete: ${totalDiscovered} new, ${totalUpserted} upserted, ${allNewStoreIds.length} promoted, ${healedStoreIds.length} healed`);
|
||||
|
||||
return {
|
||||
success: true,
|
||||
@@ -446,6 +489,7 @@ export async function handleStoreDiscoveryHttp(ctx: TaskContext): Promise<TaskRe
|
||||
storesUpserted: totalUpserted,
|
||||
statesProcessed: stateCodesToDiscover.length,
|
||||
newStoreIds: allNewStoreIds,
|
||||
healedStoreIds,
|
||||
};
|
||||
|
||||
} catch (error: unknown) {
|
||||
|
||||
Reference in New Issue
Block a user