feat(discovery): Add self-healing and rename schedule

- Rename 'store_discovery_dutchie' to 'Store Discovery' (platform badge via platform field)
- Add self-healing: scan for stores missing payloads and queue product_discovery tasks for them
- Catches stores added before chaining was implemented, or whose product_discovery previously failed
- Limits self-healing to 50 stores per run to avoid overwhelming the system

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-13 14:14:21 -07:00
parent e9a688fbb3
commit 59e0e45f8f
4 changed files with 58 additions and 4 deletions

View File

@@ -99,7 +99,7 @@ class TaskScheduler {
// Core schedules - all use HTTP transport for browser-based scraping
const defaults = [
{
name: 'store_discovery_dutchie',
name: 'Store Discovery',
role: 'store_discovery' as TaskRole,
interval_hours: 168, // Weekly
priority: 5,

View File

@@ -438,7 +438,50 @@ export async function handleStoreDiscoveryHttp(ctx: TaskContext): Promise<TaskRe
await browser.close();
browser = null;
console.log(`[StoreDiscoveryHTTP] Complete: ${totalDiscovered} new, ${totalUpserted} upserted, ${allNewStoreIds.length} promoted`);
// ============================================================
// SELF-HEALING: Find existing stores missing payloads
// This catches stores that were added before chaining was implemented,
// or stores where product_discovery previously failed.
// ============================================================
let healedStoreIds: number[] = [];
try {
const healResult = await pool.query(`
SELECT d.id, d.name
FROM dispensaries d
WHERE d.platform = 'dutchie'
AND d.crawl_enabled = true
AND (d.stage IS NULL OR d.stage NOT IN ('deprecated', 'failing'))
AND d.platform_dispensary_id IS NOT NULL
AND d.last_payload_at IS NULL
AND NOT EXISTS (
SELECT 1 FROM task_queue t
WHERE t.dispensary_id = d.id
AND t.role = 'product_discovery'
AND t.status IN ('pending', 'running')
)
ORDER BY d.id
LIMIT 50
`);
if (healResult.rows.length > 0) {
console.log(`[StoreDiscoveryHTTP] Self-healing: Found ${healResult.rows.length} stores missing payloads`);
for (const store of healResult.rows) {
await pool.query(`
INSERT INTO task_queue (role, dispensary_id, priority, scheduled_for, method, platform)
VALUES ('product_discovery', $1, 5, NOW(), 'http', 'dutchie')
ON CONFLICT DO NOTHING
`, [store.id]);
healedStoreIds.push(store.id);
}
console.log(`[StoreDiscoveryHTTP] Self-healing: Queued ${healedStoreIds.length} product_discovery tasks`);
}
} catch (healErr: any) {
console.error(`[StoreDiscoveryHTTP] Self-healing error:`, healErr.message);
}
console.log(`[StoreDiscoveryHTTP] Complete: ${totalDiscovered} new, ${totalUpserted} upserted, ${allNewStoreIds.length} promoted, ${healedStoreIds.length} healed`);
return {
success: true,
@@ -446,6 +489,7 @@ export async function handleStoreDiscoveryHttp(ctx: TaskContext): Promise<TaskRe
storesUpserted: totalUpserted,
statesProcessed: stateCodesToDiscover.length,
newStoreIds: allNewStoreIds,
healedStoreIds,
};
} catch (error: unknown) {