From ff1510f475da976ac0f6ac93fc80128691631331 Mon Sep 17 00:00:00 2001
From: Kelly
Date: Thu, 4 Dec 2025 00:07:20 -0700
Subject: [PATCH] feat(api): add bulk crawl endpoints for all Dutchie stores
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- GET /api/az/admin/dutchie-stores - Lists all Dutchie stores with crawl status
- POST /api/az/admin/crawl-all - Enqueues product crawl jobs for all ready stores

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 backend/src/dutchie-az/routes/index.ts | 150 +++++++++++++++++++++++++
 1 file changed, 150 insertions(+)

diff --git a/backend/src/dutchie-az/routes/index.ts b/backend/src/dutchie-az/routes/index.ts
index 4ec35763..942559b9 100644
--- a/backend/src/dutchie-az/routes/index.ts
+++ b/backend/src/dutchie-az/routes/index.ts
@@ -876,6 +876,156 @@ router.post('/admin/crawl/:id', async (req: Request, res: Response) => {
   }
 });
 
+import { bulkEnqueueJobs, getQueueStats as getJobQueueStats } from '../services/job-queue';
+
+/**
+ * GET /api/dutchie-az/admin/dutchie-stores
+ * Get all Dutchie stores with their crawl status
+ *
+ * Responds with aggregate counts (total / ready / needsPlatformId / failed)
+ * plus one entry per store. A store's status is 'failed' when failed_at is
+ * set, 'ready' when it has a platform_dispensary_id, and 'needs_platform_id'
+ * otherwise. Any error is returned as HTTP 500 with { error }.
+ */
+router.get('/admin/dutchie-stores', async (_req: Request, res: Response) => {
+  try {
+    const { rows } = await query(`
+      SELECT
+        d.id,
+        d.name,
+        d.dba_name,
+        d.city,
+        d.state,
+        d.menu_type,
+        d.platform_dispensary_id,
+        d.menu_url,
+        d.website,
+        d.last_crawl_at,
+        d.consecutive_failures,
+        d.failed_at,
+        (
+          SELECT COUNT(*)
+          FROM dutchie_products
+          WHERE dispensary_id = d.id
+        ) as product_count,
+        (
+          SELECT MAX(crawled_at)
+          FROM dutchie_product_snapshots s
+          JOIN dutchie_products p ON s.dutchie_product_id = p.id
+          WHERE p.dispensary_id = d.id
+        ) as last_snapshot_at
+      FROM dispensaries d
+      WHERE d.menu_type = 'dutchie'
+        AND d.state = 'AZ'
+      ORDER BY d.name
+    `);
+
+    const ready = rows.filter((r: any) => r.platform_dispensary_id && !r.failed_at);
+    const needsPlatformId = rows.filter((r: any) => !r.platform_dispensary_id && !r.failed_at);
+    const failed = rows.filter((r: any) => r.failed_at);
+
+    res.json({
+      total: rows.length,
+      ready: ready.length,
+      needsPlatformId: needsPlatformId.length,
+      failed: failed.length,
+      stores: rows.map((r: any) => ({
+        id: r.id,
+        name: r.dba_name || r.name,
+        city: r.city,
+        state: r.state,
+        menuType: r.menu_type,
+        platformDispensaryId: r.platform_dispensary_id,
+        menuUrl: r.menu_url,
+        website: r.website,
+        lastCrawlAt: r.last_crawl_at,
+        // Coerce the count to a number; a missing/falsy count becomes 0
+        productCount: parseInt(r.product_count || '0', 10),
+        lastSnapshotAt: r.last_snapshot_at,
+        status: r.failed_at
+          ? 'failed'
+          : r.platform_dispensary_id
+            ? 'ready'
+            : 'needs_platform_id',
+      })),
+    });
+  } catch (error: any) {
+    res.status(500).json({ error: error.message });
+  }
+});
+
+/**
+ * POST /api/dutchie-az/admin/crawl-all
+ * Enqueue crawl jobs for ALL ready Dutchie stores
+ * This is a convenience endpoint to queue all stores without triggering the scheduler
+ *
+ * Optional JSON body: { pricingType = 'rec', useBothModes = true }; both values
+ * are forwarded unchanged as job metadata. Responds with enqueued/skipped counts,
+ * queue stats and the ready dispensaries (or enqueued: 0 when none are ready).
+ * Any error is returned as HTTP 500 with { error }.
+ */
+router.post('/admin/crawl-all', async (req: Request, res: Response) => {
+  try {
+    // req.body may be undefined if no body-parsing middleware populated it;
+    // fall back to {} so the defaults apply instead of the destructuring
+    // throwing (which would surface as a 500).
+    const { pricingType = 'rec', useBothModes = true } = req.body || {};
+
+    // Get all "ready" dispensaries (menu_type='dutchie' AND platform_dispensary_id IS NOT NULL AND not failed)
+    const { rows: rawRows } = await query(
+      `
+      SELECT id, name, platform_dispensary_id FROM dispensaries
+      WHERE state = 'AZ'
+        AND menu_type = 'dutchie'
+        AND platform_dispensary_id IS NOT NULL
+        AND failed_at IS NULL
+      ORDER BY last_crawl_at ASC NULLS FIRST
+      `
+    );
+
+    if (rawRows.length === 0) {
+      return res.json({
+        success: true,
+        message: 'No ready dispensaries to crawl. Run menu detection first.',
+        enqueued: 0,
+        skipped: 0,
+        dispensaries: [],
+      });
+    }
+
+    const dispensaryIds = rawRows.map((r: any) => r.id);
+
+    // Bulk enqueue jobs (skips dispensaries that already have pending/running jobs)
+    const { enqueued, skipped } = await bulkEnqueueJobs(
+      'dutchie_product_crawl',
+      dispensaryIds,
+      {
+        priority: 0,
+        metadata: { pricingType, useBothModes },
+      }
+    );
+
+    // Get current queue stats
+    const queueStats = await getJobQueueStats();
+
+    res.json({
+      success: true,
+      message: `Enqueued ${enqueued} crawl jobs for Dutchie stores`,
+      totalReady: rawRows.length,
+      enqueued,
+      skipped,
+      queueStats,
+      dispensaries: rawRows.map((r: any) => ({
+        id: r.id,
+        name: r.name,
+        platformDispensaryId: r.platform_dispensary_id,
+      })),
+    });
+  } catch (error: any) {
+    res.status(500).json({ error: error.message });
+  }
+});
+
 /**
  * GET /api/dutchie-az/admin/jobs
  * Get crawl job history