feat(api): add bulk crawl endpoints for all Dutchie stores
- GET /api/az/admin/dutchie-stores - Lists all Dutchie stores with crawl status
- POST /api/az/admin/crawl-all - Enqueues product crawl jobs for all ready stores

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -876,6 +876,142 @@ router.post('/admin/crawl/:id', async (req: Request, res: Response) => {
|
||||
}
|
||||
});
|
||||
|
||||
import { bulkEnqueueJobs, getQueueStats as getJobQueueStats } from '../services/job-queue';
|
||||
|
||||
/**
 * GET /api/dutchie-az/admin/dutchie-stores
 *
 * Lists every dispensary with menu_type 'dutchie' in state 'AZ', ordered by
 * name, together with per-store product and snapshot figures.
 *
 * Each store is classified as:
 *   - 'failed'            when failed_at is set,
 *   - 'ready'             when it has a platform_dispensary_id and is not failed,
 *   - 'needs_platform_id' otherwise.
 *
 * Responds with the total, a count per classification, and the store list.
 * Any thrown error is reported as HTTP 500 with `{ error: message }`.
 */
router.get('/admin/dutchie-stores', async (_req: Request, res: Response) => {
  try {
    const result = await query(`
      SELECT
        d.id,
        d.name,
        d.dba_name,
        d.city,
        d.state,
        d.menu_type,
        d.platform_dispensary_id,
        d.menu_url,
        d.website,
        d.last_crawl_at,
        d.consecutive_failures,
        d.failed_at,
        (
          SELECT COUNT(*)
          FROM dutchie_products
          WHERE dispensary_id = d.id
        ) as product_count,
        (
          SELECT MAX(crawled_at)
          FROM dutchie_product_snapshots s
          JOIN dutchie_products p ON s.dutchie_product_id = p.id
          WHERE p.dispensary_id = d.id
        ) as last_snapshot_at
      FROM dispensaries d
      WHERE d.menu_type = 'dutchie'
        AND d.state = 'AZ'
      ORDER BY d.name
    `);
    const dispensaries: any[] = result.rows;

    // A failure mark wins over everything else; otherwise readiness depends
    // solely on whether the platform ID has been resolved.
    const classify = (row: any): 'failed' | 'ready' | 'needs_platform_id' => {
      if (row.failed_at) {
        return 'failed';
      }
      return row.platform_dispensary_id ? 'ready' : 'needs_platform_id';
    };

    // Tally the classifications while building the per-store payload so the
    // summary counts and the per-store `status` can never disagree.
    const tally = { ready: 0, needs_platform_id: 0, failed: 0 };
    const stores = dispensaries.map((row) => {
      const status = classify(row);
      tally[status] += 1;

      return {
        id: row.id,
        // Prefer the "doing business as" name when one is recorded.
        name: row.dba_name || row.name,
        city: row.city,
        state: row.state,
        menuType: row.menu_type,
        platformDispensaryId: row.platform_dispensary_id,
        menuUrl: row.menu_url,
        website: row.website,
        lastCrawlAt: row.last_crawl_at,
        // The count is run through parseInt so the response carries a number.
        productCount: parseInt(row.product_count || '0', 10),
        lastSnapshotAt: row.last_snapshot_at,
        status,
      };
    });

    res.json({
      total: dispensaries.length,
      ready: tally.ready,
      needsPlatformId: tally.needs_platform_id,
      failed: tally.failed,
      stores,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
|
||||
|
||||
/**
 * POST /api/dutchie-az/admin/crawl-all
 *
 * Enqueue crawl jobs for ALL ready Dutchie stores.
 * This is a convenience endpoint to queue all stores without triggering the scheduler.
 *
 * Optional JSON body:
 *   - pricingType  (default 'rec')  forwarded to each job's metadata
 *   - useBothModes (default true)   forwarded to each job's metadata
 *
 * A request with no body at all is valid and uses the defaults.
 *
 * Responds with the number of jobs enqueued/skipped, current queue stats and
 * the list of ready dispensaries that were considered. Any thrown error is
 * reported as HTTP 500 with `{ error: message }`.
 */
router.post('/admin/crawl-all', async (req: Request, res: Response) => {
  try {
    // req.body is undefined when no body-parsing middleware populated it
    // (e.g. a bare POST with no payload). Destructuring undefined throws, so
    // fall back to an empty object and let the defaults apply.
    const { pricingType = 'rec', useBothModes = true } = req.body ?? {};

    // Get all "ready" dispensaries (menu_type='dutchie' AND platform_dispensary_id IS NOT NULL AND not failed).
    // Never-crawled stores (NULL last_crawl_at) come first, then the stalest ones.
    const { rows: rawRows } = await query(
      `
      SELECT id, name, platform_dispensary_id FROM dispensaries
      WHERE state = 'AZ'
        AND menu_type = 'dutchie'
        AND platform_dispensary_id IS NOT NULL
        AND failed_at IS NULL
      ORDER BY last_crawl_at ASC NULLS FIRST
      `
    );

    if (rawRows.length === 0) {
      return res.json({
        success: true,
        message: 'No ready dispensaries to crawl. Run menu detection first.',
        enqueued: 0,
        skipped: 0,
        dispensaries: [],
      });
    }

    const dispensaryIds = rawRows.map((r: any) => r.id);

    // Bulk enqueue jobs (skips dispensaries that already have pending/running jobs)
    const { enqueued, skipped } = await bulkEnqueueJobs(
      'dutchie_product_crawl',
      dispensaryIds,
      {
        priority: 0,
        metadata: { pricingType, useBothModes },
      }
    );

    // Get current queue stats
    const queueStats = await getJobQueueStats();

    res.json({
      success: true,
      message: `Enqueued ${enqueued} crawl jobs for Dutchie stores`,
      totalReady: rawRows.length,
      enqueued,
      skipped,
      queueStats,
      dispensaries: rawRows.map((r: any) => ({
        id: r.id,
        name: r.name,
        platformDispensaryId: r.platform_dispensary_id,
      })),
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
|
||||
|
||||
/**
|
||||
* GET /api/dutchie-az/admin/jobs
|
||||
* Get crawl job history
|
||||
|
||||
Reference in New Issue
Block a user