feat(api): add bulk crawl endpoints for all Dutchie stores
- GET /api/az/admin/dutchie-stores - Lists all Dutchie stores with crawl status
- POST /api/az/admin/crawl-all - Enqueues product crawl jobs for all ready stores

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -876,6 +876,142 @@ router.post('/admin/crawl/:id', async (req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
import { bulkEnqueueJobs, getQueueStats as getJobQueueStats } from '../services/job-queue';
|
||||||
|
|
||||||
|
/**
 * GET /api/dutchie-az/admin/dutchie-stores
 *
 * Lists the dispensaries whose menu_type is 'dutchie' and whose state is 'AZ',
 * ordered by name, together with a crawl-readiness status for each one.
 *
 * Response body:
 *   - total / ready / needsPlatformId / failed: counts per status bucket
 *   - stores: one entry per dispensary (display name prefers dba_name over name)
 *
 * Status rules (evaluated in this order):
 *   - 'failed'            -> failed_at is set
 *   - 'ready'             -> platform_dispensary_id is set
 *   - 'needs_platform_id' -> neither of the above
 *
 * Responds with HTTP 500 and `{ error: <message> }` if the query throws.
 */
router.get('/admin/dutchie-stores', async (_req: Request, res: Response) => {
  type StoreStatus = 'failed' | 'ready' | 'needs_platform_id';

  // Single source of truth for the status; used for both the counters and the per-store field.
  const classify = (store: any): StoreStatus => {
    if (store.failed_at) {
      return 'failed';
    }
    return store.platform_dispensary_id ? 'ready' : 'needs_platform_id';
  };

  try {
    const result = await query(`
      SELECT
        d.id,
        d.name,
        d.dba_name,
        d.city,
        d.state,
        d.menu_type,
        d.platform_dispensary_id,
        d.menu_url,
        d.website,
        d.last_crawl_at,
        d.consecutive_failures,
        d.failed_at,
        (
          SELECT COUNT(*)
          FROM dutchie_products
          WHERE dispensary_id = d.id
        ) as product_count,
        (
          SELECT MAX(crawled_at)
          FROM dutchie_product_snapshots s
          JOIN dutchie_products p ON s.dutchie_product_id = p.id
          WHERE p.dispensary_id = d.id
        ) as last_snapshot_at
      FROM dispensaries d
      WHERE d.menu_type = 'dutchie'
        AND d.state = 'AZ'
      ORDER BY d.name
    `);
    const storeRows = result.rows;

    // One pass over the rows to fill every status bucket.
    const tally: Record<StoreStatus, number> = {
      failed: 0,
      ready: 0,
      needs_platform_id: 0,
    };
    for (const store of storeRows) {
      tally[classify(store)] += 1;
    }

    const stores = storeRows.map((store: any) => {
      const displayName = store.dba_name || store.name;
      // product_count is parsed from its string form; a missing value counts as zero.
      const productCount = parseInt(store.product_count || '0', 10);

      return {
        id: store.id,
        name: displayName,
        city: store.city,
        state: store.state,
        menuType: store.menu_type,
        platformDispensaryId: store.platform_dispensary_id,
        menuUrl: store.menu_url,
        website: store.website,
        lastCrawlAt: store.last_crawl_at,
        productCount,
        lastSnapshotAt: store.last_snapshot_at,
        status: classify(store),
      };
    });

    res.json({
      total: storeRows.length,
      ready: tally.ready,
      needsPlatformId: tally.needs_platform_id,
      failed: tally.failed,
      stores,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
|
||||||
|
|
||||||
|
/**
 * POST /api/dutchie-az/admin/crawl-all
 *
 * Enqueues a 'dutchie_product_crawl' job for every "ready" Dutchie store, i.e.
 * dispensaries with state 'AZ', menu_type 'dutchie', a non-null
 * platform_dispensary_id and no failed_at. Stores are selected oldest-crawl-first
 * (never-crawled stores come first).
 *
 * This is a convenience endpoint to queue all stores without triggering the scheduler.
 *
 * Optional JSON body (both values are forwarded as job metadata):
 *   - pricingType  (default 'rec')
 *   - useBothModes (default true)
 *
 * Response body: success, message, totalReady, enqueued, skipped, queueStats and
 * the list of ready dispensaries considered. When no store is ready, the counts
 * are zero, `dispensaries` is empty and `queueStats` is omitted.
 *
 * Responds with HTTP 500 and `{ error: <message> }` if anything throws.
 */
router.post('/admin/crawl-all', async (req: Request, res: Response) => {
  try {
    // req.body is undefined unless a body parser populated it; fall back to an
    // empty object so a body-less POST uses the defaults instead of failing.
    const { pricingType = 'rec', useBothModes = true } = req.body ?? {};

    // Get all "ready" dispensaries (menu_type='dutchie' AND platform_dispensary_id IS NOT NULL AND not failed)
    const { rows: rawRows } = await query(
      `
      SELECT id, name, platform_dispensary_id FROM dispensaries
      WHERE state = 'AZ'
        AND menu_type = 'dutchie'
        AND platform_dispensary_id IS NOT NULL
        AND failed_at IS NULL
      ORDER BY last_crawl_at ASC NULLS FIRST
      `
    );

    if (rawRows.length === 0) {
      return res.json({
        success: true,
        message: 'No ready dispensaries to crawl. Run menu detection first.',
        // Included so the count fields match the shape of the normal response.
        totalReady: 0,
        enqueued: 0,
        skipped: 0,
        dispensaries: [],
      });
    }

    const dispensaryIds = rawRows.map((r: any) => r.id);

    // Bulk enqueue jobs. NOTE(review): bulkEnqueueJobs is expected to skip
    // dispensaries that already have pending/running jobs and report them as
    // `skipped` - confirm against the job-queue service.
    const { enqueued, skipped } = await bulkEnqueueJobs(
      'dutchie_product_crawl',
      dispensaryIds,
      {
        priority: 0,
        metadata: { pricingType, useBothModes },
      }
    );

    // Get current queue stats
    const queueStats = await getJobQueueStats();

    res.json({
      success: true,
      message: `Enqueued ${enqueued} crawl jobs for Dutchie stores`,
      totalReady: rawRows.length,
      enqueued,
      skipped,
      queueStats,
      // Lists every ready store that was considered, not only the ones enqueued.
      dispensaries: rawRows.map((r: any) => ({
        id: r.id,
        name: r.name,
        platformDispensaryId: r.platform_dispensary_id,
      })),
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/dutchie-az/admin/jobs
|
* GET /api/dutchie-az/admin/jobs
|
||||||
* Get crawl job history
|
* Get crawl job history
|
||||||
|
|||||||
Reference in New Issue
Block a user