Switch scheduler UI to dispensary-based API

- Add migrations 021-023 for dispensary_crawl_schedule tables and views
- Add dispensary-orchestrator service and bootstrap-discovery script
- Update schedule routes with dispensary endpoints (/api/schedule/dispensaries)
- Switch frontend scheduler to use canonical dispensaries table (182 AZDHS entries)
- Add dispensary schedule API methods to frontend api.ts
- Remove "Unmapped" badge logic - all dispensaries now linked properly
- Add proper URL linking to dispensary detail pages (/dispensaries/:state/:city/:slug)
- Update Jobs table to show dispensary_name

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-11-30 18:54:52 -07:00
parent 0083f6a510
commit 5306c3f4ca
10 changed files with 1514 additions and 106 deletions

View File

@@ -0,0 +1,388 @@
#!/usr/bin/env npx tsx
/**
* Bootstrap Discovery Script
*
* One-time (but reusable) bootstrap command that:
* 1. Ensures every Dispensary has a dispensary_crawl_schedule entry (4h default)
* 2. Optionally runs RunDispensaryOrchestrator for each dispensary
*
* Usage:
* npx tsx src/scripts/bootstrap-discovery.ts # Create schedules only
* npx tsx src/scripts/bootstrap-discovery.ts --run # Create schedules + run orchestrator
* npx tsx src/scripts/bootstrap-discovery.ts --run --limit=10 # Run for first 10 dispensaries
* npx tsx src/scripts/bootstrap-discovery.ts --dry-run # Preview what would happen
* npx tsx src/scripts/bootstrap-discovery.ts --status # Show current status only
*/
import { pool } from '../db/migrate';
import {
ensureAllDispensariesHaveSchedules,
runDispensaryOrchestrator,
runBatchDispensaryOrchestrator,
getDispensariesDueForOrchestration,
} from '../services/dispensary-orchestrator';
// Parse command line args
const args = process.argv.slice(2);
const flags = {
run: args.includes('--run'),
dryRun: args.includes('--dry-run'),
status: args.includes('--status'),
help: args.includes('--help') || args.includes('-h'),
limit: parseInt(args.find(a => a.startsWith('--limit='))?.split('=')[1] || '0'),
concurrency: parseInt(args.find(a => a.startsWith('--concurrency='))?.split('=')[1] || '3'),
interval: parseInt(args.find(a => a.startsWith('--interval='))?.split('=')[1] || '240'),
detectionOnly: args.includes('--detection-only'),
productionOnly: args.includes('--production-only'),
sandboxOnly: args.includes('--sandbox-only'),
};
async function showHelp() {
console.log(`
Bootstrap Discovery - Initialize Dispensary Crawl System
USAGE:
npx tsx src/scripts/bootstrap-discovery.ts [OPTIONS]
OPTIONS:
--run After creating schedules, run the orchestrator for each dispensary
--dry-run Show what would happen without making changes
--status Show current status and exit
--limit=N Limit how many dispensaries to process (0 = all, default: 0)
--concurrency=N How many dispensaries to process in parallel (default: 3)
--interval=M Default interval in minutes for new schedules (default: 240 = 4 hours)
--detection-only Only run detection, don't crawl
--production-only Only run dispensaries in production mode
--sandbox-only Only run dispensaries in sandbox mode
--help, -h Show this help message
EXAMPLES:
# Create schedule entries for all dispensaries (no crawling)
npx tsx src/scripts/bootstrap-discovery.ts
# Create schedules and run orchestrator for all dispensaries
npx tsx src/scripts/bootstrap-discovery.ts --run
# Run orchestrator for first 10 dispensaries
npx tsx src/scripts/bootstrap-discovery.ts --run --limit=10
# Run with higher concurrency
npx tsx src/scripts/bootstrap-discovery.ts --run --concurrency=5
# Show current status
npx tsx src/scripts/bootstrap-discovery.ts --status
WHAT IT DOES:
1. Creates dispensary_crawl_schedule entries for all dispensaries that don't have one
2. If --run: For each dispensary, runs the orchestrator which:
a. Checks if provider detection is needed (null/unknown/stale/low confidence)
b. Runs detection if needed
c. If Dutchie + production mode: runs production crawl
d. Otherwise: runs sandbox crawl
3. Updates schedule status and job records
`);
}
async function showStatus() {
console.log('\n📊 Current Dispensary Crawl Status\n');
console.log('═'.repeat(70));
// Get dispensary counts by provider
const providerStats = await pool.query(`
SELECT
COALESCE(product_provider, 'undetected') as provider,
COUNT(*) as count,
COUNT(*) FILTER (WHERE product_crawler_mode = 'production') as production,
COUNT(*) FILTER (WHERE product_crawler_mode = 'sandbox') as sandbox,
COUNT(*) FILTER (WHERE product_crawler_mode IS NULL) as no_mode
FROM dispensaries
GROUP BY COALESCE(product_provider, 'undetected')
ORDER BY count DESC
`);
console.log('\nProvider Distribution:');
console.log('-'.repeat(60));
console.log(
'Provider'.padEnd(20) +
'Total'.padStart(8) +
'Production'.padStart(12) +
'Sandbox'.padStart(10) +
'No Mode'.padStart(10)
);
console.log('-'.repeat(60));
for (const row of providerStats.rows) {
console.log(
row.provider.padEnd(20) +
row.count.toString().padStart(8) +
row.production.toString().padStart(12) +
row.sandbox.toString().padStart(10) +
row.no_mode.toString().padStart(10)
);
}
// Get schedule stats
const scheduleStats = await pool.query(`
SELECT
COUNT(DISTINCT d.id) as total_dispensaries,
COUNT(DISTINCT dcs.id) as with_schedule,
COUNT(DISTINCT d.id) - COUNT(DISTINCT dcs.id) as without_schedule,
COUNT(*) FILTER (WHERE dcs.is_active = TRUE) as active_schedules,
COUNT(*) FILTER (WHERE dcs.last_status = 'success') as last_success,
COUNT(*) FILTER (WHERE dcs.last_status = 'error') as last_error,
COUNT(*) FILTER (WHERE dcs.last_status = 'sandbox_only') as last_sandbox,
COUNT(*) FILTER (WHERE dcs.last_status = 'detection_only') as last_detection,
COUNT(*) FILTER (WHERE dcs.next_run_at <= NOW()) as due_now,
AVG(dcs.interval_minutes)::INTEGER as avg_interval
FROM dispensaries d
LEFT JOIN dispensary_crawl_schedule dcs ON dcs.dispensary_id = d.id
`);
const s = scheduleStats.rows[0];
console.log('\n\nSchedule Status:');
console.log('-'.repeat(60));
console.log(` Total Dispensaries: ${s.total_dispensaries}`);
console.log(` With Schedule: ${s.with_schedule}`);
console.log(` Without Schedule: ${s.without_schedule}`);
console.log(` Active Schedules: ${s.active_schedules || 0}`);
console.log(` Average Interval: ${s.avg_interval || 240} minutes`);
console.log('\n Last Run Status:');
console.log(` - Success: ${s.last_success || 0}`);
console.log(` - Error: ${s.last_error || 0}`);
console.log(` - Sandbox Only: ${s.last_sandbox || 0}`);
console.log(` - Detection Only: ${s.last_detection || 0}`);
console.log(` - Due Now: ${s.due_now || 0}`);
// Get recent job stats
const jobStats = await pool.query(`
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE status = 'completed') as completed,
COUNT(*) FILTER (WHERE status = 'failed') as failed,
COUNT(*) FILTER (WHERE status = 'running') as running,
COUNT(*) FILTER (WHERE status = 'pending') as pending,
COUNT(*) FILTER (WHERE detection_ran = TRUE) as with_detection,
COUNT(*) FILTER (WHERE crawl_ran = TRUE) as with_crawl,
COUNT(*) FILTER (WHERE crawl_type = 'production') as production_crawls,
COUNT(*) FILTER (WHERE crawl_type = 'sandbox') as sandbox_crawls,
SUM(products_found) as total_products_found
FROM dispensary_crawl_jobs
WHERE created_at > NOW() - INTERVAL '24 hours'
`);
const j = jobStats.rows[0];
console.log('\n\nJobs (Last 24 Hours):');
console.log('-'.repeat(60));
console.log(` Total Jobs: ${j.total || 0}`);
console.log(` Completed: ${j.completed || 0}`);
console.log(` Failed: ${j.failed || 0}`);
console.log(` Running: ${j.running || 0}`);
console.log(` Pending: ${j.pending || 0}`);
console.log(` With Detection: ${j.with_detection || 0}`);
console.log(` With Crawl: ${j.with_crawl || 0}`);
console.log(` - Production: ${j.production_crawls || 0}`);
console.log(` - Sandbox: ${j.sandbox_crawls || 0}`);
console.log(` Products Found: ${j.total_products_found || 0}`);
console.log('\n' + '═'.repeat(70) + '\n');
}
async function createSchedules(): Promise<{ created: number; existing: number }> {
console.log('\n📅 Creating Dispensary Schedules...\n');
if (flags.dryRun) {
// Count how many would be created
const result = await pool.query(`
SELECT COUNT(*) as count
FROM dispensaries d
WHERE NOT EXISTS (
SELECT 1 FROM dispensary_crawl_schedule dcs WHERE dcs.dispensary_id = d.id
)
`);
const wouldCreate = parseInt(result.rows[0].count);
console.log(` Would create ${wouldCreate} new schedule entries (${flags.interval} minute interval)`);
return { created: wouldCreate, existing: 0 };
}
const result = await ensureAllDispensariesHaveSchedules(flags.interval);
console.log(` ✓ Created ${result.created} new schedule entries`);
console.log(`${result.existing} dispensaries already had schedules`);
return result;
}
async function getDispensariesToProcess(): Promise<number[]> {
// Build query based on filters
let whereClause = 'TRUE';
if (flags.productionOnly) {
whereClause += ` AND d.product_crawler_mode = 'production'`;
} else if (flags.sandboxOnly) {
whereClause += ` AND d.product_crawler_mode = 'sandbox'`;
}
if (flags.detectionOnly) {
whereClause += ` AND (d.product_provider IS NULL OR d.product_provider = 'unknown' OR d.product_confidence < 50)`;
}
const limitClause = flags.limit > 0 ? `LIMIT ${flags.limit}` : '';
const query = `
SELECT d.id, d.name, d.product_provider, d.product_crawler_mode
FROM dispensaries d
LEFT JOIN dispensary_crawl_schedule dcs ON dcs.dispensary_id = d.id
WHERE ${whereClause}
ORDER BY
COALESCE(dcs.priority, 0) DESC,
dcs.last_run_at ASC NULLS FIRST,
d.id ASC
${limitClause}
`;
const result = await pool.query(query);
return result.rows.map(row => row.id);
}
async function runOrchestrator() {
console.log('\n🚀 Running Dispensary Orchestrator...\n');
const dispensaryIds = await getDispensariesToProcess();
if (dispensaryIds.length === 0) {
console.log(' No dispensaries to process.');
return;
}
console.log(` Found ${dispensaryIds.length} dispensaries to process`);
console.log(` Concurrency: ${flags.concurrency}`);
if (flags.dryRun) {
console.log('\n Would process these dispensaries:');
const details = await pool.query(
`SELECT id, name, product_provider, product_crawler_mode
FROM dispensaries WHERE id = ANY($1) ORDER BY id`,
[dispensaryIds]
);
for (const row of details.rows.slice(0, 20)) {
console.log(` - [${row.id}] ${row.name} (${row.product_provider || 'undetected'}, ${row.product_crawler_mode || 'no mode'})`);
}
if (details.rows.length > 20) {
console.log(` ... and ${details.rows.length - 20} more`);
}
return;
}
console.log('\n Starting batch processing...\n');
const results = await runBatchDispensaryOrchestrator(dispensaryIds, flags.concurrency);
// Summarize results
const summary = {
total: results.length,
success: results.filter(r => r.status === 'success').length,
sandboxOnly: results.filter(r => r.status === 'sandbox_only').length,
detectionOnly: results.filter(r => r.status === 'detection_only').length,
error: results.filter(r => r.status === 'error').length,
detectionsRan: results.filter(r => r.detectionRan).length,
crawlsRan: results.filter(r => r.crawlRan).length,
productionCrawls: results.filter(r => r.crawlType === 'production').length,
sandboxCrawls: results.filter(r => r.crawlType === 'sandbox').length,
totalProducts: results.reduce((sum, r) => sum + (r.productsFound || 0), 0),
totalDuration: results.reduce((sum, r) => sum + r.durationMs, 0),
};
console.log('\n' + '═'.repeat(70));
console.log(' Orchestrator Results');
console.log('═'.repeat(70));
console.log(`
Total Processed: ${summary.total}
Status:
- Success: ${summary.success}
- Sandbox Only: ${summary.sandboxOnly}
- Detection Only: ${summary.detectionOnly}
- Error: ${summary.error}
Operations:
- Detections Ran: ${summary.detectionsRan}
- Crawls Ran: ${summary.crawlsRan}
- Production: ${summary.productionCrawls}
- Sandbox: ${summary.sandboxCrawls}
Results:
- Products Found: ${summary.totalProducts}
- Total Duration: ${(summary.totalDuration / 1000).toFixed(1)}s
- Avg per Dispensary: ${(summary.totalDuration / summary.total / 1000).toFixed(1)}s
`);
console.log('═'.repeat(70) + '\n');
// Show errors if any
const errors = results.filter(r => r.status === 'error');
if (errors.length > 0) {
console.log('\n⚠ Errors encountered:');
for (const err of errors.slice(0, 10)) {
console.log(` - [${err.dispensaryId}] ${err.dispensaryName}: ${err.error}`);
}
if (errors.length > 10) {
console.log(` ... and ${errors.length - 10} more errors`);
}
}
}
async function main() {
if (flags.help) {
await showHelp();
process.exit(0);
}
console.log('\n' + '═'.repeat(70));
console.log(' Dispensary Crawl Bootstrap Discovery');
console.log('═'.repeat(70));
if (flags.dryRun) {
console.log('\n🔍 DRY RUN MODE - No changes will be made');
}
try {
// Always show status first
await showStatus();
if (flags.status) {
// Status-only mode, we're done
await pool.end();
process.exit(0);
}
// Step 1: Create schedule entries
await createSchedules();
// Step 2: Optionally run orchestrator
if (flags.run) {
await runOrchestrator();
} else {
console.log('\n💡 Tip: Use --run to also run the orchestrator for each dispensary');
}
// Show final status
if (!flags.dryRun) {
await showStatus();
}
} catch (error: any) {
console.error('\n❌ Fatal error:', error.message);
console.error(error.stack);
process.exit(1);
} finally {
await pool.end();
}
}
main();