Files
cannaiq/backend/src/canonical-hydration/cli/backfill.ts
Kelly b4a2fb7d03 feat: Add v2 architecture with multi-state support and orchestrator services
Major additions:
- Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare
- Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator
- Discovery system: dutchie discovery service, geo validation, city seeding scripts
- Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages
- Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram)
- Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata

Frontend pages added:
- Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores
- StateHeatmap, CrossStateCompare, SyncInfoPanel

Components added:
- StateSelector, OrchestratorTraceModal, WorkflowStepper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-07 11:30:57 -07:00

171 lines
6.3 KiB
TypeScript

#!/usr/bin/env npx tsx
/**
* Backfill CLI - Historical data hydration
*
* Usage:
* npx tsx src/canonical-hydration/cli/backfill.ts [options]
*
* Options:
* --dispensary-id <id> Hydrate only a specific dispensary
* --start-date <date> Start date for backfill (ISO format)
* --end-date <date> End date for backfill (ISO format)
* --batch-size <n> Number of jobs to process per batch (default: 50)
* --dry-run Show what would be done without making changes
* --status Show hydration status and exit
*
* Examples:
* npx tsx src/canonical-hydration/cli/backfill.ts --status
* npx tsx src/canonical-hydration/cli/backfill.ts --dispensary-id 112
* npx tsx src/canonical-hydration/cli/backfill.ts --start-date 2024-01-01 --end-date 2024-12-31
* npx tsx src/canonical-hydration/cli/backfill.ts --dry-run
*/
import { Pool } from 'pg';
import { CanonicalHydrationService } from '../hydration-service';
import { HydrationOptions } from '../types';
/** Parsed form of the backfill CLI command line. */
interface ParsedBackfillArgs {
  /** Options handed to `CanonicalHydrationService.hydrate`. */
  options: HydrationOptions;
  /** True when `--status` was given: print hydration status instead of running. */
  showStatus: boolean;
}

/** Prints the CLI usage text to stdout. */
function printBackfillUsage(): void {
  console.log(`
Backfill CLI - Historical data hydration
Usage:
npx tsx src/canonical-hydration/cli/backfill.ts [options]
Options:
--dispensary-id <id> Hydrate only a specific dispensary
--start-date <date> Start date for backfill (ISO format)
--end-date <date> End date for backfill (ISO format)
--batch-size <n> Number of jobs to process per batch (default: 50)
--dry-run Show what would be done without making changes
--status Show hydration status and exit
Examples:
npx tsx src/canonical-hydration/cli/backfill.ts --status
npx tsx src/canonical-hydration/cli/backfill.ts --dispensary-id 112
npx tsx src/canonical-hydration/cli/backfill.ts --start-date 2024-01-01 --end-date 2024-12-31
npx tsx src/canonical-hydration/cli/backfill.ts --dry-run
`);
}

/** Turns any thrown value into a printable message. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Removes and returns the value that follows `flag` in the pending argument queue.
 *
 * @throws Error when the flag is the last argument and has no value.
 */
function takeFlagValue(pending: string[], flag: string): string {
  const value = pending.shift();
  if (value === undefined) {
    throw new Error(`Missing value for ${flag}`);
  }
  return value;
}

/**
 * Parses a strictly positive base-10 integer.
 *
 * Unlike a bare `parseInt`, this rejects trailing garbage ("12abc"), negative
 * numbers, zero and non-numeric input instead of silently yielding NaN or a
 * truncated value.
 *
 * @throws Error when `raw` is not a positive integer.
 */
function parsePositiveInteger(raw: string, flag: string): number {
  const value = /^\d+$/.test(raw) ? Number(raw) : NaN;
  if (!Number.isSafeInteger(value) || value <= 0) {
    throw new Error(`Invalid value for ${flag}: "${raw}" (expected a positive integer)`);
  }
  return value;
}

/**
 * Parses a date argument with the `Date` constructor and rejects invalid dates.
 *
 * Date-only ISO strings such as "2024-01-01" are interpreted by `Date` as UTC midnight.
 * NOTE(review): whether the hydration service treats the end date as inclusive is not
 * visible from this file - confirm before relying on a date-only `--end-date`.
 *
 * @throws Error when `raw` does not produce a valid date.
 */
function parseDateArg(raw: string, flag: string): Date {
  const value = new Date(raw);
  if (Number.isNaN(value.getTime())) {
    throw new Error(`Invalid date for ${flag}: "${raw}" (expected ISO format, e.g. 2024-01-01)`);
  }
  return value;
}

/**
 * Parses the CLI arguments (without the node/script prefix) into hydration options.
 *
 * @param args Arguments as returned by `process.argv.slice(2)`.
 * @returns The hydration options (always in 'backfill' mode) and the `--status` flag.
 * @throws Error on an unknown option, a missing or malformed value, or a start date
 *   that is later than the end date.
 */
function parseBackfillArgs(args: readonly string[]): ParsedBackfillArgs {
  const options: HydrationOptions = {
    mode: 'backfill',
  };
  let showStatus = false;

  const pending = [...args];
  let flag: string | undefined;
  while ((flag = pending.shift()) !== undefined) {
    switch (flag) {
      case '--dispensary-id':
        options.dispensaryId = parsePositiveInteger(takeFlagValue(pending, flag), flag);
        break;
      case '--start-date':
        options.startDate = parseDateArg(takeFlagValue(pending, flag), flag);
        break;
      case '--end-date':
        options.endDate = parseDateArg(takeFlagValue(pending, flag), flag);
        break;
      case '--batch-size':
        options.batchSize = parsePositiveInteger(takeFlagValue(pending, flag), flag);
        break;
      case '--dry-run':
        options.dryRun = true;
        break;
      case '--status':
        showStatus = true;
        break;
      default:
        // Fail loudly: silently ignoring a typo such as "--dryrun" would start a
        // real backfill when the operator asked for a dry run.
        throw new Error(`Unknown option: ${flag}`);
    }
  }

  if (
    options.startDate !== undefined &&
    options.endDate !== undefined &&
    options.startDate.getTime() > options.endDate.getTime()
  ) {
    throw new Error('--start-date must not be later than --end-date');
  }

  return { options, showStatus };
}

/**
 * Prints hydration status for one dispensary, or the overall status when
 * `dispensaryId` is undefined.
 */
async function printHydrationStatus(
  service: CanonicalHydrationService,
  dispensaryId: number | undefined,
): Promise<void> {
  if (dispensaryId !== undefined) {
    const status = await service.getHydrationStatus(dispensaryId);
    console.log(`\nHydration Status for Dispensary ${dispensaryId}:`);
    console.log('═'.repeat(50));
    console.log(` Source Jobs (completed): ${status.sourceJobs}`);
    console.log(` Hydrated Jobs: ${status.hydratedJobs}`);
    console.log(` Unhydrated Jobs: ${status.unhydratedJobs}`);
    console.log('');
    console.log(` Source Products: ${status.sourceProducts}`);
    console.log(` Store Products: ${status.storeProducts}`);
    console.log('');
    console.log(` Source Snapshots: ${status.sourceSnapshots}`);
    console.log(` Store Snapshots: ${status.storeSnapshots}`);
    return;
  }

  const status = await service.getOverallStatus();
  console.log('\nOverall Hydration Status:');
  console.log('═'.repeat(50));
  console.log(` Dispensaries with Data: ${status.dispensariesWithData}`);
  console.log('');
  console.log(` Source Jobs (completed): ${status.totalSourceJobs}`);
  console.log(` Hydrated Jobs: ${status.totalHydratedJobs}`);
  console.log(` Unhydrated Jobs: ${status.totalSourceJobs - status.totalHydratedJobs}`);
  console.log('');
  console.log(` Source Products: ${status.totalSourceProducts}`);
  console.log(` Store Products: ${status.totalStoreProducts}`);
  console.log('');
  console.log(` Source Snapshots: ${status.totalSourceSnapshots}`);
  console.log(` Store Snapshots: ${status.totalStoreSnapshots}`);
}

/**
 * Runs the backfill with the given options and prints a summary.
 *
 * @returns The process exit code: 1 when the hydration result contains errors, else 0.
 */
async function runBackfill(
  service: CanonicalHydrationService,
  options: HydrationOptions,
): Promise<number> {
  console.log('\n' + '═'.repeat(60));
  console.log(' CANONICAL HYDRATION - BACKFILL MODE');
  console.log('═'.repeat(60));
  console.log(` Dispensary ID: ${options.dispensaryId ?? 'ALL'}`);
  console.log(` Start Date: ${options.startDate?.toISOString() ?? 'N/A'}`);
  console.log(` End Date: ${options.endDate?.toISOString() ?? 'N/A'}`);
  console.log(` Batch Size: ${options.batchSize ?? 50}`);
  console.log(` Dry Run: ${options.dryRun ? 'YES' : 'NO'}`);
  console.log('═'.repeat(60) + '\n');

  const result = await service.hydrate(options);

  console.log('\n' + '═'.repeat(60));
  console.log(' HYDRATION COMPLETE');
  console.log('═'.repeat(60));
  console.log(` Crawl Runs Created: ${result.crawlRunsCreated}`);
  console.log(` Crawl Runs Skipped: ${result.crawlRunsSkipped}`);
  console.log(` Products Upserted: ${result.productsUpserted}`);
  console.log(` Snapshots Written: ${result.snapshotsWritten}`);
  console.log(` Duration: ${result.durationMs}ms`);
  console.log(` Errors: ${result.errors.length}`);
  if (result.errors.length > 0) {
    console.log('\nErrors:');
    // Only the first 10 errors are listed to keep the summary readable.
    for (const error of result.errors.slice(0, 10)) {
      console.log(` - ${error}`);
    }
    if (result.errors.length > 10) {
      console.log(` ... and ${result.errors.length - 10} more`);
    }
  }
  console.log('═'.repeat(60) + '\n');

  return result.errors.length > 0 ? 1 : 0;
}

/**
 * CLI entry point: parses `process.argv`, then either prints hydration status
 * (`--status`) or runs a backfill, and exits with 0 on success or 1 on failure.
 *
 * Invalid command lines are rejected before any database connection is created.
 * The exit code is computed first and `process.exit` is only called after the
 * pool has been closed: `process.exit` terminates the process synchronously, so
 * calling it inside the `try` would skip the `finally` clean-up entirely.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);

  if (args.includes('--help')) {
    printBackfillUsage();
    process.exit(0);
  }

  let parsed: ParsedBackfillArgs;
  try {
    parsed = parseBackfillArgs(args);
  } catch (error: unknown) {
    console.error(`Error: ${describeError(error)}`);
    console.error('Run with --help for usage information.');
    return process.exit(1);
  }

  // Connect to database
  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });
  const service = new CanonicalHydrationService({
    pool,
    logger: (msg) => console.log(`[${new Date().toISOString()}] ${msg}`),
  });

  let exitCode = 0;
  try {
    if (parsed.showStatus) {
      await printHydrationStatus(service, parsed.options.dispensaryId);
    } else {
      exitCode = await runBackfill(service, parsed.options);
    }
  } catch (error: unknown) {
    console.error('Fatal error:', describeError(error));
    exitCode = 1;
  } finally {
    try {
      await pool.end();
    } catch (closeError: unknown) {
      // A failed shutdown must not mask the outcome of the run itself.
      console.error('Failed to close database pool:', describeError(closeError));
    }
  }

  process.exit(exitCode);
}
// Run the CLI. The returned promise is handled explicitly so that a rejection
// escaping main() is reported and yields a non-zero exit code instead of
// surfacing as an unhandled promise rejection.
main().catch((error: unknown) => {
  console.error('Fatal error:', error instanceof Error ? error.message : error);
  process.exit(1);
});