Files
cannaiq/backend/src/_deprecated/canonical-hydration/cli/backfill.ts
Kelly a35976b9e9 chore: Clean up deprecated code and docs
- Move deprecated directories to src/_deprecated/:
  - hydration/ (old pipeline approach)
  - scraper-v2/ (old Puppeteer scraper)
  - canonical-hydration/ (merged into tasks)
  - Unused services: availability, crawler-logger, geolocation, etc
  - Unused utils: age-gate-playwright, HomepageValidator, stealthBrowser

- Archive outdated docs to docs/_archive/:
  - ANALYTICS_RUNBOOK.md
  - ANALYTICS_V2_EXAMPLES.md
  - BRAND_INTELLIGENCE_API.md
  - CRAWL_PIPELINE.md
  - TASK_WORKFLOW_2024-12-10.md
  - WORKER_TASK_ARCHITECTURE.md
  - ORGANIC_SCRAPING_GUIDE.md

- Add docs/CODEBASE_MAP.md as single source of truth
- Add warning files to deprecated/archived directories
- Slim down CLAUDE.md to essential rules only

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-11 22:17:40 -07:00

171 lines
6.3 KiB
TypeScript

#!/usr/bin/env npx tsx
/**
 * Backfill CLI - Historical data hydration
 *
 * NOTE: this script now lives under src/_deprecated/canonical-hydration/
 * (canonical-hydration was merged into tasks). The paths shown below predate
 * that move; prefix them with `src/_deprecated/` if you really need to run it.
 *
 * Usage:
 *   npx tsx src/canonical-hydration/cli/backfill.ts [options]
 *
 * Options:
 *   --dispensary-id <id>  Hydrate only a specific dispensary
 *   --start-date <date>   Start date for backfill (ISO format)
 *   --end-date <date>     End date for backfill (ISO format)
 *   --batch-size <n>      Number of jobs to process per batch (default: 50)
 *   --dry-run             Show what would be done without making changes
 *   --status              Show hydration status and exit
 *   --help                Print this usage text and exit
 *
 * Examples:
 *   npx tsx src/canonical-hydration/cli/backfill.ts --status
 *   npx tsx src/canonical-hydration/cli/backfill.ts --dispensary-id 112
 *   npx tsx src/canonical-hydration/cli/backfill.ts --start-date 2024-01-01 --end-date 2024-12-31
 *   npx tsx src/canonical-hydration/cli/backfill.ts --dry-run
 */
import { Pool } from 'pg';
import { CanonicalHydrationService } from '../hydration-service';
import { HydrationOptions } from '../types';
/** Usage text printed by `--help`. */
const BACKFILL_HELP_TEXT = `
Backfill CLI - Historical data hydration
Usage:
npx tsx src/canonical-hydration/cli/backfill.ts [options]
Options:
--dispensary-id <id> Hydrate only a specific dispensary
--start-date <date> Start date for backfill (ISO format)
--end-date <date> End date for backfill (ISO format)
--batch-size <n> Number of jobs to process per batch (default: 50)
--dry-run Show what would be done without making changes
--status Show hydration status and exit
Examples:
npx tsx src/canonical-hydration/cli/backfill.ts --status
npx tsx src/canonical-hydration/cli/backfill.ts --dispensary-id 112
npx tsx src/canonical-hydration/cli/backfill.ts --start-date 2024-01-01 --end-date 2024-12-31
npx tsx src/canonical-hydration/cli/backfill.ts --dry-run
`;

/** Upper bound on how long shutdown waits for the pool to drain before exiting anyway. */
const POOL_CLOSE_TIMEOUT_MS = 5000;

/** Turns any thrown value into a printable message. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Parses the CLI arguments into hydration options.
 *
 * @param args - Raw arguments without the node/script prefix (`process.argv.slice(2)`).
 * @returns The hydration options and whether `--status` was requested.
 * @throws Error if an option is unknown, lacks its value, or carries an invalid value.
 */
function parseBackfillArgs(
  args: readonly string[],
): { options: HydrationOptions; showStatus: boolean } {
  const options: HydrationOptions = { mode: 'backfill' };
  let showStatus = false;

  // Fetches the value that follows a flag, refusing to swallow the next flag as a value.
  const valueAt = (index: number, flag: string): string => {
    const value = args[index];
    if (value === undefined || value.startsWith('--')) {
      throw new Error(`Missing value for ${flag}`);
    }
    return value;
  };

  // A bare parseInt would accept "12abc" and turn "abc" into NaN, so match digits explicitly.
  const toPositiveInt = (raw: string, flag: string): number => {
    const parsed = Number(raw);
    if (!/^\d+$/.test(raw) || !Number.isSafeInteger(parsed) || parsed < 1) {
      throw new Error(`${flag} expects a positive integer, got "${raw}"`);
    }
    return parsed;
  };

  // An Invalid Date would otherwise only surface later as a RangeError from toISOString().
  const toDate = (raw: string, flag: string): Date => {
    const parsed = new Date(raw);
    if (Number.isNaN(parsed.getTime())) {
      throw new Error(`${flag} expects a date (ISO format), got "${raw}"`);
    }
    return parsed;
  };

  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    switch (flag) {
      case '--dispensary-id':
        options.dispensaryId = toPositiveInt(valueAt(++i, flag), flag);
        break;
      case '--start-date':
        options.startDate = toDate(valueAt(++i, flag), flag);
        break;
      case '--end-date':
        options.endDate = toDate(valueAt(++i, flag), flag);
        break;
      case '--batch-size':
        options.batchSize = toPositiveInt(valueAt(++i, flag), flag);
        break;
      case '--dry-run':
        options.dryRun = true;
        break;
      case '--status':
        showStatus = true;
        break;
      default:
        // Fail loudly: a silently ignored typo such as "--dryrun" would start a real backfill.
        throw new Error(`Unknown option: ${flag}`);
    }
  }

  if (
    options.startDate !== undefined &&
    options.endDate !== undefined &&
    options.startDate.getTime() > options.endDate.getTime()
  ) {
    throw new Error('--start-date must not be after --end-date');
  }

  return { options, showStatus };
}

/**
 * Prints hydration counters for one dispensary, or the overall totals when no
 * dispensary id is given.
 */
async function printHydrationStatus(
  service: CanonicalHydrationService,
  dispensaryId?: number,
): Promise<void> {
  if (dispensaryId !== undefined) {
    const status = await service.getHydrationStatus(dispensaryId);
    console.log(`\nHydration Status for Dispensary ${dispensaryId}:`);
    console.log('═'.repeat(50));
    console.log(` Source Jobs (completed): ${status.sourceJobs}`);
    console.log(` Hydrated Jobs: ${status.hydratedJobs}`);
    console.log(` Unhydrated Jobs: ${status.unhydratedJobs}`);
    console.log('');
    console.log(` Source Products: ${status.sourceProducts}`);
    console.log(` Store Products: ${status.storeProducts}`);
    console.log('');
    console.log(` Source Snapshots: ${status.sourceSnapshots}`);
    console.log(` Store Snapshots: ${status.storeSnapshots}`);
    return;
  }

  const status = await service.getOverallStatus();
  console.log('\nOverall Hydration Status:');
  console.log('═'.repeat(50));
  console.log(` Dispensaries with Data: ${status.dispensariesWithData}`);
  console.log('');
  console.log(` Source Jobs (completed): ${status.totalSourceJobs}`);
  console.log(` Hydrated Jobs: ${status.totalHydratedJobs}`);
  console.log(` Unhydrated Jobs: ${status.totalSourceJobs - status.totalHydratedJobs}`);
  console.log('');
  console.log(` Source Products: ${status.totalSourceProducts}`);
  console.log(` Store Products: ${status.totalStoreProducts}`);
  console.log('');
  console.log(` Source Snapshots: ${status.totalSourceSnapshots}`);
  console.log(` Store Snapshots: ${status.totalStoreSnapshots}`);
}

/**
 * Runs the backfill with the given options and prints a summary.
 *
 * @returns The process exit code: 1 if the run reported any errors, otherwise 0.
 */
async function runBackfill(
  service: CanonicalHydrationService,
  options: HydrationOptions,
): Promise<number> {
  const rule = '═'.repeat(60);

  console.log('\n' + rule);
  console.log(' CANONICAL HYDRATION - BACKFILL MODE');
  console.log(rule);
  console.log(` Dispensary ID: ${options.dispensaryId ?? 'ALL'}`);
  console.log(` Start Date: ${options.startDate?.toISOString() ?? 'N/A'}`);
  console.log(` End Date: ${options.endDate?.toISOString() ?? 'N/A'}`);
  console.log(` Batch Size: ${options.batchSize ?? 50}`);
  console.log(` Dry Run: ${options.dryRun ? 'YES' : 'NO'}`);
  console.log(rule + '\n');

  const result = await service.hydrate(options);

  console.log('\n' + rule);
  console.log(' HYDRATION COMPLETE');
  console.log(rule);
  console.log(` Crawl Runs Created: ${result.crawlRunsCreated}`);
  console.log(` Crawl Runs Skipped: ${result.crawlRunsSkipped}`);
  console.log(` Products Upserted: ${result.productsUpserted}`);
  console.log(` Snapshots Written: ${result.snapshotsWritten}`);
  console.log(` Duration: ${result.durationMs}ms`);
  console.log(` Errors: ${result.errors.length}`);
  if (result.errors.length > 0) {
    console.log('\nErrors:');
    // Only the first ten errors are listed; the remainder is summarised as a count.
    for (const error of result.errors.slice(0, 10)) {
      console.log(` - ${error}`);
    }
    if (result.errors.length > 10) {
      console.log(` ... and ${result.errors.length - 10} more`);
    }
  }
  console.log(rule + '\n');

  return result.errors.length > 0 ? 1 : 0;
}

/**
 * CLI entry point: parses `process.argv`, then either prints hydration status
 * (`--status`) or runs a backfill through `CanonicalHydrationService`.
 *
 * Exit codes: 0 on success (including `--help` and `--status`); 1 when the
 * arguments are invalid, the backfill reported errors, or a fatal error was thrown.
 *
 * The database pool is closed before the process exits. Calling `process.exit()`
 * inside the `try` would skip the `finally` block, so the exit code is recorded
 * and the exit happens only after cleanup.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);

  // No resources are open yet, so returning lets the process end on its own with code 0.
  if (args.includes('--help')) {
    console.log(BACKFILL_HELP_TEXT);
    return;
  }

  let options: HydrationOptions;
  let showStatus: boolean;
  try {
    ({ options, showStatus } = parseBackfillArgs(args));
  } catch (error: unknown) {
    console.error(`Error: ${describeError(error)}`);
    console.error('Run with --help for usage.');
    process.exitCode = 1;
    return;
  }

  // Connect to database
  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });
  const service = new CanonicalHydrationService({
    pool,
    logger: (msg) => console.log(`[${new Date().toISOString()}] ${msg}`),
  });

  let exitCode = 0;
  try {
    if (showStatus) {
      await printHydrationStatus(service, options.dispensaryId);
    } else {
      exitCode = await runBackfill(service, options);
    }
  } catch (error: unknown) {
    console.error('Fatal error:', describeError(error));
    exitCode = 1;
  } finally {
    try {
      // Bounded wait, so the CLI still terminates if the pool cannot finish closing.
      await Promise.race([
        pool.end(),
        new Promise<void>((resolve) => setTimeout(resolve, POOL_CLOSE_TIMEOUT_MS)),
      ]);
    } catch (closeError: unknown) {
      console.error('Failed to close database pool:', describeError(closeError));
    }
  }

  // Explicit exit keeps the original guarantee that the CLI terminates once work is done.
  process.exit(exitCode);
}
// Run the CLI. main() reports the failures it anticipates itself; this handler
// only covers a rejection that escapes it, so the promise is never left floating.
main().catch((error: unknown) => {
  console.error('Fatal error:', error instanceof Error ? error.message : String(error));
  process.exit(1);
});