- Move deprecated directories to src/_deprecated/: - hydration/ (old pipeline approach) - scraper-v2/ (old Puppeteer scraper) - canonical-hydration/ (merged into tasks) - Unused services: availability, crawler-logger, geolocation, etc - Unused utils: age-gate-playwright, HomepageValidator, stealthBrowser - Archive outdated docs to docs/_archive/: - ANALYTICS_RUNBOOK.md - ANALYTICS_V2_EXAMPLES.md - BRAND_INTELLIGENCE_API.md - CRAWL_PIPELINE.md - TASK_WORKFLOW_2024-12-10.md - WORKER_TASK_ARCHITECTURE.md - ORGANIC_SCRAPING_GUIDE.md - Add docs/CODEBASE_MAP.md as single source of truth - Add warning files to deprecated/archived directories - Slim down CLAUDE.md to essential rules only 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
143 lines
4.3 KiB
TypeScript
143 lines
4.3 KiB
TypeScript
#!/usr/bin/env npx tsx
|
|
/**
 * Incremental CLI - Ongoing data hydration
 *
 * Usage:
 *   npx tsx src/canonical-hydration/cli/incremental.ts [options]
 *
 * Options:
 *   --dispensary-id <id>   Hydrate only a specific dispensary
 *   --batch-size <n>       Number of jobs to process per batch (default: 100)
 *   --loop                 Run continuously in a loop
 *   --interval <seconds>   Interval between loops (default: 60)
 *   --dry-run              Show what would be done without making changes
 *
 * Examples:
 *   npx tsx src/canonical-hydration/cli/incremental.ts
 *   npx tsx src/canonical-hydration/cli/incremental.ts --dispensary-id 112
 *   npx tsx src/canonical-hydration/cli/incremental.ts --loop --interval 300
 *   npx tsx src/canonical-hydration/cli/incremental.ts --dry-run
 */
|
|
|
|
import { Pool } from 'pg';
|
|
import { CanonicalHydrationService } from '../hydration-service';
|
|
import { HydrationOptions } from '../types';
|
|
|
|
/**
 * Largest accepted `--interval`, in seconds. `setTimeout` coerces delays above
 * 2^31 - 1 ms to 1 ms, which would turn `--loop` into a busy loop.
 */
const MAX_INTERVAL_SECONDS = 2147483;

/** Text printed for `--help`. */
const HELP_TEXT = [
  '',
  'Incremental CLI - Ongoing data hydration',
  '',
  'Usage:',
  'npx tsx src/canonical-hydration/cli/incremental.ts [options]',
  '',
  'Options:',
  '--dispensary-id <id> Hydrate only a specific dispensary',
  '--batch-size <n> Number of jobs to process per batch (default: 100)',
  '--loop Run continuously in a loop',
  '--interval <seconds> Interval between loops (default: 60)',
  '--dry-run Show what would be done without making changes',
  '',
  'Examples:',
  'npx tsx src/canonical-hydration/cli/incremental.ts',
  'npx tsx src/canonical-hydration/cli/incremental.ts --dispensary-id 112',
  'npx tsx src/canonical-hydration/cli/incremental.ts --loop --interval 300',
  'npx tsx src/canonical-hydration/cli/incremental.ts --dry-run',
  '',
].join('\n');

/**
 * Parses the value of an integer-valued command line flag.
 *
 * @param flag - Flag name, used only in error messages.
 * @param raw - Token following the flag; `undefined` when the flag was last.
 * @param min - Smallest accepted value (inclusive).
 * @param max - Largest accepted value (inclusive).
 * @returns The parsed integer.
 * @throws Error when the value is missing, not a plain decimal integer, or out of range.
 */
function parseIntegerFlag(
  flag: string,
  raw: string | undefined,
  min: number,
  max: number = Number.MAX_SAFE_INTEGER,
): number {
  // Digits only: plain parseInt would accept "12abc" and return NaN for "abc".
  if (raw === undefined || !/^\d+$/.test(raw)) {
    const shown = raw === undefined ? 'nothing' : `"${raw}"`;
    throw new Error(`${flag} requires an integer value, got ${shown}`);
  }
  const value = Number.parseInt(raw, 10);
  if (!Number.isSafeInteger(value) || value < min || value > max) {
    throw new Error(`${flag} must be between ${min} and ${max}, got ${raw}`);
  }
  return value;
}

/** Renders a caught value of unknown type as a printable message. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * CLI entry point: parses the command line, runs `service.hydrate` once or
 * (with `--loop`) repeatedly until SIGINT/SIGTERM, then closes the database
 * pool and exits.
 *
 * Exit code is 0 on normal completion and 1 on invalid arguments or when
 * hydration throws. Unrecognized arguments are ignored.
 */
async function main() {
  const args = process.argv.slice(2);

  const options: HydrationOptions = {
    mode: 'incremental',
  };
  let loop = false;
  let intervalSeconds = 60;

  // Parse before opening any resources so that bad input exits cleanly.
  try {
    for (let i = 0; i < args.length; i++) {
      const arg = args[i];
      switch (arg) {
        case '--dispensary-id':
          options.dispensaryId = parseIntegerFlag(arg, args[++i], 0);
          break;
        case '--batch-size':
          options.batchSize = parseIntegerFlag(arg, args[++i], 1);
          break;
        case '--loop':
          loop = true;
          break;
        case '--interval':
          intervalSeconds = parseIntegerFlag(arg, args[++i], 1, MAX_INTERVAL_SECONDS);
          break;
        case '--dry-run':
          options.dryRun = true;
          break;
        case '--help':
          console.log(HELP_TEXT);
          process.exit(0);
      }
    }
  } catch (error: unknown) {
    console.error('Fatal error:', describeError(error));
    process.exit(1);
  }

  // Connect to database
  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });

  const service = new CanonicalHydrationService({
    pool,
    logger: (msg) => console.log(`[${new Date().toISOString()}] ${msg}`),
  });

  const log = (msg: string) => console.log(`[${new Date().toISOString()}] ${msg}`);

  // Graceful shutdown: a signal stops the loop after the current hydrate call
  // and cuts short any sleep that is in progress.
  let running = true;
  let wakeSleeper: (() => void) | undefined;
  const requestShutdown = (signal: string) => {
    log(`Received ${signal}, shutting down...`);
    running = false;
    if (wakeSleeper) {
      wakeSleeper();
    }
  };
  process.on('SIGINT', () => requestShutdown('SIGINT'));
  process.on('SIGTERM', () => requestShutdown('SIGTERM'));

  // The exit code is recorded and applied only after the pool is closed;
  // calling process.exit() inside try/catch would skip the finally block.
  let exitCode = 0;

  try {
    console.log('\n' + '═'.repeat(60));
    console.log(' CANONICAL HYDRATION - INCREMENTAL MODE');
    console.log('═'.repeat(60));
    console.log(` Dispensary ID: ${options.dispensaryId ?? 'ALL'}`);
    console.log(` Batch Size: ${options.batchSize ?? 100}`);
    console.log(` Loop Mode: ${loop ? 'YES' : 'NO'}`);
    if (loop) {
      console.log(` Interval: ${intervalSeconds}s`);
    }
    console.log(` Dry Run: ${options.dryRun ? 'YES' : 'NO'}`);
    console.log('═'.repeat(60) + '\n');

    do {
      const result = await service.hydrate(options);

      log(`Hydration complete: ${result.crawlRunsCreated} runs, ${result.productsUpserted} products, ${result.snapshotsWritten} snapshots (${result.durationMs}ms)`);

      if (result.errors.length > 0) {
        log(`Errors: ${result.errors.length}`);
        // Only the first five errors are printed to keep the log readable.
        for (const error of result.errors.slice(0, 5)) {
          log(` - ${error}`);
        }
      }

      if (loop && running) {
        log(`Sleeping for ${intervalSeconds}s...`);
        await new Promise<void>((resolve) => {
          const timer = setTimeout(() => {
            wakeSleeper = undefined;
            resolve();
          }, intervalSeconds * 1000);
          // Lets a shutdown signal end the wait immediately.
          wakeSleeper = () => {
            clearTimeout(timer);
            wakeSleeper = undefined;
            resolve();
          };
        });
      }
    } while (loop && running);

    log('Incremental hydration completed');
  } catch (error: unknown) {
    console.error('Fatal error:', describeError(error));
    exitCode = 1;
  } finally {
    try {
      await pool.end();
    } catch (closeError: unknown) {
      // A failed close must not mask the outcome of the run itself.
      console.error('Failed to close database pool:', describeError(closeError));
    }
  }

  process.exit(exitCode);
}
|
|
|
|
// Run the CLI. The promise is handled explicitly so that a rejection escaping
// main() is reported as a fatal error instead of an unhandled rejection.
main().catch((error: unknown) => {
  console.error('Fatal error:', error instanceof Error ? error.message : String(error));
  process.exit(1);
});
|