fix: Restore hydration and product_refresh for store updates

- Moved hydration module back from _deprecated (needed for product_refresh)
- Restored product_refresh handler for processing stored payloads
- Restored geolocation service for findadispo/findagram
- Stubbed system routes that depend on deprecated SyncOrchestrator
- Removed crawler-sandbox route (deprecated)
- Fixed all TypeScript compilation errors

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-11 23:03:39 -07:00
parent cdab71a1ee
commit 50654be910
33 changed files with 613 additions and 563 deletions

View File

@@ -0,0 +1,510 @@
#!/usr/bin/env npx tsx
/**
* Unified Hydration CLI
*
* Central entrypoint for all hydration operations:
*
* MODES:
* payload - Process raw_payloads → canonical tables (existing behavior)
* backfill - Migrate dutchie_* → canonical tables (legacy backfill)
* sync - Sync recent crawls to canonical tables
* status - Show hydration progress
*
* Usage:
* npx tsx src/scripts/run-hydration.ts --mode=<mode> [options]
*
* Examples:
* # Payload-based hydration (default)
* npx tsx src/scripts/run-hydration.ts --mode=payload
*
* # Full legacy backfill
* npx tsx src/scripts/run-hydration.ts --mode=backfill
*
* # Backfill single dispensary
* npx tsx src/scripts/run-hydration.ts --mode=backfill --store=123
*
* # Sync recent crawls
* npx tsx src/scripts/run-hydration.ts --mode=sync --since="2 hours"
*
* # Check status
* npx tsx src/scripts/run-hydration.ts --mode=status
*/
import { Pool } from 'pg';
import dotenv from 'dotenv';
import {
HydrationWorker,
runHydrationBatch,
processPayloadById,
reprocessFailedPayloads,
getPayloadStats,
} from '../hydration';
import { runLegacyBackfill } from '../hydration/legacy-backfill';
import { syncRecentCrawls } from '../hydration/incremental-sync';
// Run dotenv's config step at module load, before any function below reads
// process.env (getConnectionString looks up CANNAIQ_DB_* and DATABASE_URL).
dotenv.config();
// ============================================================
// ARGUMENT PARSING
// ============================================================
/**
 * Parsed command-line options shared by every hydration mode.
 * Produced by parseArgs() and consumed by the run*Mode functions.
 */
interface CliArgs {
// Which operation to run; parseArgs() defaults this to 'payload'.
mode: 'payload' | 'backfill' | 'sync' | 'status';
// Value of --store; forwarded as `dispensaryId` by the backfill and sync modes.
store?: number;
// Value of --since; sync mode falls back to '1 hour' when it is not set.
since?: string;
// True when --dry-run is present.
dryRun: boolean;
// True when --verbose or -v is present.
verbose: boolean;
// Value of --limit (default 50); used as `batchSize` in payload mode and `limit` in sync mode.
limit: number;
// True when --loop is present; payload mode then runs a HydrationWorker loop.
loop: boolean;
// True when --reprocess is present; payload mode then calls reprocessFailedPayloads.
reprocess: boolean;
// Value of --payload; payload mode then processes only that payload.
payloadId?: string;
// Value of --start-from; forwarded as `startFromProductId` by backfill mode.
startFrom?: number;
}
/**
 * Parse `process.argv` into a {@link CliArgs} object.
 *
 * Boolean switches (`--dry-run`, `--verbose`/`-v`, `--loop`, `--reprocess`)
 * are true when present. Every value option (`--mode`, `--store`, `--since`,
 * `--limit`, `--payload`, `--start-from`) accepts both `--name=value` and
 * `--name value`.
 *
 * Invalid input is rejected instead of being silently ignored, because a
 * mistyped `--mode` or `--store` would otherwise fall back to a live,
 * unscoped run.
 *
 * @returns The parsed options; `mode` defaults to 'payload', `limit` to 50.
 * @throws Error when `--mode` is not a known mode, when a value option has
 *   no value, or when a numeric option is not a non-negative decimal integer
 *   (`--limit` must additionally be at least 1).
 */
function parseArgs(): CliArgs {
  const args = process.argv.slice(2);
  const modes = ['payload', 'backfill', 'sync', 'status'] as const;

  // Raw text of a value option, or undefined when the option is absent.
  const readOption = (name: string): string | undefined => {
    const flag = `--${name}`;
    const prefix = `${flag}=`;

    const inline = args.find(a => a.startsWith(prefix));
    if (inline !== undefined) {
      // slice() rather than split('=') so values containing '=' survive intact.
      const value = inline.slice(prefix.length);
      if (value === '') {
        throw new Error(`Option ${flag} requires a value`);
      }
      return value;
    }

    const idx = args.indexOf(flag);
    if (idx === -1) {
      return undefined;
    }
    const next = args[idx + 1];
    // A following token that looks like another switch is not a value.
    if (next === undefined || next === '' || next.startsWith('-')) {
      throw new Error(`Option ${flag} requires a value`);
    }
    return next;
  };

  // Integer value of an option; rejects anything parseInt would have turned
  // into NaN or silently truncated (e.g. "12abc").
  const readInt = (name: string, min: number): number | undefined => {
    const raw = readOption(name);
    if (raw === undefined) {
      return undefined;
    }
    const value = /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN;
    if (!Number.isSafeInteger(value) || value < min) {
      throw new Error(`Invalid value for --${name}: "${raw}" (expected an integer >= ${min})`);
    }
    return value;
  };

  // Defaults
  const result: CliArgs = {
    mode: 'payload',
    dryRun: args.includes('--dry-run'),
    verbose: args.includes('--verbose') || args.includes('-v'),
    limit: 50,
    loop: args.includes('--loop'),
    reprocess: args.includes('--reprocess'),
  };

  const requestedMode = readOption('mode');
  if (requestedMode !== undefined) {
    const mode = modes.find(m => m === requestedMode);
    if (mode === undefined) {
      throw new Error(`Unknown mode: ${requestedMode} (expected one of: ${modes.join(', ')})`);
    }
    result.mode = mode;
  }

  // Optional fields are only assigned when supplied, so absent options do
  // not appear as explicit `undefined` properties.
  const store = readInt('store', 0);
  if (store !== undefined) {
    result.store = store;
  }

  const since = readOption('since');
  if (since !== undefined) {
    result.since = since;
  }

  const limit = readInt('limit', 1);
  if (limit !== undefined) {
    result.limit = limit;
  }

  const payloadId = readOption('payload');
  if (payloadId !== undefined) {
    result.payloadId = payloadId;
  }

  const startFrom = readInt('start-from', 0);
  if (startFrom !== undefined) {
    result.startFrom = startFrom;
  }

  return result;
}
// ============================================================
// DATABASE CONNECTION
// ============================================================
/**
 * Resolve the PostgreSQL connection string from the environment.
 *
 * Precedence:
 *  1. `CANNAIQ_DB_URL`, returned verbatim.
 *  2. `CANNAIQ_DB_HOST` / `_PORT` / `_NAME` / `_USER` / `_PASS`, assembled into
 *     a `postgresql://` URI (used only when all five are set and non-empty).
 *  3. `DATABASE_URL`, returned verbatim (fallback for local development).
 *
 * The user and password are percent-encoded when the URI is assembled, so
 * credentials containing reserved characters such as `@`, `:`, `/` or `#`
 * no longer corrupt the URI. `CANNAIQ_DB_USER` / `CANNAIQ_DB_PASS` are
 * therefore expected to hold the raw, unencoded values.
 *
 * @returns A connection string suitable for `new Pool({ connectionString })`.
 * @throws Error when none of the three sources is configured.
 */
function getConnectionString(): string {
  const env = process.env;

  if (env.CANNAIQ_DB_URL) {
    return env.CANNAIQ_DB_URL;
  }

  const host = env.CANNAIQ_DB_HOST;
  const port = env.CANNAIQ_DB_PORT;
  const name = env.CANNAIQ_DB_NAME;
  const user = env.CANNAIQ_DB_USER;
  const pass = env.CANNAIQ_DB_PASS;
  if (host && port && name && user && pass) {
    const credentials = `${encodeURIComponent(user)}:${encodeURIComponent(pass)}`;
    return `postgresql://${credentials}@${host}:${port}/${name}`;
  }

  // Fallback to DATABASE_URL for local development
  if (env.DATABASE_URL) {
    return env.DATABASE_URL;
  }

  throw new Error('Missing database connection environment variables');
}
// ============================================================
// MODE: PAYLOAD (existing behavior)
// ============================================================
/**
 * Payload mode: hydrate canonical tables from stored raw payloads.
 *
 * Prints the current payload stats (best effort), then does exactly one of:
 *  - `args.payloadId` set: process that single payload
 *  - `args.reprocess`:     reprocess failed payloads
 *  - `args.loop`:          run a HydrationWorker loop until SIGINT
 *  - otherwise:            run one batch and print its summary
 *
 * @param pool Database pool handed to the hydration helpers.
 * @param args Parsed CLI options; `dryRun` and `limit` (as batch size) are
 *   forwarded to the helpers.
 */
async function runPayloadMode(pool: Pool, args: CliArgs): Promise<void> {
  console.log('='.repeat(60));
  console.log('HYDRATION - PAYLOAD MODE');
  console.log('='.repeat(60));
  console.log(`Dry run: ${args.dryRun}`);
  console.log(`Batch size: ${args.limit}`);
  console.log('');

  // Show current stats. This stays best-effort, but the real reason is now
  // reported: a permission or connection problem is not a "missing table".
  try {
    const stats = await getPayloadStats(pool);
    console.log('Current payload stats:');
    console.log(` Total: ${stats.total}`);
    console.log(` Processed: ${stats.processed}`);
    console.log(` Unprocessed: ${stats.unprocessed}`);
    console.log(` Failed: ${stats.failed}`);
    console.log('');
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    console.log(`Note: could not read payload stats (${reason})`);
    console.log('');
  }

  if (args.payloadId) {
    // Process specific payload
    console.log(`Processing payload: ${args.payloadId}`);
    const result = await processPayloadById(pool, args.payloadId, { dryRun: args.dryRun });
    console.log('Result:', JSON.stringify(result, null, 2));
  } else if (args.reprocess) {
    // Reprocess failed payloads
    console.log('Reprocessing failed payloads...');
    const result = await reprocessFailedPayloads(pool, { dryRun: args.dryRun, batchSize: args.limit });
    console.log('Result:', JSON.stringify(result, null, 2));
  } else if (args.loop) {
    // Run continuous loop
    const worker = new HydrationWorker(pool, { dryRun: args.dryRun, batchSize: args.limit });
    const stopOnSigint = (): void => {
      console.log('\nStopping hydration loop...');
      worker.stop();
    };
    // `once`: after the first Ctrl+C the listener is gone, so a second
    // Ctrl+C gets Node's default SIGINT handling if stop() does not return.
    process.once('SIGINT', stopOnSigint);
    try {
      // NOTE(review): 30000 is presumably the loop interval in milliseconds;
      // confirm against HydrationWorker.runLoop.
      await worker.runLoop(30000);
    } finally {
      // Do not leave the listener registered once the loop has ended.
      process.removeListener('SIGINT', stopOnSigint);
    }
  } else {
    // Run single batch
    const result = await runHydrationBatch(pool, { dryRun: args.dryRun, batchSize: args.limit });
    console.log('Batch result:');
    console.log(` Payloads processed: ${result.payloadsProcessed}`);
    console.log(` Payloads failed: ${result.payloadsFailed}`);
    console.log(` Products upserted: ${result.totalProductsUpserted}`);
    console.log(` Snapshots created: ${result.totalSnapshotsCreated}`);
    console.log(` Brands created: ${result.totalBrandsCreated}`);
    console.log(` Duration: ${result.durationMs}ms`);
    if (result.errors.length > 0) {
      console.log('\nErrors:');
      for (const err of result.errors.slice(0, 10)) {
        console.log(` ${err.payloadId}: ${err.error}`);
      }
      // Same truncation notice as sync mode, so hidden errors are not missed.
      if (result.errors.length > 10) {
        console.log(` ... and ${result.errors.length - 10} more`);
      }
    }
  }
}
// ============================================================
// MODE: BACKFILL (legacy dutchie_* → canonical)
// ============================================================
/**
 * Backfill mode: print a banner describing the run, then hand off to
 * runLegacyBackfill (legacy dutchie_* tables → canonical tables).
 *
 * @param pool Database pool passed through to runLegacyBackfill.
 * @param args Parsed CLI options; `store` becomes `dispensaryId` and
 *   `startFrom` becomes `startFromProductId`.
 */
async function runBackfillMode(pool: Pool, args: CliArgs): Promise<void> {
  const { dryRun, verbose, store, startFrom } = args;
  const rule = '='.repeat(60);

  // Collect the banner first, then emit it line by line.
  const banner: string[] = [
    rule,
    'HYDRATION - BACKFILL MODE',
    rule,
    `Mode: ${dryRun ? 'DRY RUN' : 'LIVE'}`,
  ];
  if (store) {
    banner.push(`Store: ${store}`);
  }
  if (startFrom) {
    banner.push(`Start from product ID: ${startFrom}`);
  }
  banner.push('');
  for (const line of banner) {
    console.log(line);
  }

  await runLegacyBackfill(pool, {
    dryRun,
    verbose,
    dispensaryId: store,
    startFromProductId: startFrom,
  });
}
// ============================================================
// MODE: SYNC (recent crawls → canonical)
// ============================================================
/**
 * Sync mode: push recent crawls into the canonical tables via
 * syncRecentCrawls and print a summary of the outcome.
 *
 * @param pool Database pool passed through to syncRecentCrawls.
 * @param args Parsed CLI options; `since` falls back to '1 hour', `store`
 *   becomes `dispensaryId`, and `limit` is forwarded as-is.
 */
async function runSyncMode(pool: Pool, args: CliArgs): Promise<void> {
  const timeWindow = args.since ? args.since : '1 hour';
  const rule = '='.repeat(60);

  console.log(rule);
  console.log('HYDRATION - SYNC MODE');
  console.log(rule);
  console.log(`Mode: ${args.dryRun ? 'DRY RUN' : 'LIVE'}`);
  console.log(`Since: ${timeWindow}`);
  console.log(`Limit: ${args.limit}`);
  if (args.store) {
    console.log(`Store: ${args.store}`);
  }
  console.log('');

  const outcome = await syncRecentCrawls(pool, {
    dryRun: args.dryRun,
    verbose: args.verbose,
    since: timeWindow,
    dispensaryId: args.store,
    limit: args.limit,
  });

  console.log('');
  console.log('=== Sync Results ===');
  console.log(`Crawls synced: ${outcome.synced}`);
  console.log(`Errors: ${outcome.errors.length}`);

  // Nothing more to print when the sync was clean.
  if (outcome.errors.length === 0) {
    return;
  }

  console.log('');
  console.log('Errors:');
  // Only the first ten errors are listed; the rest are summarised as a count.
  outcome.errors.slice(0, 10).forEach(problem => {
    console.log(` - ${problem}`);
  });
  if (outcome.errors.length > 10) {
    console.log(` ... and ${outcome.errors.length - 10} more`);
  }
}
// ============================================================
// MODE: STATUS
// ============================================================
/**
 * Print `SELECT COUNT(*)` for each listed table under a heading.
 * A failing query is reported per table as "(table not found)" and does not
 * stop the remaining tables from being counted.
 */
async function printTableCounts(
  pool: Pool,
  heading: string,
  tables: readonly string[],
): Promise<void> {
  console.log(`\n${heading}:`);
  console.log('-'.repeat(40));
  for (const table of tables) {
    try {
      // Table names come from the fixed lists in runStatusMode, never from
      // user input, so interpolating them into the SQL text is safe here.
      const { rows } = await pool.query(`SELECT COUNT(*) as cnt FROM ${table}`);
      console.log(`${table}: ${rows[0].cnt}`);
    } catch {
      console.log(`${table}: (table not found)`);
    }
  }
}

/**
 * Status mode: read-only report of hydration progress.
 *
 * Sections, in order: the `v_hydration_status` view (when it exists), row
 * counts for the canonical tables, row counts for the legacy tables, crawl
 * runs grouped by status for the last 24 hours, and payload stats. The last
 * four sections are best-effort: a failing query prints a placeholder and
 * the report continues.
 *
 * @param pool Database pool used for every query.
 */
async function runStatusMode(pool: Pool): Promise<void> {
  console.log('='.repeat(60));
  console.log('HYDRATION STATUS');
  console.log('='.repeat(60));
  console.log('');

  // Check if v_hydration_status view exists
  const viewExists = await pool.query(`
SELECT EXISTS (
SELECT 1 FROM pg_views WHERE viewname = 'v_hydration_status'
) as exists
`);
  if (viewExists.rows[0].exists) {
    const { rows } = await pool.query('SELECT * FROM v_hydration_status');
    console.log('Hydration Progress:');
    console.log('-'.repeat(70));
    console.log(
      'Table'.padEnd(30) +
      'Source'.padEnd(12) +
      'Hydrated'.padEnd(12) +
      'Progress'
    );
    console.log('-'.repeat(70));
    for (const row of rows) {
      // Explicit null check: a genuine 0 must print as "0%", not "N/A".
      const progress = row.hydration_pct != null ? `${row.hydration_pct}%` : 'N/A';
      console.log(
        // String() so a null source_table cannot crash the report on padEnd.
        String(row.source_table).padEnd(30) +
        String(row.source_count).padEnd(12) +
        String(row.hydrated_count).padEnd(12) +
        progress
      );
    }
    console.log('-'.repeat(70));
  } else {
    console.log('Note: v_hydration_status view not found. Run migration 052 first.');
  }

  // Get counts from canonical tables
  await printTableCounts(pool, 'Canonical Table Counts', [
    'store_products',
    'store_product_snapshots',
    'crawl_runs',
  ]);

  // Get legacy table counts
  await printTableCounts(pool, 'Legacy Table Counts', [
    'dutchie_products',
    'dutchie_product_snapshots',
    'dispensary_crawl_jobs',
  ]);

  // Show recent sync activity
  console.log('\nRecent Crawl Runs (last 24h):');
  console.log('-'.repeat(40));
  try {
    const { rows } = await pool.query(`
SELECT status, COUNT(*) as count
FROM crawl_runs
WHERE started_at > NOW() - INTERVAL '24 hours'
GROUP BY status
ORDER BY count DESC
`);
    if (rows.length === 0) {
      console.log('No crawl runs in last 24 hours');
    } else {
      for (const row of rows) {
        console.log(`${row.status}: ${row.count}`);
      }
    }
  } catch {
    console.log('(crawl_runs table not found)');
  }

  // Payload stats
  console.log('\nPayload Hydration:');
  console.log('-'.repeat(40));
  try {
    const stats = await getPayloadStats(pool);
    console.log(`Total payloads: ${stats.total}`);
    console.log(`Processed: ${stats.processed}`);
    console.log(`Unprocessed: ${stats.unprocessed}`);
    console.log(`Failed: ${stats.failed}`);
  } catch {
    console.log('(raw_payloads table not found)');
  }
}
// ============================================================
// HELP
// ============================================================
/**
 * Write the CLI usage text (modes, options and example invocations) to
 * stdout with a single console.log call.
 */
function showHelp(): void {
  // The empty first and last entries reproduce the blank line that opens and
  // closes the help text.
  const helpLines = [
    '',
    'Unified Hydration CLI',
    'Usage:',
    'npx tsx src/scripts/run-hydration.ts --mode=<mode> [options]',
    'Modes:',
    'payload Process raw_payloads → canonical tables (default)',
    'backfill Migrate dutchie_* → canonical tables',
    'sync Sync recent crawls to canonical tables',
    'status Show hydration progress',
    'Common Options:',
    '--dry-run Print changes without modifying database',
    '--verbose, -v Show detailed progress',
    '--store=<id> Limit to a single dispensary',
    '--limit=<n> Batch size (default: 50)',
    'Payload Mode Options:',
    '--loop Run continuous hydration loop',
    '--reprocess Reprocess failed payloads',
    '--payload=<id> Process a specific payload by ID',
    'Backfill Mode Options:',
    '--start-from=<id> Resume from a specific product ID',
    'Sync Mode Options:',
    '--since=<interval> Time window (default: "1 hour")',
    'Examples: "30 minutes", "2 hours", "1 day"',
    'Examples:',
    '# Full legacy backfill (dutchie_* → canonical)',
    'npx tsx src/scripts/run-hydration.ts --mode=backfill',
    '# Backfill single dispensary (dry run)',
    'npx tsx src/scripts/run-hydration.ts --mode=backfill --store=123 --dry-run',
    '# Sync recent crawls from last 4 hours',
    'npx tsx src/scripts/run-hydration.ts --mode=sync --since="4 hours"',
    '# Sync single dispensary',
    'npx tsx src/scripts/run-hydration.ts --mode=sync --store=123',
    '# Run payload hydration loop',
    'npx tsx src/scripts/run-hydration.ts --mode=payload --loop',
    '# Check hydration status',
    'npx tsx src/scripts/run-hydration.ts --mode=status',
    '',
  ];
  console.log(helpLines.join('\n'));
}
// ============================================================
// MAIN
// ============================================================
/**
 * CLI entry point: handle `--help`, parse arguments, open the database pool,
 * verify the connection and dispatch to the selected mode.
 *
 * Failures are reported on stderr and signalled through `process.exitCode`
 * rather than `process.exit()`. `process.exit()` terminates immediately, so
 * the `finally` block that closes the pool would never run on the error
 * path.
 */
async function main(): Promise<void> {
  const rawArgs = process.argv.slice(2);
  if (rawArgs.includes('--help') || rawArgs.includes('-h')) {
    showHelp();
    return;
  }

  let pool: Pool | undefined;
  try {
    // Argument parsing and connection-string lookup are inside the try so a
    // bad flag or missing environment variable is reported like any other
    // error instead of surfacing as an unhandled rejection.
    const args = parseArgs();
    pool = new Pool({
      connectionString: getConnectionString(),
      max: 5,
    });

    // Verify connection
    await pool.query('SELECT 1');
    console.log('Database connection: OK\n');

    switch (args.mode) {
      case 'payload':
        await runPayloadMode(pool, args);
        break;
      case 'backfill':
        await runBackfillMode(pool, args);
        break;
      case 'sync':
        await runSyncMode(pool, args);
        break;
      case 'status':
        await runStatusMode(pool);
        break;
      default: {
        // Exhaustiveness check: adding a mode to CliArgs without a case
        // above becomes a compile error on this line.
        const unknownMode: never = args.mode;
        console.error(`Unknown mode: ${String(unknownMode)}`);
        showHelp();
        process.exitCode = 1;
      }
    }
  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : String(error));
    process.exitCode = 1;
  } finally {
    if (pool) {
      await pool.end();
    }
  }
}

// Last-resort handler, e.g. for a failure while closing the pool.
main().catch((error: unknown) => {
  console.error('Fatal:', error);
  process.exitCode = 1;
});