#!/usr/bin/env npx tsx
/**
 * Dutchie Discovery CLI
 *
 * Command-line interface for running the Dutchie store discovery pipeline.
 *
 * Usage:
 *   npx tsx src/scripts/run-discovery.ts <command> [options]
 *
 * Commands:
 *   discover:state <state>      - Discover all stores in a state (e.g., AZ)
 *   discover:city <city-slug>   - Discover stores in a single city
 *   discover:full               - Run full discovery pipeline
 *   seed:cities <state>         - Seed known cities for a state
 *   stats                       - Show discovery statistics
 *   list                        - List discovered locations
 *
 * Examples:
 *   npx tsx src/scripts/run-discovery.ts discover:state AZ
 *   npx tsx src/scripts/run-discovery.ts discover:city phoenix --state AZ
 *   npx tsx src/scripts/run-discovery.ts seed:cities AZ
 *   npx tsx src/scripts/run-discovery.ts stats
 *   npx tsx src/scripts/run-discovery.ts list --status discovered --state AZ
 */

import { Pool } from 'pg';
import {
  runFullDiscovery,
  discoverCity,
  discoverState,
  getDiscoveryStats,
  seedKnownCities,
} from '../discovery';
import { getCitiesForState } from '../discovery/location-discovery';

/** A flag is either a bare switch (`true`) or carries a string value. */
type FlagValue = string | boolean;
type Flags = Record<string, FlagValue>;

interface ParsedArgs {
  command: string;
  positional: string[];
  flags: Flags;
}

/** Columns of `dutchie_discovery_locations` that the `list` command prints. */
interface LocationRow {
  id: number | string;
  status: string | null;
  state_code: string | null;
  city: string | null;
  name: string | null;
}

/**
 * Switches that never take a value. Without this list, `--dry-run AZ` would
 * swallow the positional `AZ` as the value of `--dry-run`.
 */
const BOOLEAN_FLAGS: ReadonlySet<string> = new Set([
  'all',
  'dry-run',
  'help',
  'skip-cities',
  'verbose',
]);

/**
 * Parses `process.argv` into a command, positional arguments and flags.
 *
 * Supports `--key=value`, `--key value` and bare `--switch` forms. When the
 * first argument is missing or is itself a flag (e.g. `--help`), the command
 * defaults to `help` and every argument is parsed as a flag/positional.
 */
function parseArgs(): ParsedArgs {
  const args = process.argv.slice(2);
  const first = args[0];
  const hasCommand = first !== undefined && first !== '' && !first.startsWith('--');
  const command = hasCommand ? first : 'help';
  const positional: string[] = [];
  const flags: Flags = {};

  for (let i = hasCommand ? 1 : 0; i < args.length; i++) {
    const arg = args[i];
    if (!arg.startsWith('--')) {
      positional.push(arg);
      continue;
    }

    const body = arg.slice(2);
    // Split on the FIRST '=' only so values may themselves contain '='.
    const eq = body.indexOf('=');
    if (eq !== -1) {
      flags[body.slice(0, eq)] = body.slice(eq + 1);
      continue;
    }

    const next = args[i + 1];
    if (!BOOLEAN_FLAGS.has(body) && next && !next.startsWith('--')) {
      flags[body] = next;
      i++;
    } else {
      flags[body] = true;
    }
  }

  return { command, positional, flags };
}

/** Returns the flag's value when it is a non-empty string, otherwise `undefined`. */
function stringFlag(flags: Flags, name: string): string | undefined {
  const value = flags[name];
  return typeof value === 'string' && value !== '' ? value : undefined;
}

/** Interprets a switch; `--x`, `--x=true` are on, absent / `--x=false` / `--x=0` / `--x=` are off. */
function boolFlag(flags: Flags, name: string): boolean {
  const value = flags[name];
  if (typeof value === 'string') {
    return !['', 'false', '0'].includes(value.toLowerCase());
  }
  return value === true;
}

/**
 * Reads a non-negative integer flag.
 *
 * @returns `fallback` when the flag is absent.
 * @throws Error when the flag is present but has no value or is not a
 *   non-negative integer (previously this silently produced `NaN`).
 */
function intFlag(flags: Flags, name: string, fallback: number): number {
  const value = flags[name];
  if (value === undefined) {
    return fallback;
  }
  if (typeof value !== 'string' || !/^\d+$/.test(value)) {
    throw new Error(`--${name} requires a non-negative integer value`);
  }
  return Number.parseInt(value, 10);
}

/**
 * Creates the database pool from `DATABASE_URL`.
 * Exits the process with code 1 when the variable is not set.
 */
function createPool(): Pool {
  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.error('ERROR: DATABASE_URL environment variable is required');
    process.exit(1);
  }
  return new Pool({ connectionString });
}

/** Prints usage information to stdout. */
function printHelp(): void {
  console.log(`
Dutchie Discovery CLI

Usage:
  npx tsx src/scripts/run-discovery.ts <command> [options]

Commands:
  discover:state <state>      Discover all stores in a state (e.g., AZ)
  discover:city <city-slug>   Discover stores in a single city
  discover:full               Run full discovery pipeline
  seed:cities <state>         Seed known cities for a state
  stats                       Show discovery statistics
  list                        List discovered locations

Options:
  --state <code>      State code (e.g., AZ, CA, ON)
  --country <code>    Country code (default: US)
  --status <status>   Filter by status (discovered, verified, rejected, merged)
  --limit <n>         Limit results (default: varies by command)
  --skip-cities       discover:full: skip the city discovery step
  --all               discover:full: crawl all cities, not only stale ones
  --stale-days <n>    discover:full: age in days before a city is stale (default: 7)
  --dry-run           Don't make any changes, just show what would happen
  --verbose           Show detailed output

Examples:
  npx tsx src/scripts/run-discovery.ts discover:state AZ
  npx tsx src/scripts/run-discovery.ts discover:city phoenix --state AZ
  npx tsx src/scripts/run-discovery.ts seed:cities AZ
  npx tsx src/scripts/run-discovery.ts stats
  npx tsx src/scripts/run-discovery.ts list --status discovered --state AZ --limit 20
`);
}

/**
 * Dispatches the requested command.
 *
 * @returns The process exit code (0 on success, 1 on any failure). Returning
 *   the code instead of calling `process.exit` inside the `try` lets the
 *   `finally` block close the pool on every path.
 */
async function main(): Promise<number> {
  const { command, positional, flags } = parseArgs();

  if (command === 'help' || boolFlag(flags, 'help')) {
    printHelp();
    return 0;
  }

  const verbose = boolFlag(flags, 'verbose');
  const dryRun = boolFlag(flags, 'dry-run');
  const pool = createPool();

  try {
    switch (command) {
      case 'discover:state': {
        const stateCode = positional[0] || stringFlag(flags, 'state');
        if (!stateCode) {
          console.error('ERROR: State code is required');
          console.error('Usage: discover:state <state>');
          return 1;
        }

        console.log(`\nDiscovering stores in ${stateCode}...\n`);
        const result = await discoverState(pool, stateCode.toUpperCase(), {
          dryRun,
          verbose,
          cityLimit: intFlag(flags, 'limit', 100),
        });

        console.log('\n=== DISCOVERY RESULTS ===');
        console.log(`Cities crawled: ${result.locations.length}`);
        console.log(`Locations found: ${result.totalLocationsFound}`);
        console.log(`Locations upserted: ${result.totalLocationsUpserted}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        return 0;
      }

      case 'discover:city': {
        const citySlug = positional[0];
        if (!citySlug) {
          console.error('ERROR: City slug is required');
          console.error('Usage: discover:city <city-slug> [--state AZ]');
          return 1;
        }

        console.log(`\nDiscovering stores in ${citySlug}...\n`);
        const result = await discoverCity(pool, citySlug, {
          stateCode: stringFlag(flags, 'state') as string,
          countryCode: stringFlag(flags, 'country') ?? 'US',
          dryRun,
          verbose,
        });

        if (!result) {
          console.error(`City not found: ${citySlug}`);
          return 1;
        }

        console.log('\n=== DISCOVERY RESULTS ===');
        console.log(`City: ${result.citySlug}`);
        console.log(`Locations found: ${result.locationsFound}`);
        console.log(`Locations upserted: ${result.locationsUpserted}`);
        console.log(`New: ${result.locationsNew}, Updated: ${result.locationsUpdated}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        if (result.errors.length > 0) {
          console.log(`Errors: ${result.errors.length}`);
          result.errors.forEach((e) => console.log(` - ${e}`));
        }
        return 0;
      }

      case 'discover:full': {
        console.log('\nRunning full discovery pipeline...\n');
        const result = await runFullDiscovery(pool, {
          stateCode: stringFlag(flags, 'state') as string,
          countryCode: stringFlag(flags, 'country') ?? 'US',
          cityLimit: intFlag(flags, 'limit', 50),
          skipCityDiscovery: boolFlag(flags, 'skip-cities'),
          // Only stale cities are crawled unless --all is given.
          onlyStale: !boolFlag(flags, 'all'),
          staleDays: intFlag(flags, 'stale-days', 7),
          dryRun,
          verbose,
        });

        console.log('\n=== FULL DISCOVERY RESULTS ===');
        console.log(`Cities discovered: ${result.cities.citiesFound}`);
        console.log(`Cities upserted: ${result.cities.citiesUpserted}`);
        console.log(`Cities crawled: ${result.locations.length}`);
        console.log(`Total locations found: ${result.totalLocationsFound}`);
        console.log(`Total locations upserted: ${result.totalLocationsUpserted}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        return 0;
      }

      case 'seed:cities': {
        const stateCode = positional[0] || stringFlag(flags, 'state');
        if (!stateCode) {
          console.error('ERROR: State code is required');
          console.error('Usage: seed:cities <state>');
          return 1;
        }

        // Dynamically fetch cities from Dutchie
        console.log(`\nFetching cities for ${stateCode} from Dutchie...\n`);
        const cityNames = await getCitiesForState(stateCode.toUpperCase());
        if (cityNames.length === 0) {
          console.error(`No cities found for state: ${stateCode}`);
          return 1;
        }

        const cities = cityNames.map((name) => ({
          name,
          // Slug: lowercase, whitespace runs -> '-', everything else non [a-z0-9-] dropped.
          slug: name
            .toLowerCase()
            .replace(/\s+/g, '-')
            .replace(/[^a-z0-9-]/g, ''),
          stateCode: stateCode.toUpperCase(),
        }));

        console.log(`Seeding ${cities.length} cities for ${stateCode}...\n`);
        const result = await seedKnownCities(pool, cities);
        console.log(`Created: ${result.created} new cities`);
        console.log(`Updated: ${result.updated} existing cities`);
        return 0;
      }

      case 'stats': {
        console.log('\nFetching discovery statistics...\n');
        const stats = await getDiscoveryStats(pool);

        console.log('=== CITIES ===');
        console.log(`Total: ${stats.cities.total}`);
        console.log(`Crawled (24h): ${stats.cities.crawledLast24h}`);
        console.log(`Never crawled: ${stats.cities.neverCrawled}`);
        console.log('');
        console.log('=== LOCATIONS ===');
        console.log(`Total active: ${stats.locations.total}`);
        console.log(`Discovered: ${stats.locations.discovered}`);
        console.log(`Verified: ${stats.locations.verified}`);
        console.log(`Merged: ${stats.locations.merged}`);
        console.log(`Rejected: ${stats.locations.rejected}`);
        console.log('');
        console.log('=== BY STATE ===');
        stats.locations.byState.forEach((s) => {
          console.log(` ${s.stateCode}: ${s.count}`);
        });
        return 0;
      }

      case 'list': {
        const status = stringFlag(flags, 'status');
        const stateCode = stringFlag(flags, 'state');
        const limit = intFlag(flags, 'limit', 50);

        // Build the WHERE clause with positional placeholders; each
        // placeholder number is the parameter's 1-based index in `params`.
        const conditions = ['active = TRUE'];
        const params: Array<string | number> = [];
        if (status) {
          params.push(status);
          conditions.push(`status = $${params.length}`);
        }
        if (stateCode) {
          params.push(stateCode.toUpperCase());
          conditions.push(`state_code = $${params.length}`);
        }
        params.push(limit);

        const { rows } = await pool.query<LocationRow>(
          `
          SELECT id, platform, name, city, state_code, status, platform_menu_url, first_seen_at
          FROM dutchie_discovery_locations
          WHERE ${conditions.join(' AND ')}
          ORDER BY first_seen_at DESC
          LIMIT $${params.length}
          `,
          params
        );

        console.log(`\nFound ${rows.length} locations:\n`);
        console.log('ID\tStatus\t\tState\tCity\t\tName');
        console.log('-'.repeat(80));
        rows.forEach((row) => {
          // Every text column may be NULL; guard them all before formatting.
          const statusDisplay = (row.status || '').padEnd(12);
          const cityDisplay = (row.city || '').substring(0, 12).padEnd(12);
          const nameDisplay = (row.name || '').substring(0, 30);
          console.log(
            `${row.id}\t${statusDisplay}\t${row.state_code || 'N/A'}\t${cityDisplay}\t${nameDisplay}`
          );
        });
        return 0;
      }

      default:
        console.error(`Unknown command: ${command}`);
        printHelp();
        return 1;
    }
  } catch (error: unknown) {
    console.error('ERROR:', error instanceof Error ? error.message : String(error));
    if (verbose && error instanceof Error) {
      console.error(error.stack);
    }
    return 1;
  } finally {
    await pool.end();
  }
}

main()
  .then((exitCode) => {
    // On success let Node exit naturally; on failure force the exit code
    // now that the pool has been closed.
    if (exitCode !== 0) {
      process.exit(exitCode);
    }
  })
  .catch((error: unknown) => {
    // Reached only if cleanup itself fails (e.g. pool.end() rejects).
    console.error('ERROR:', error instanceof Error ? error.message : String(error));
    process.exit(1);
  });