#!/usr/bin/env npx tsx
/**
 * Discovery Entrypoint: Dutchie Locations (From Cities)
 *
 * Reads from dutchie_discovery_cities (crawl_enabled = true)
 * and discovers store locations for each city.
 *
 * Geo coordinates are captured when available from Dutchie's payloads.
 *
 * Usage:
 *   npm run discovery:dt:locations
 *   npm run discovery:dt:locations -- --limit=10
 *   npm run discovery:dt:locations -- --delay=3000
 *   DATABASE_URL="..." npx tsx src/dutchie-az/discovery/discovery-dt-locations-from-cities.ts
 *
 * Options:
 *   --limit=N   Only process N cities (default: all)
 *   --delay=N   Delay between cities in ms (default: 2000)
 *
 * Exit codes:
 *   0  discovery completed with no errors
 *   1  no crawl-enabled cities were processed, at least one error was
 *      reported, or the run failed with an exception
 */
import { Pool } from 'pg';
import { DtLocationDiscoveryService } from './DtLocationDiscoveryService';

const DB_URL =
  process.env.DATABASE_URL ||
  process.env.CANNAIQ_DB_URL ||
  'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';

/** Delay between cities (ms) used when --delay is not given. */
const DEFAULT_DELAY_MS = 2000;

/** Upper bound on how long shutdown waits for the pool to drain. */
const POOL_CLOSE_TIMEOUT_MS = 5000;

// Hoisted so the patterns are compiled once rather than once per argument.
const LIMIT_FLAG = /--limit=(\d+)/;
const DELAY_FLAG = /--delay=(\d+)/;

/** Options accepted on the command line; both are optional. */
type CliArgs = { limit?: number; delay?: number };

/** Extracts the decimal value captured by `pattern` from `arg`, if it matches. */
function readNumericFlag(arg: string, pattern: RegExp): number | undefined {
  const captured = pattern.exec(arg)?.[1];
  return captured === undefined ? undefined : parseInt(captured, 10);
}

/**
 * Parses --limit=N and --delay=N from process.argv.
 * Unrecognised arguments are ignored; when a flag is repeated the last one wins.
 */
function parseArgs(): CliArgs {
  const args: CliArgs = {};
  for (const arg of process.argv.slice(2)) {
    const limit = readNumericFlag(arg, LIMIT_FLAG);
    if (limit !== undefined) args.limit = limit;

    const delay = readNumericFlag(arg, DELAY_FLAG);
    if (delay !== undefined) args.delay = delay;
  }
  return args;
}

/** Renders any thrown value as a printable message. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Prints the banner, the target database (password masked) and the active options. */
function printBanner(args: CliArgs): void {
  console.log('╔══════════════════════════════════════════════════╗');
  console.log('║ Dutchie Location Discovery (From Cities) ║');
  console.log('║ Reads crawl_enabled cities, discovers stores ║');
  console.log('╚══════════════════════════════════════════════════╝');
  console.log(`\nDatabase: ${DB_URL.replace(/:[^:@]+@/, ':****@')}`);

  // Compare against undefined so an explicit 0 is still reported.
  if (args.limit !== undefined) console.log(`City limit: ${args.limit}`);
  if (args.delay !== undefined) console.log(`Delay: ${args.delay}ms`);
}

/**
 * Runs discovery for all crawl-enabled cities and prints the summary and
 * current database stats.
 *
 * @returns the process exit code: 0 on a clean run, 1 when no cities were
 *          processed or the run reported errors.
 * @throws whatever the pool or the discovery service throws; the caller
 *         reports it and maps it to exit code 1.
 */
async function runDiscovery(pool: Pool, args: CliArgs): Promise<number> {
  // Connectivity check before starting the crawl.
  const { rows } = await pool.query('SELECT NOW() as time');
  console.log(`Connected at: ${rows[0].time}\n`);

  const service = new DtLocationDiscoveryService(pool);
  const result = await service.discoverAllEnabled({
    limit: args.limit,
    delayMs: args.delay ?? DEFAULT_DELAY_MS,
  });

  console.log('\n' + '═'.repeat(50));
  console.log('SUMMARY');
  console.log('═'.repeat(50));
  console.log(`Cities processed: ${result.totalCities}`);
  console.log(`Locations found: ${result.totalLocationsFound}`);
  console.log(`Locations inserted: ${result.totalInserted}`);
  console.log(`Locations updated: ${result.totalUpdated}`);
  console.log(`Locations skipped: ${result.totalSkipped} (protected status)`);
  console.log(`Errors: ${result.errors.length}`);
  console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);

  if (result.errors.length > 0) {
    console.log('\nErrors (first 10):');
    result.errors.slice(0, 10).forEach((e, i) => console.log(` ${i + 1}. ${e}`));
    if (result.errors.length > 10) {
      console.log(` ... and ${result.errors.length - 10} more`);
    }
  }

  // Get location stats including coordinates
  const stats = await service.getStats();
  console.log('\nCurrent Database Stats:');
  console.log(` Total locations: ${stats.total}`);
  console.log(` With coordinates: ${stats.withCoordinates}`);
  console.log(` By status:`);
  stats.byStatus.forEach(s => console.log(` ${s.status}: ${s.count}`));

  if (result.totalCities === 0) {
    console.log('\n⚠️ No crawl-enabled cities found.');
    console.log(' Seed cities first:');
    console.log(' npm run discovery:dt:cities:manual -- --city-slug=ny-hudson --city-name=Hudson --state-code=NY');
    return 1;
  }

  if (result.errors.length > 0) {
    console.log('\n⚠️ Completed with errors');
    return 1;
  }

  console.log('\n✅ Location discovery completed successfully');
  return 0;
}

/**
 * Closes the pool, waiting at most POOL_CLOSE_TIMEOUT_MS.
 * A failed or timed-out close is logged and otherwise ignored: the process
 * is about to exit, and shutdown problems must not mask the run's outcome.
 */
async function closePool(pool: Pool): Promise<void> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timedOut = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(
      () => reject(new Error(`pool did not close within ${POOL_CLOSE_TIMEOUT_MS}ms`)),
      POOL_CLOSE_TIMEOUT_MS,
    );
  });

  try {
    await Promise.race([pool.end(), timedOut]);
  } catch (error: unknown) {
    console.error('⚠️ Failed to close database pool cleanly:', describeError(error));
  } finally {
    if (timer !== undefined) clearTimeout(timer);
  }
}

/**
 * Script entry point: parses arguments, runs discovery, closes the pool and
 * exits with the resulting code.
 */
async function main(): Promise<void> {
  const args = parseArgs();
  printBanner(args);

  const pool = new Pool({ connectionString: DB_URL });
  let exitCode = 1;
  try {
    exitCode = await runDiscovery(pool, args);
  } catch (error: unknown) {
    console.error('\n❌ Location discovery failed:', describeError(error));
  } finally {
    // process.exit() terminates synchronously and would skip this cleanup,
    // so the pool is closed first and the exit happens afterwards.
    await closePool(pool);
  }

  process.exit(exitCode);
}

main().catch((error: unknown) => {
  // Only reachable if setup outside the guarded region throws.
  console.error('\n❌ Location discovery failed:', describeError(error));
  process.exit(1);
});