Files
cannaiq/backend/src/scripts/discovery-dutchie-locations.ts
Kelly b4a2fb7d03 feat: Add v2 architecture with multi-state support and orchestrator services
Major additions:
- Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare
- Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator
- Discovery system: dutchie discovery service, geo validation, city seeding scripts
- Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages
- Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram)
- Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata

Frontend pages added:
- Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores
- StateHeatmap, CrossStateCompare, SyncInfoPanel

Components added:
- StateSelector, OrchestratorTraceModal, WorkflowStepper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-07 11:30:57 -07:00

190 lines
6.2 KiB
TypeScript

#!/usr/bin/env npx tsx
/**
* Dutchie Location Discovery CLI Runner
*
* Discovers store locations for cities and upserts to dutchie_discovery_locations.
*
* Usage:
* npm run discovery:dutchie:locations -- --all-enabled
* npm run discovery:dutchie:locations -- --city-slug=phoenix
* npm run discovery:dutchie:locations -- --all-enabled --limit=10
*
* npx tsx src/scripts/discovery-dutchie-locations.ts --all-enabled
* npx tsx src/scripts/discovery-dutchie-locations.ts --city-slug=phoenix
*
* Options:
* --city-slug=<slug> Run for a single city by its slug
* --all-enabled Run for all cities where crawl_enabled = TRUE
* --limit=<n> Limit the number of cities to process
* --delay=<ms> Delay between cities in ms (default: 2000)
*
* Environment:
* DATABASE_URL - PostgreSQL connection string (required)
*/
import { Pool } from 'pg';
import { DutchieLocationDiscovery } from '../dutchie-az/discovery/DutchieLocationDiscovery';
// Parse command line arguments
/**
 * Parses the value of an integer-valued flag.
 *
 * Only plain decimal digits are accepted, so inputs such as "abc", "10abc",
 * "-5" or an empty string are rejected instead of silently becoming NaN or a
 * truncated number. On invalid input an error is printed and the process
 * exits with code 1, matching how the rest of this script reports usage errors.
 *
 * @param flag - Flag name used in the error message (e.g. "--limit").
 * @param raw - Text that followed "=" on the command line.
 * @param min - Smallest accepted value (inclusive).
 * @returns The parsed integer.
 */
function parseIntegerFlag(flag: string, raw: string, min: number): number {
  const value = /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN;
  if (!Number.isSafeInteger(value) || value < min) {
    console.error(`ERROR: Invalid value for ${flag}: "${raw}" (expected an integer >= ${min})`);
    process.exit(1);
  }
  return value;
}

/**
 * Parses the command line arguments of the discovery CLI.
 *
 * Recognized arguments:
 *   --city-slug=<slug>  stored in `citySlug` (everything after the first "=")
 *   --all-enabled       sets `allEnabled` to true
 *   --limit=<n>         stored in `limit`; must be an integer >= 1
 *   --delay=<ms>        stored in `delay`; must be an integer >= 0 (default 2000)
 *
 * Unrecognized arguments are ignored, but a warning is written to stderr so a
 * misspelled flag does not go unnoticed.
 *
 * @param argv - Arguments to parse; defaults to `process.argv.slice(2)`.
 * @returns The parsed options. `citySlug` is null and `limit` is undefined
 *          when the corresponding flag was not supplied.
 */
function parseArgs(argv: readonly string[] = process.argv.slice(2)): {
  citySlug: string | null;
  allEnabled: boolean;
  limit: number | undefined;
  delay: number;
} {
  const CITY_SLUG_PREFIX = '--city-slug=';
  const LIMIT_PREFIX = '--limit=';
  const DELAY_PREFIX = '--delay=';

  let citySlug: string | null = null;
  let allEnabled = false;
  let limit: number | undefined = undefined;
  let delay = 2000;

  for (const arg of argv) {
    if (arg.startsWith(CITY_SLUG_PREFIX)) {
      // Slice by prefix length rather than split('=') so a value that itself
      // contains "=" is kept intact.
      citySlug = arg.slice(CITY_SLUG_PREFIX.length);
    } else if (arg === '--all-enabled') {
      allEnabled = true;
    } else if (arg.startsWith(LIMIT_PREFIX)) {
      limit = parseIntegerFlag('--limit', arg.slice(LIMIT_PREFIX.length), 1);
    } else if (arg.startsWith(DELAY_PREFIX)) {
      delay = parseIntegerFlag('--delay', arg.slice(DELAY_PREFIX.length), 0);
    } else {
      console.warn(`WARNING: Ignoring unrecognized argument: ${arg}`);
    }
  }

  return { citySlug, allEnabled, limit, delay };
}
/**
 * Prints the CLI help text (usage, options, examples and required
 * environment) to stdout as a single console.log call.
 */
function printUsage() {
  const script = 'npx tsx src/scripts/discovery-dutchie-locations.ts';
  const helpLines = [
    '',
    'Dutchie Location Discovery CLI',
    'Usage:',
    script + ' [options]',
    'Options:',
    '--city-slug=<slug> Run for a single city by its slug',
    '--all-enabled Run for all cities where crawl_enabled = TRUE',
    '--limit=<n> Limit the number of cities to process',
    '--delay=<ms> Delay between cities in ms (default: 2000)',
    'Examples:',
    script + ' --all-enabled',
    script + ' --city-slug=phoenix',
    script + ' --all-enabled --limit=5',
    'Environment:',
    'DATABASE_URL - PostgreSQL connection string (required)',
    '',
  ];
  // Leading and trailing empty entries reproduce the blank line before the
  // title and the newline after the last line of the help text.
  console.log(helpLines.join('\n'));
}
/**
 * CLI entry point.
 *
 * Reads the options from parseArgs(), validates that a mode was selected and
 * that DATABASE_URL is set, then runs DutchieLocationDiscovery either for a
 * single city (--city-slug) or for all enabled cities (--all-enabled) and
 * prints a summary. When both flags are given, the single-city mode wins.
 *
 * The process exits with code 1 when arguments or environment are invalid,
 * when the city is not found, when the discovery result reports any errors,
 * or when an exception is thrown; otherwise with code 0.
 *
 * The exit code is recorded and process.exit() is called only after the
 * `finally` block, because process.exit() terminates the process immediately
 * and would otherwise skip closing the connection pool.
 */
async function main(): Promise<void> {
  const { citySlug, allEnabled, limit, delay } = parseArgs();

  if (!citySlug && !allEnabled) {
    console.error('ERROR: Must specify either --city-slug=<slug> or --all-enabled');
    printUsage();
    process.exit(1);
  }

  const rule = '='.repeat(60);
  console.log(rule);
  console.log('DUTCHIE LOCATION DISCOVERY');
  console.log(rule);
  if (citySlug) {
    console.log(`Mode: Single city (${citySlug})`);
  } else {
    // Compare against undefined so an explicit limit is always shown.
    console.log(`Mode: All enabled cities${limit !== undefined ? ` (limit: ${limit})` : ''}`);
  }
  console.log(`Delay between cities: ${delay}ms`);
  console.log('');

  // Get database URL from environment
  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.error('ERROR: DATABASE_URL environment variable is required');
    console.error('');
    console.error('Usage:');
    console.error(' DATABASE_URL="postgresql://..." npx tsx src/scripts/discovery-dutchie-locations.ts --all-enabled');
    process.exit(1);
  }

  // Create pool
  const pool = new Pool({ connectionString });
  let exitCode = 0;

  try {
    // Test connection
    await pool.query('SELECT 1');
    console.log('[CLI] Database connection established');

    const discovery = new DutchieLocationDiscovery(pool);

    if (citySlug) {
      // Single city mode
      const city = await discovery.getCityBySlug(citySlug);
      if (!city) {
        console.error(`ERROR: City not found: ${citySlug}`);
        console.error('');
        console.error('Make sure you have run city discovery first:');
        console.error(' npm run discovery:dutchie:cities');
        exitCode = 1;
      } else {
        const result = await discovery.discoverForCity(city);

        console.log('');
        console.log(rule);
        console.log('DISCOVERY COMPLETE');
        console.log(rule);
        console.log(`City: ${city.cityName}, ${city.stateCode}`);
        console.log(`Locations found: ${result.locationsFound}`);
        console.log(`Inserted: ${result.locationsInserted}`);
        console.log(`Updated: ${result.locationsUpdated}`);
        console.log(`Skipped (protected): ${result.locationsSkipped}`);
        console.log(`Errors: ${result.errors.length}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);

        if (result.errors.length > 0) {
          console.log('');
          console.log('Errors:');
          for (const e of result.errors) {
            console.log(` - ${e}`);
          }
        }

        exitCode = result.errors.length > 0 ? 1 : 0;
      }
    } else {
      // All enabled cities mode
      const result = await discovery.discoverAllEnabled({ limit, delayMs: delay });

      console.log('');
      console.log(rule);
      console.log('DISCOVERY COMPLETE');
      console.log(rule);
      console.log(`Total cities processed: ${result.totalCities}`);
      console.log(`Total locations found: ${result.totalLocationsFound}`);
      console.log(`Total inserted: ${result.totalInserted}`);
      console.log(`Total updated: ${result.totalUpdated}`);
      console.log(`Total skipped: ${result.totalSkipped}`);
      console.log(`Total errors: ${result.errors.length}`);
      console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);

      // Print at most the first 20 errors to keep the summary readable.
      const maxShown = 20;
      const errorCount = result.errors.length;
      if (errorCount > 0) {
        console.log('');
        console.log(errorCount > maxShown ? `First ${maxShown} of ${errorCount} errors:` : 'Errors:');
        for (const e of result.errors.slice(0, maxShown)) {
          console.log(` - ${e}`);
        }
      }

      exitCode = errorCount > 0 ? 1 : 0;
    }
  } catch (error: unknown) {
    if (error instanceof Error) {
      console.error('FATAL ERROR:', error.message);
      console.error(error.stack);
    } else {
      // Non-Error values can be thrown too; print them as-is.
      console.error('FATAL ERROR:', error);
    }
    exitCode = 1;
  } finally {
    // A failure while closing the pool is logged but must not mask the exit
    // code determined above.
    try {
      await pool.end();
    } catch (closeError: unknown) {
      console.error('[CLI] Failed to close database pool:', closeError);
    }
  }

  // Exit explicitly (as the original script did) once cleanup has finished.
  process.exit(exitCode);
}
// Start the CLI. A rejection that escapes main() is logged and turned into a
// non-zero exit code here instead of being left as an unhandled rejection.
main().catch((error: unknown) => {
  console.error('FATAL ERROR:', error);
  process.exit(1);
});