feat: Add v2 architecture with multi-state support and orchestrator services
Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
309
backend/src/scripts/run-discovery.ts
Normal file
309
backend/src/scripts/run-discovery.ts
Normal file
@@ -0,0 +1,309 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
 * Dutchie Discovery CLI
 *
 * Command-line interface for running the Dutchie store discovery pipeline.
 *
 * Usage:
 *   npx tsx src/scripts/run-discovery.ts <command> [options]
 *
 * Commands:
 *   discover:state <state>  - Discover all stores in a state (e.g., AZ)
 *   discover:city <city>    - Discover stores in a single city
 *   discover:full           - Run full discovery pipeline
 *   seed:cities <state>     - Seed known cities for a state
 *   stats                   - Show discovery statistics
 *   list                    - List discovered locations
 *
 * Examples:
 *   npx tsx src/scripts/run-discovery.ts discover:state AZ
 *   npx tsx src/scripts/run-discovery.ts discover:city phoenix --state AZ
 *   npx tsx src/scripts/run-discovery.ts seed:cities AZ
 *   npx tsx src/scripts/run-discovery.ts stats
 *   npx tsx src/scripts/run-discovery.ts list --status discovered --state AZ
 */
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import {
|
||||
runFullDiscovery,
|
||||
discoverCity,
|
||||
discoverState,
|
||||
getDiscoveryStats,
|
||||
seedKnownCities,
|
||||
ARIZONA_CITIES,
|
||||
} from '../discovery';
|
||||
|
||||
/**
 * Parse `process.argv` into a command, positional arguments, and `--flags`.
 *
 * Supported flag forms:
 *   --key=value   only the first `=` separates key from value, so values may
 *                 themselves contain `=`
 *   --key value   the next token is consumed as the value unless it is a flag
 *   --key         stored as boolean `true`
 *
 * Flags named in `booleanFlags` never consume the following token, so
 * `discover:state --dry-run AZ` keeps `AZ` as a positional argument.
 *
 * When the first token is itself a flag (e.g. `--help`) there is no command:
 * the command defaults to 'help' and that token is parsed as a regular flag.
 *
 * @returns The command name (default 'help'), the positional arguments in
 *          order, and a map of flag name to string value or `true`.
 */
function parseArgs(): {
  command: string;
  positional: string[];
  flags: Record<string, string | boolean>;
} {
  // Switches that take no value; they must not swallow the next token.
  const booleanFlags = new Set(['all', 'dry-run', 'help', 'skip-cities', 'verbose']);

  const args = process.argv.slice(2);
  const first = args[0];
  const leadingFlag = first !== undefined && first.startsWith('--');
  const command = leadingFlag ? 'help' : first || 'help';
  const positional: string[] = [];
  const flags: Record<string, string | boolean> = {};

  // Skip the command token, unless the first token was a flag to be parsed.
  for (let i = leadingFlag ? 0 : 1; i < args.length; i++) {
    const arg = args[i];
    if (!arg.startsWith('--')) {
      positional.push(arg);
      continue;
    }

    const body = arg.slice(2);
    const eq = body.indexOf('=');
    if (eq !== -1) {
      // Split on the first '=' only; the remainder is the value verbatim.
      flags[body.slice(0, eq)] = body.slice(eq + 1);
      continue;
    }

    const next = args[i + 1];
    if (!booleanFlags.has(body) && next && !next.startsWith('--')) {
      flags[body] = next;
      i++; // the value token has been consumed
    } else {
      flags[body] = true;
    }
  }

  return { command, positional, flags };
}
|
||||
|
||||
/**
 * Create the PostgreSQL connection pool used by every command.
 *
 * Reads the connection string from the `DATABASE_URL` environment variable.
 * If it is missing, an error is printed and the process exits with status 1;
 * no pool has been opened at that point.
 *
 * @returns A new `pg` Pool configured with `DATABASE_URL`.
 */
function createPool(): Pool {
  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.error('ERROR: DATABASE_URL environment variable is required');
    process.exit(1);
  }
  return new Pool({ connectionString });
}
|
||||
|
||||
/**
 * Print CLI usage, the available commands, options, and examples to stdout.
 *
 * The option list includes the `discover:full` switches that `main` reads
 * (`--all`, `--skip-cities`, `--stale-days`) as well as `--help`.
 */
function printHelp(): void {
  console.log(`
Dutchie Discovery CLI

Usage:
  npx tsx src/scripts/run-discovery.ts <command> [options]

Commands:
  discover:state <state>   Discover all stores in a state (e.g., AZ)
  discover:city <city>     Discover stores in a single city
  discover:full            Run full discovery pipeline
  seed:cities <state>      Seed known cities for a state
  stats                    Show discovery statistics
  list                     List discovered locations

Options:
  --state <code>           State code (e.g., AZ, CA, ON)
  --country <code>         Country code (default: US)
  --status <status>        Filter by status (discovered, verified, rejected, merged)
  --limit <n>              Limit results (default: varies by command)
  --all                    discover:full: include all cities, not only stale ones
  --skip-cities            discover:full: skip the city discovery step
  --stale-days <n>         discover:full: staleness threshold in days (default: 7)
  --dry-run                Don't make any changes, just show what would happen
  --verbose                Show detailed output
  --help                   Show this help

Examples:
  npx tsx src/scripts/run-discovery.ts discover:state AZ
  npx tsx src/scripts/run-discovery.ts discover:city phoenix --state AZ
  npx tsx src/scripts/run-discovery.ts seed:cities AZ
  npx tsx src/scripts/run-discovery.ts stats
  npx tsx src/scripts/run-discovery.ts list --status discovered --state AZ --limit 20
`);
}
|
||||
|
||||
/** Raised for invalid command-line input; main() reports it and sets exit status 1. */
class UsageError extends Error {
  constructor(message: string) {
    super(message);
    this.name = 'UsageError';
  }
}

/**
 * Read a flag that must carry a string value.
 *
 * @returns The value, or `undefined` when the flag is absent or empty.
 * @throws UsageError when the flag was given without a value (parsed as `true`).
 */
function readStringFlag(
  flags: Record<string, string | boolean>,
  name: string
): string | undefined {
  const value = flags[name];
  if (value === undefined || value === false || value === '') {
    return undefined;
  }
  if (value === true) {
    throw new UsageError(`--${name} requires a value`);
  }
  return value;
}

/**
 * Read a flag that must carry a non-negative integer.
 *
 * @returns The parsed integer, or `fallback` when the flag is absent.
 * @throws UsageError when the value is not a plain sequence of digits.
 */
function readIntFlag(
  flags: Record<string, string | boolean>,
  name: string,
  fallback: number
): number {
  const raw = readStringFlag(flags, name);
  if (raw === undefined) {
    return fallback;
  }
  if (!/^\d+$/.test(raw)) {
    throw new UsageError(`--${name} must be a non-negative integer (got "${raw}")`);
  }
  return Number.parseInt(raw, 10);
}

/** Render a possibly-null database value as text (null/undefined become ''). */
function toText(value: unknown): string {
  return value === null || value === undefined ? '' : String(value);
}

/**
 * CLI entry point: parse arguments, dispatch the requested command, and
 * print its results.
 *
 * Failures set `process.exitCode = 1` and return instead of calling
 * `process.exit()`, so the `finally` block always gets to close the
 * database pool before the process ends.
 */
async function main(): Promise<void> {
  const { command, positional, flags } = parseArgs();

  if (command === 'help' || flags.help) {
    printHelp();
    return;
  }

  const pool = createPool();

  try {
    const dryRun = Boolean(flags['dry-run']);
    const verbose = Boolean(flags.verbose);

    switch (command) {
      case 'discover:state': {
        const stateCode = positional[0] || readStringFlag(flags, 'state');
        if (!stateCode) {
          console.error('ERROR: State code is required');
          console.error('Usage: discover:state <state>');
          process.exitCode = 1;
          return;
        }

        console.log(`\nDiscovering stores in ${stateCode}...\n`);
        const result = await discoverState(pool, stateCode.toUpperCase(), {
          dryRun,
          verbose,
          cityLimit: readIntFlag(flags, 'limit', 100),
        });

        console.log('\n=== DISCOVERY RESULTS ===');
        console.log(`Cities crawled: ${result.locations.length}`);
        console.log(`Locations found: ${result.totalLocationsFound}`);
        console.log(`Locations upserted: ${result.totalLocationsUpserted}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        break;
      }

      case 'discover:city': {
        const citySlug = positional[0];
        if (!citySlug) {
          console.error('ERROR: City slug is required');
          console.error('Usage: discover:city <city-slug> [--state AZ]');
          process.exitCode = 1;
          return;
        }

        console.log(`\nDiscovering stores in ${citySlug}...\n`);
        const result = await discoverCity(pool, citySlug, {
          stateCode: readStringFlag(flags, 'state'),
          countryCode: readStringFlag(flags, 'country') || 'US',
          dryRun,
          verbose,
        });

        if (!result) {
          console.error(`City not found: ${citySlug}`);
          process.exitCode = 1;
          return;
        }

        console.log('\n=== DISCOVERY RESULTS ===');
        console.log(`City: ${result.citySlug}`);
        console.log(`Locations found: ${result.locationsFound}`);
        console.log(`Locations upserted: ${result.locationsUpserted}`);
        console.log(`New: ${result.locationsNew}, Updated: ${result.locationsUpdated}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        if (result.errors.length > 0) {
          console.log(`Errors: ${result.errors.length}`);
          for (const e of result.errors) {
            console.log(` - ${e}`);
          }
        }
        break;
      }

      case 'discover:full': {
        console.log('\nRunning full discovery pipeline...\n');
        const result = await runFullDiscovery(pool, {
          stateCode: readStringFlag(flags, 'state'),
          countryCode: readStringFlag(flags, 'country') || 'US',
          cityLimit: readIntFlag(flags, 'limit', 50),
          skipCityDiscovery: Boolean(flags['skip-cities']),
          // Only stale cities are processed unless --all is given.
          onlyStale: !flags.all,
          staleDays: readIntFlag(flags, 'stale-days', 7),
          dryRun,
          verbose,
        });

        console.log('\n=== FULL DISCOVERY RESULTS ===');
        console.log(`Cities discovered: ${result.cities.citiesFound}`);
        console.log(`Cities upserted: ${result.cities.citiesUpserted}`);
        console.log(`Cities crawled: ${result.locations.length}`);
        console.log(`Total locations found: ${result.totalLocationsFound}`);
        console.log(`Total locations upserted: ${result.totalLocationsUpserted}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        break;
      }

      case 'seed:cities': {
        const stateCode = positional[0] || readStringFlag(flags, 'state');
        if (!stateCode) {
          console.error('ERROR: State code is required');
          console.error('Usage: seed:cities <state>');
          process.exitCode = 1;
          return;
        }

        // Arizona is the only state with a predefined city list here.
        if (stateCode.toUpperCase() !== 'AZ') {
          console.error(`No predefined cities for state: ${stateCode}`);
          console.error('Add cities to city-discovery.ts ARIZONA_CITIES array (or add new state arrays)');
          process.exitCode = 1;
          return;
        }
        const cities = ARIZONA_CITIES;

        console.log(`\nSeeding ${cities.length} cities for ${stateCode}...\n`);
        const result = await seedKnownCities(pool, cities);
        console.log(`Created: ${result.created} new cities`);
        console.log(`Updated: ${result.updated} existing cities`);
        break;
      }

      case 'stats': {
        console.log('\nFetching discovery statistics...\n');
        const stats = await getDiscoveryStats(pool);

        console.log('=== CITIES ===');
        console.log(`Total: ${stats.cities.total}`);
        console.log(`Crawled (24h): ${stats.cities.crawledLast24h}`);
        console.log(`Never crawled: ${stats.cities.neverCrawled}`);
        console.log('');
        console.log('=== LOCATIONS ===');
        console.log(`Total active: ${stats.locations.total}`);
        console.log(`Discovered: ${stats.locations.discovered}`);
        console.log(`Verified: ${stats.locations.verified}`);
        console.log(`Merged: ${stats.locations.merged}`);
        console.log(`Rejected: ${stats.locations.rejected}`);
        console.log('');
        console.log('=== BY STATE ===');
        for (const s of stats.locations.byState) {
          console.log(` ${s.stateCode}: ${s.count}`);
        }
        break;
      }

      case 'list': {
        const status = readStringFlag(flags, 'status');
        const stateCode = readStringFlag(flags, 'state');
        const limit = readIntFlag(flags, 'limit', 50);

        // Build the WHERE clause with numbered placeholders; user input is
        // only ever passed through the params array, never interpolated.
        const conditions = ['active = TRUE'];
        const params: unknown[] = [];
        if (status) {
          params.push(status);
          conditions.push(`status = $${params.length}`);
        }
        if (stateCode) {
          params.push(stateCode.toUpperCase());
          conditions.push(`state_code = $${params.length}`);
        }
        params.push(limit);

        const { rows } = await pool.query(
          `
          SELECT id, platform, name, city, state_code, status, platform_menu_url, first_seen_at
          FROM dutchie_discovery_locations
          WHERE ${conditions.join(' AND ')}
          ORDER BY first_seen_at DESC
          LIMIT $${params.length}
          `,
          params
        );

        console.log(`\nFound ${rows.length} locations:\n`);
        console.log('ID\tStatus\t\tState\tCity\t\tName');
        console.log('-'.repeat(80));
        rows.forEach((row: Record<string, unknown>) => {
          // toText() guards against NULL columns so formatting never throws.
          const statusDisplay = toText(row.status).padEnd(12);
          const stateDisplay = toText(row.state_code) || 'N/A';
          const cityDisplay = toText(row.city).substring(0, 12).padEnd(12);
          const nameDisplay = toText(row.name).substring(0, 30);
          console.log(
            `${toText(row.id)}\t${statusDisplay}\t${stateDisplay}\t${cityDisplay}\t${nameDisplay}`
          );
        });
        break;
      }

      default:
        console.error(`Unknown command: ${command}`);
        printHelp();
        process.exitCode = 1;
        return;
    }
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('ERROR:', message);
    if (flags.verbose && error instanceof Error) {
      console.error(error.stack);
    }
    process.exitCode = 1;
  } finally {
    // Runs on every path above (success, early return, or caught error).
    await pool.end();
  }
}
|
||||
|
||||
// Run the CLI. main() reports command failures itself; this handler covers
// any rejection that still escapes it (e.g. if closing the pool rejects), so
// the promise is never left floating and the exit status reflects the failure.
main().catch((error: unknown) => {
  console.error('ERROR:', error instanceof Error ? error.message : error);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user