Files
cannaiq/backend/src/scripts/run-discovery.ts
Kelly 2f483b3084 feat: SEO template library, discovery pipeline, and orchestrator enhancements
## SEO Template Library
- Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration)
- Add Template Library tab in SEO Orchestrator with accordion-based editors
- Add template preview, validation, and variable injection engine
- Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate

## Discovery Pipeline
- Add promotion.ts for discovery location validation and promotion
- Add discover-all-states.ts script for multi-state discovery
- Add promotion log migration (067)
- Enhance discovery routes and types

## Orchestrator & Admin
- Add crawl_enabled filter to stores page
- Add API permissions page
- Add job queue management
- Add price analytics routes
- Add markets and intelligence routes
- Enhance dashboard and worker monitoring

## Infrastructure
- Add migrations for worker definitions, SEO settings, field alignment
- Add canonical pipeline for scraper v2
- Update hydration and sync orchestrator
- Enhance multi-state query service

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 00:05:34 -07:00

316 lines
11 KiB
TypeScript

#!/usr/bin/env npx tsx
/**
* Dutchie Discovery CLI
*
* Command-line interface for running the Dutchie store discovery pipeline.
*
* Usage:
* npx tsx src/scripts/run-discovery.ts <command> [options]
*
* Commands:
* discover:state <state> - Discover all stores in a state (e.g., AZ)
* discover:city <city> - Discover stores in a single city
* discover:full - Run full discovery pipeline
* seed:cities <state> - Seed known cities for a state
* stats - Show discovery statistics
* list - List discovered locations
*
* Examples:
* npx tsx src/scripts/run-discovery.ts discover:state AZ
* npx tsx src/scripts/run-discovery.ts discover:city phoenix --state AZ
* npx tsx src/scripts/run-discovery.ts seed:cities AZ
* npx tsx src/scripts/run-discovery.ts stats
* npx tsx src/scripts/run-discovery.ts list --status discovered --state AZ
*/
import { Pool } from 'pg';
import {
runFullDiscovery,
discoverCity,
discoverState,
getDiscoveryStats,
seedKnownCities,
} from '../discovery';
import { getCitiesForState } from '../discovery/location-discovery';
/**
 * Parse `process.argv` into a command name, positional arguments, and flags.
 *
 * The first argument after the script path is the command (defaults to
 * `'help'` when absent). Every later argument is either:
 *  - `--key=value`  -> `flags.key = 'value'` (only the FIRST `=` separates the
 *    key from the value, so values may themselves contain `=`),
 *  - `--key value`  -> `flags.key = 'value'` when the next argument is
 *    non-empty and does not start with `--`,
 *  - `--key`        -> `flags.key = true`,
 *  - anything else  -> appended to `positional`.
 *
 * @returns The command, the positional arguments in order, and the flag map.
 */
function parseArgs(): {
  command: string;
  positional: string[];
  flags: Record<string, string | boolean>;
} {
  const args = process.argv.slice(2);
  const command = args[0] || 'help';
  const positional: string[] = [];
  const flags: Record<string, string | boolean> = {};

  for (let i = 1; i < args.length; i++) {
    const arg = args[i];
    if (!arg.startsWith('--')) {
      positional.push(arg);
      continue;
    }

    const body = arg.slice(2);
    const eqIndex = body.indexOf('=');
    if (eqIndex !== -1) {
      // Split on the first '=' only; `split('=')` would drop everything after
      // a second '=' (e.g. `--filter=a=b` used to yield just `a`).
      flags[body.slice(0, eqIndex)] = body.slice(eqIndex + 1);
      continue;
    }

    const next = args[i + 1];
    if (next && !next.startsWith('--')) {
      // The following argument is this flag's value; consume it.
      flags[body] = next;
      i++;
    } else {
      flags[body] = true;
    }
  }

  return { command, positional, flags };
}
/**
 * Build the PostgreSQL connection pool used by every command.
 *
 * Reads the connection string from the `DATABASE_URL` environment variable.
 * When the variable is unset or empty, an error is printed to stderr and the
 * process exits with status 1.
 *
 * @returns A `pg` Pool configured with the `DATABASE_URL` connection string.
 */
function createPool(): Pool {
  const databaseUrl = process.env['DATABASE_URL'];
  const isMissing = !databaseUrl;

  if (isMissing) {
    console.error('ERROR: DATABASE_URL environment variable is required');
    process.exit(1);
  }

  const pool = new Pool({ connectionString: databaseUrl });
  return pool;
}
/**
 * Print the CLI usage text (commands, options, examples) to stdout.
 *
 * The option list includes the flags that only `discover:full` reads
 * (`--all`, `--skip-cities`, `--stale-days`) and `--help`, so that every flag
 * the script accepts is discoverable from the help output.
 */
function printHelp(): void {
  console.log(`
Dutchie Discovery CLI
Usage:
npx tsx src/scripts/run-discovery.ts <command> [options]
Commands:
discover:state <state> Discover all stores in a state (e.g., AZ)
discover:city <city> Discover stores in a single city
discover:full Run full discovery pipeline
seed:cities <state> Seed known cities for a state
stats Show discovery statistics
list List discovered locations
Options:
--state <code> State code (e.g., AZ, CA, ON)
--country <code> Country code (default: US)
--status <status> Filter by status (discovered, verified, rejected, merged)
--limit <n> Limit results (default: varies by command)
--dry-run Don't make any changes, just show what would happen
--verbose Show detailed output
--all discover:full only: do not restrict the run to stale cities
--skip-cities discover:full only: skip the city discovery step
--stale-days <n> discover:full only: staleness threshold in days (default: 7)
--help Show this help message
Examples:
npx tsx src/scripts/run-discovery.ts discover:state AZ
npx tsx src/scripts/run-discovery.ts discover:city phoenix --state AZ
npx tsx src/scripts/run-discovery.ts seed:cities AZ
npx tsx src/scripts/run-discovery.ts stats
npx tsx src/scripts/run-discovery.ts list --status discovered --state AZ --limit 20
`);
}
/** Flag map shape produced by `parseArgs`. */
type CliFlags = Record<string, string | boolean>;

/** Columns of `dutchie_discovery_locations` that the `list` command prints. */
type LocationListRow = {
  id: number | string;
  name: string | null;
  city: string | null;
  state_code: string | null;
  status: string | null;
};

/**
 * Read a flag that must carry a value.
 *
 * @returns The flag's string value, or `undefined` when the flag is absent.
 * @throws Error when the flag was given without a value (parsed as `true`),
 *   so a boolean never flows into code that expects a string.
 */
function readStringFlag(flags: CliFlags, name: string): string | undefined {
  const value = flags[name];
  if (value === undefined || value === false) {
    return undefined;
  }
  if (value === true) {
    throw new Error(`Option --${name} requires a value`);
  }
  return value;
}

/**
 * Read a flag that must be a non-negative integer.
 *
 * @returns The parsed integer, or `fallback` when the flag is absent or empty.
 * @throws Error when the value is not a plain run of decimal digits, so `NaN`
 *   or a negative number never reaches a query or a discovery option.
 */
function readIntFlag(flags: CliFlags, name: string, fallback: number): number {
  const raw = readStringFlag(flags, name);
  if (raw === undefined || raw === '') {
    return fallback;
  }
  if (!/^\d+$/.test(raw)) {
    throw new Error(`Option --${name} must be a non-negative integer, got "${raw}"`);
  }
  return Number.parseInt(raw, 10);
}

/**
 * CLI entry point: parse arguments, dispatch to the requested command, and
 * print its results.
 *
 * Help (`help`, `--help`, `-h`, or any `--help` flag) is handled before the
 * database pool is created, so it works without `DATABASE_URL`. For every
 * other command the pool is always closed before the process exits: failure
 * paths record an exit code instead of calling `process.exit` inside the
 * `try` block, which would skip the `finally` that ends the pool.
 *
 * Exit status: 0 on success or help; 1 on usage errors, unknown commands,
 * "not found" results, or any thrown error.
 */
async function main(): Promise<void> {
  const { command, positional, flags } = parseArgs();

  // `--help` / `-h` given as the first argument is captured as the command
  // itself by parseArgs, so it has to be matched here as well.
  if (command === 'help' || command === '--help' || command === '-h' || flags.help) {
    printHelp();
    process.exit(0);
  }

  const pool = createPool();
  let exitCode = 0;

  try {
    switch (command) {
      case 'discover:state': {
        const stateCode = positional[0] || readStringFlag(flags, 'state');
        if (!stateCode) {
          console.error('ERROR: State code is required');
          console.error('Usage: discover:state <state>');
          exitCode = 1;
          break;
        }
        console.log(`\nDiscovering stores in ${stateCode}...\n`);
        const result = await discoverState(pool, stateCode.toUpperCase(), {
          dryRun: Boolean(flags['dry-run']),
          verbose: Boolean(flags.verbose),
          cityLimit: readIntFlag(flags, 'limit', 100),
        });
        console.log('\n=== DISCOVERY RESULTS ===');
        console.log(`Cities crawled: ${result.locations.length}`);
        console.log(`Locations found: ${result.totalLocationsFound}`);
        console.log(`Locations upserted: ${result.totalLocationsUpserted}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        break;
      }

      case 'discover:city': {
        const citySlug = positional[0];
        if (!citySlug) {
          console.error('ERROR: City slug is required');
          console.error('Usage: discover:city <city-slug> [--state AZ]');
          exitCode = 1;
          break;
        }
        console.log(`\nDiscovering stores in ${citySlug}...\n`);
        const result = await discoverCity(pool, citySlug, {
          // NOTE(review): cast kept from the original because the option types
          // are declared in ../discovery and are not visible here; the value
          // is undefined when --state is omitted.
          stateCode: readStringFlag(flags, 'state') as string,
          countryCode: readStringFlag(flags, 'country') || 'US',
          dryRun: Boolean(flags['dry-run']),
          verbose: Boolean(flags.verbose),
        });
        if (!result) {
          console.error(`City not found: ${citySlug}`);
          exitCode = 1;
          break;
        }
        console.log('\n=== DISCOVERY RESULTS ===');
        console.log(`City: ${result.citySlug}`);
        console.log(`Locations found: ${result.locationsFound}`);
        console.log(`Locations upserted: ${result.locationsUpserted}`);
        console.log(`New: ${result.locationsNew}, Updated: ${result.locationsUpdated}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        if (result.errors.length > 0) {
          console.log(`Errors: ${result.errors.length}`);
          result.errors.forEach((e) => console.log(` - ${e}`));
        }
        break;
      }

      case 'discover:full': {
        console.log('\nRunning full discovery pipeline...\n');
        const result = await runFullDiscovery(pool, {
          // NOTE(review): cast kept from the original; undefined when --state
          // is omitted (option types are not visible from this file).
          stateCode: readStringFlag(flags, 'state') as string,
          countryCode: readStringFlag(flags, 'country') || 'US',
          cityLimit: readIntFlag(flags, 'limit', 50),
          skipCityDiscovery: Boolean(flags['skip-cities']),
          onlyStale: !flags.all,
          staleDays: readIntFlag(flags, 'stale-days', 7),
          dryRun: Boolean(flags['dry-run']),
          verbose: Boolean(flags.verbose),
        });
        console.log('\n=== FULL DISCOVERY RESULTS ===');
        console.log(`Cities discovered: ${result.cities.citiesFound}`);
        console.log(`Cities upserted: ${result.cities.citiesUpserted}`);
        console.log(`Cities crawled: ${result.locations.length}`);
        console.log(`Total locations found: ${result.totalLocationsFound}`);
        console.log(`Total locations upserted: ${result.totalLocationsUpserted}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        break;
      }

      case 'seed:cities': {
        const stateCode = positional[0] || readStringFlag(flags, 'state');
        if (!stateCode) {
          console.error('ERROR: State code is required');
          console.error('Usage: seed:cities <state>');
          exitCode = 1;
          break;
        }
        // Dynamically fetch cities from Dutchie
        console.log(`\nFetching cities for ${stateCode} from Dutchie...\n`);
        const cityNames = await getCitiesForState(stateCode.toUpperCase());
        if (cityNames.length === 0) {
          console.error(`No cities found for state: ${stateCode}`);
          exitCode = 1;
          break;
        }
        const cities = cityNames.map((name) => ({
          name,
          // Slug: lowercase, whitespace runs -> '-', then drop anything that
          // is not a-z, 0-9 or '-'.
          slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
          stateCode: stateCode.toUpperCase(),
        }));
        console.log(`Seeding ${cities.length} cities for ${stateCode}...\n`);
        const result = await seedKnownCities(pool, cities);
        console.log(`Created: ${result.created} new cities`);
        console.log(`Updated: ${result.updated} existing cities`);
        break;
      }

      case 'stats': {
        console.log('\nFetching discovery statistics...\n');
        const stats = await getDiscoveryStats(pool);
        console.log('=== CITIES ===');
        console.log(`Total: ${stats.cities.total}`);
        console.log(`Crawled (24h): ${stats.cities.crawledLast24h}`);
        console.log(`Never crawled: ${stats.cities.neverCrawled}`);
        console.log('');
        console.log('=== LOCATIONS ===');
        console.log(`Total active: ${stats.locations.total}`);
        console.log(`Discovered: ${stats.locations.discovered}`);
        console.log(`Verified: ${stats.locations.verified}`);
        console.log(`Merged: ${stats.locations.merged}`);
        console.log(`Rejected: ${stats.locations.rejected}`);
        console.log('');
        console.log('=== BY STATE ===');
        stats.locations.byState.forEach((s) => {
          console.log(` ${s.stateCode}: ${s.count}`);
        });
        break;
      }

      case 'list': {
        const status = readStringFlag(flags, 'status');
        const stateCode = readStringFlag(flags, 'state');
        const limit = readIntFlag(flags, 'limit', 50);

        // Build a parameterized WHERE clause; each placeholder number is the
        // 1-based position of the value just pushed onto `params`.
        const conditions: string[] = ['active = TRUE'];
        const params: Array<string | number> = [];
        if (status) {
          params.push(status);
          conditions.push(`status = $${params.length}`);
        }
        if (stateCode) {
          params.push(stateCode.toUpperCase());
          conditions.push(`state_code = $${params.length}`);
        }
        params.push(limit);

        const { rows } = await pool.query(
          `
          SELECT id, platform, name, city, state_code, status, platform_menu_url, first_seen_at
          FROM dutchie_discovery_locations
          WHERE ${conditions.join(' AND ')}
          ORDER BY first_seen_at DESC
          LIMIT $${params.length}
          `,
          params
        );

        console.log(`\nFound ${rows.length} locations:\n`);
        console.log('ID\tStatus\t\tState\tCity\t\tName');
        console.log('-'.repeat(80));
        rows.forEach((row: LocationListRow) => {
          // Treat a missing status like a missing city/name rather than
          // crashing on `.padEnd` of null.
          const statusDisplay = (row.status || '').padEnd(12);
          const cityDisplay = (row.city || '').substring(0, 12).padEnd(12);
          const nameDisplay = (row.name || '').substring(0, 30);
          console.log(
            `${row.id}\t${statusDisplay}\t${row.state_code || 'N/A'}\t${cityDisplay}\t${nameDisplay}`
          );
        });
        break;
      }

      default:
        console.error(`Unknown command: ${command}`);
        printHelp();
        exitCode = 1;
    }
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('ERROR:', message);
    if (flags.verbose && error instanceof Error) {
      console.error(error.stack);
    }
    exitCode = 1;
  } finally {
    await pool.end();
  }

  // Exit explicitly only on failure, and only after the pool has been closed.
  if (exitCode !== 0) {
    process.exit(exitCode);
  }
}

// Errors thrown outside main's try block (argument parsing, pool shutdown)
// would otherwise surface as an unhandled promise rejection.
main().catch((error: unknown) => {
  console.error('ERROR:', error instanceof Error ? error.message : String(error));
  process.exit(1);
});