## SEO Template Library - Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration) - Add Template Library tab in SEO Orchestrator with accordion-based editors - Add template preview, validation, and variable injection engine - Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate ## Discovery Pipeline - Add promotion.ts for discovery location validation and promotion - Add discover-all-states.ts script for multi-state discovery - Add promotion log migration (067) - Enhance discovery routes and types ## Orchestrator & Admin - Add crawl_enabled filter to stores page - Add API permissions page - Add job queue management - Add price analytics routes - Add markets and intelligence routes - Enhance dashboard and worker monitoring ## Infrastructure - Add migrations for worker definitions, SEO settings, field alignment - Add canonical pipeline for scraper v2 - Update hydration and sync orchestrator - Enhance multi-state query service 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
316 lines
11 KiB
TypeScript
316 lines
11 KiB
TypeScript
#!/usr/bin/env npx tsx
|
|
/**
 * Dutchie Discovery CLI
 *
 * Command-line interface for running the Dutchie store discovery pipeline.
 *
 * Usage:
 *   npx tsx src/scripts/run-discovery.ts <command> [options]
 *
 * Commands:
 *   discover:state <state>  - Discover all stores in a state (e.g., AZ)
 *   discover:city <city>    - Discover stores in a single city
 *   discover:full           - Run full discovery pipeline
 *   seed:cities <state>     - Seed known cities for a state
 *   stats                   - Show discovery statistics
 *   list                    - List discovered locations
 *
 * Examples:
 *   npx tsx src/scripts/run-discovery.ts discover:state AZ
 *   npx tsx src/scripts/run-discovery.ts discover:city phoenix --state AZ
 *   npx tsx src/scripts/run-discovery.ts seed:cities AZ
 *   npx tsx src/scripts/run-discovery.ts stats
 *   npx tsx src/scripts/run-discovery.ts list --status discovered --state AZ
 */
|
|
|
|
import { Pool } from 'pg';
|
|
import {
|
|
runFullDiscovery,
|
|
discoverCity,
|
|
discoverState,
|
|
getDiscoveryStats,
|
|
seedKnownCities,
|
|
} from '../discovery';
|
|
import { getCitiesForState } from '../discovery/location-discovery';
|
|
|
|
// Parse command line arguments
|
|
/**
 * Parses `process.argv` into a command, positional arguments, and flags.
 *
 * The first argument after the script path is the command; it defaults to
 * `help` when absent. The remaining arguments are classified as:
 *  - flags: `--key=value`, `--key value`, or a bare `--key` (stored as `true`);
 *  - positionals: everything that does not start with `--`, in order.
 *
 * When the first argument is itself a flag (e.g. `--help`), no command was
 * given: the command falls back to `help` and that argument is parsed as a
 * flag instead of being mistaken for a command name.
 *
 * @returns The command name, the positional arguments, and the flag map.
 */
function parseArgs(): {
  command: string;
  positional: string[];
  flags: Record<string, string | boolean>;
} {
  const args = process.argv.slice(2);
  const first = args[0];
  const startsWithFlag = first !== undefined && first.startsWith('--');

  const command = startsWithFlag ? 'help' : first || 'help';
  const positional: string[] = [];
  const flags: Record<string, string | boolean> = {};

  // Skip the command slot unless it actually holds a flag.
  for (let i = startsWithFlag ? 0 : 1; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) {
      continue;
    }

    if (!arg.startsWith('--')) {
      positional.push(arg);
      continue;
    }

    const body = arg.slice(2);
    const eqIndex = body.indexOf('=');
    if (eqIndex !== -1) {
      // Split on the FIRST '=' only so values such as `a=b` survive intact.
      flags[body.slice(0, eqIndex)] = body.slice(eqIndex + 1);
      continue;
    }

    const next = args[i + 1];
    if (next && !next.startsWith('--')) {
      // `--key value`: consume the following argument as the value.
      flags[body] = next;
      i++;
    } else {
      flags[body] = true;
    }
  }

  return { command, positional, flags };
}
|
|
|
|
// Create database pool
|
|
/**
 * Builds the PostgreSQL connection pool for the CLI.
 *
 * The connection string is read from the `DATABASE_URL` environment variable.
 * When it is unset or empty, an error is printed and the process exits with
 * status 1.
 *
 * @returns A `pg` pool configured with the `DATABASE_URL` connection string.
 */
function createPool(): Pool {
  const databaseUrl = process.env['DATABASE_URL'];

  if (databaseUrl === undefined || databaseUrl === '') {
    console.error('ERROR: DATABASE_URL environment variable is required');
    process.exit(1);
  }

  return new Pool({ connectionString: databaseUrl });
}
|
|
|
|
// Print help
|
|
/**
 * Prints the CLI usage text (commands, options, and examples) to stdout.
 *
 * The option list covers every flag that `main` reads, including the flags
 * that only `discover:full` consumes, and spells out the per-command
 * `--limit` defaults.
 */
function printHelp(): void {
  console.log(`
Dutchie Discovery CLI

Usage:
  npx tsx src/scripts/run-discovery.ts <command> [options]

Commands:
  discover:state <state>  Discover all stores in a state (e.g., AZ)
  discover:city <city>    Discover stores in a single city
  discover:full           Run full discovery pipeline
  seed:cities <state>     Seed known cities for a state
  stats                   Show discovery statistics
  list                    List discovered locations
  help                    Show this help

Options:
  --state <code>      State code (e.g., AZ, CA, ON)
  --country <code>    Country code (default: US)
  --status <status>   Filter by status (discovered, verified, rejected, merged)
  --limit <n>         Limit cities (discover:*) or rows (list);
                      default: 100 for discover:state, 50 for discover:full and list
  --all               discover:full: include all cities, not only stale ones
  --stale-days <n>    discover:full: staleness threshold in days (default: 7)
  --skip-cities       discover:full: skip the city discovery step
  --dry-run           Don't make any changes, just show what would happen
  --verbose           Show detailed output
  --help              Show this help

Examples:
  npx tsx src/scripts/run-discovery.ts discover:state AZ
  npx tsx src/scripts/run-discovery.ts discover:city phoenix --state AZ
  npx tsx src/scripts/run-discovery.ts seed:cities AZ
  npx tsx src/scripts/run-discovery.ts stats
  npx tsx src/scripts/run-discovery.ts list --status discovered --state AZ --limit 20
`);
}
|
|
|
|
// Main
|
|
/**
 * Row shape selected by the `list` command.
 * NOTE(review): column types are inferred from how the rows are printed —
 * confirm against the `dutchie_discovery_locations` schema.
 */
interface DiscoveryLocationRow {
  id: string | number;
  name: string | null;
  city: string | null;
  state_code: string | null;
  status: string | null;
}

/**
 * Reads an optional string-valued flag.
 *
 * @param flags Parsed flag map.
 * @param name Flag name without the leading `--`.
 * @returns The flag value, or `undefined` when the flag is absent or empty.
 * @throws Error when the flag was passed without a value (bare `--name`).
 */
function readStringFlag(
  flags: Record<string, string | boolean>,
  name: string
): string | undefined {
  const raw = flags[name];
  if (raw === true) {
    throw new Error(`--${name} requires a value`);
  }
  return raw || undefined;
}

/**
 * Reads an optional non-negative integer flag.
 *
 * @param flags Parsed flag map.
 * @param name Flag name without the leading `--`.
 * @param fallback Value used when the flag is absent or empty.
 * @returns The parsed integer, or `fallback`.
 * @throws Error when the value is missing or is not a non-negative integer.
 */
function readIntFlag(
  flags: Record<string, string | boolean>,
  name: string,
  fallback: number
): number {
  const raw = flags[name];
  if (!raw) {
    return fallback;
  }
  // A bare `--name` is stored as `true`; anything non-numeric would become NaN.
  if (raw === true || !/^\d+$/.test(raw)) {
    throw new Error(`--${name} requires a non-negative integer value`);
  }
  return parseInt(raw, 10);
}

/**
 * CLI entry point: parses arguments, dispatches to the requested command,
 * prints its results, and closes the database pool.
 *
 * `help` (or `--help`) prints the usage text and exits with status 0 before a
 * pool is created. Every other command runs against a pool from `createPool`.
 * Failures are reported on stderr and recorded as exit status 1; the process
 * exits only after `pool.end()` has run, so the cleanup in `finally` is not
 * skipped on failure paths.
 */
async function main(): Promise<void> {
  const { command, positional, flags } = parseArgs();

  if (command === 'help' || flags.help) {
    printHelp();
    process.exit(0);
  }

  const pool = createPool();
  let exitCode = 0;

  try {
    const dryRun = Boolean(flags['dry-run']);
    const verbose = Boolean(flags.verbose);

    switch (command) {
      case 'discover:state': {
        const stateCode = positional[0] || readStringFlag(flags, 'state');
        if (!stateCode) {
          console.error('ERROR: State code is required');
          console.error('Usage: discover:state <state>');
          exitCode = 1;
          break;
        }

        console.log(`\nDiscovering stores in ${stateCode}...\n`);
        const result = await discoverState(pool, stateCode.toUpperCase(), {
          dryRun,
          verbose,
          cityLimit: readIntFlag(flags, 'limit', 100),
        });

        console.log('\n=== DISCOVERY RESULTS ===');
        console.log(`Cities crawled: ${result.locations.length}`);
        console.log(`Locations found: ${result.totalLocationsFound}`);
        console.log(`Locations upserted: ${result.totalLocationsUpserted}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        break;
      }

      case 'discover:city': {
        const citySlug = positional[0];
        if (!citySlug) {
          console.error('ERROR: City slug is required');
          console.error('Usage: discover:city <city-slug> [--state AZ]');
          exitCode = 1;
          break;
        }

        console.log(`\nDiscovering stores in ${citySlug}...\n`);
        const result = await discoverCity(pool, citySlug, {
          stateCode: readStringFlag(flags, 'state'),
          countryCode: readStringFlag(flags, 'country') || 'US',
          dryRun,
          verbose,
        });

        if (!result) {
          console.error(`City not found: ${citySlug}`);
          exitCode = 1;
          break;
        }

        console.log('\n=== DISCOVERY RESULTS ===');
        console.log(`City: ${result.citySlug}`);
        console.log(`Locations found: ${result.locationsFound}`);
        console.log(`Locations upserted: ${result.locationsUpserted}`);
        console.log(`New: ${result.locationsNew}, Updated: ${result.locationsUpdated}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        if (result.errors.length > 0) {
          console.log(`Errors: ${result.errors.length}`);
          result.errors.forEach((e) => console.log(` - ${e}`));
        }
        break;
      }

      case 'discover:full': {
        console.log('\nRunning full discovery pipeline...\n');
        const result = await runFullDiscovery(pool, {
          stateCode: readStringFlag(flags, 'state'),
          countryCode: readStringFlag(flags, 'country') || 'US',
          cityLimit: readIntFlag(flags, 'limit', 50),
          skipCityDiscovery: Boolean(flags['skip-cities']),
          // `--all` turns off the stale-only restriction.
          onlyStale: !flags.all,
          staleDays: readIntFlag(flags, 'stale-days', 7),
          dryRun,
          verbose,
        });

        console.log('\n=== FULL DISCOVERY RESULTS ===');
        console.log(`Cities discovered: ${result.cities.citiesFound}`);
        console.log(`Cities upserted: ${result.cities.citiesUpserted}`);
        console.log(`Cities crawled: ${result.locations.length}`);
        console.log(`Total locations found: ${result.totalLocationsFound}`);
        console.log(`Total locations upserted: ${result.totalLocationsUpserted}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        break;
      }

      case 'seed:cities': {
        const stateCode = positional[0] || readStringFlag(flags, 'state');
        if (!stateCode) {
          console.error('ERROR: State code is required');
          console.error('Usage: seed:cities <state>');
          exitCode = 1;
          break;
        }
        const upperState = stateCode.toUpperCase();

        // Dynamically fetch cities from Dutchie
        console.log(`\nFetching cities for ${stateCode} from Dutchie...\n`);
        const cityNames = await getCitiesForState(upperState);

        if (cityNames.length === 0) {
          console.error(`No cities found for state: ${stateCode}`);
          exitCode = 1;
          break;
        }

        const cities = cityNames.map((name) => ({
          name,
          // Slug: lowercase, whitespace runs -> '-', drop anything outside [a-z0-9-].
          slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
          stateCode: upperState,
        }));

        console.log(`Seeding ${cities.length} cities for ${stateCode}...\n`);
        const result = await seedKnownCities(pool, cities);
        console.log(`Created: ${result.created} new cities`);
        console.log(`Updated: ${result.updated} existing cities`);
        break;
      }

      case 'stats': {
        console.log('\nFetching discovery statistics...\n');
        const stats = await getDiscoveryStats(pool);

        console.log('=== CITIES ===');
        console.log(`Total: ${stats.cities.total}`);
        console.log(`Crawled (24h): ${stats.cities.crawledLast24h}`);
        console.log(`Never crawled: ${stats.cities.neverCrawled}`);
        console.log('');
        console.log('=== LOCATIONS ===');
        console.log(`Total active: ${stats.locations.total}`);
        console.log(`Discovered: ${stats.locations.discovered}`);
        console.log(`Verified: ${stats.locations.verified}`);
        console.log(`Merged: ${stats.locations.merged}`);
        console.log(`Rejected: ${stats.locations.rejected}`);
        console.log('');
        console.log('=== BY STATE ===');
        stats.locations.byState.forEach((s) => {
          console.log(` ${s.stateCode}: ${s.count}`);
        });
        break;
      }

      case 'list': {
        const status = readStringFlag(flags, 'status');
        const stateCode = readStringFlag(flags, 'state');
        const limit = readIntFlag(flags, 'limit', 50);

        // Build the WHERE clause and its positional parameters in lockstep:
        // each placeholder number is the parameter's 1-based position.
        const conditions = ['active = TRUE'];
        const params: Array<string | number> = [];

        if (status) {
          params.push(status);
          conditions.push(`status = $${params.length}`);
        }

        if (stateCode) {
          params.push(stateCode.toUpperCase());
          conditions.push(`state_code = $${params.length}`);
        }

        params.push(limit);

        const { rows } = await pool.query(
          `
          SELECT id, platform, name, city, state_code, status, platform_menu_url, first_seen_at
          FROM dutchie_discovery_locations
          WHERE ${conditions.join(' AND ')}
          ORDER BY first_seen_at DESC
          LIMIT $${params.length}
          `,
          params
        );

        console.log(`\nFound ${rows.length} locations:\n`);
        console.log('ID\tStatus\t\tState\tCity\t\tName');
        console.log('-'.repeat(80));
        rows.forEach((row: DiscoveryLocationRow) => {
          const statusDisplay = (row.status || '').padEnd(12);
          const cityDisplay = (row.city || '').substring(0, 12).padEnd(12);
          const nameDisplay = (row.name || '').substring(0, 30);
          console.log(
            `${row.id}\t${statusDisplay}\t${row.state_code || 'N/A'}\t${cityDisplay}\t${nameDisplay}`
          );
        });
        break;
      }

      default:
        console.error(`Unknown command: ${command}`);
        printHelp();
        exitCode = 1;
    }
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('ERROR:', message);
    if (flags.verbose && error instanceof Error) {
      console.error(error.stack);
    }
    exitCode = 1;
  } finally {
    await pool.end();
  }

  // Exit only after the pool is closed; calling process.exit() inside the
  // try/catch would terminate before the `finally` cleanup could run.
  if (exitCode !== 0) {
    process.exit(exitCode);
  }
}
|
|
|
|
// Run the CLI. main() reports command failures itself; this handler covers
// rejections raised outside its try/catch (e.g. while closing the pool), so
// they surface as a clean error message and a non-zero exit status.
main().catch((error: unknown) => {
  console.error('ERROR:', error instanceof Error ? error.message : error);
  process.exit(1);
});
|