feat: SEO template library, discovery pipeline, and orchestrator enhancements

## SEO Template Library
- Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration)
- Add Template Library tab in SEO Orchestrator with accordion-based editors
- Add template preview, validation, and variable injection engine
- Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate

## Discovery Pipeline
- Add promotion.ts for discovery location validation and promotion
- Add discover-all-states.ts script for multi-state discovery
- Add promotion log migration (067)
- Enhance discovery routes and types

## Orchestrator & Admin
- Add crawl_enabled filter to stores page
- Add API permissions page
- Add job queue management
- Add price analytics routes
- Add markets and intelligence routes
- Enhance dashboard and worker monitoring

## Infrastructure
- Add migrations for worker definitions, SEO settings, field alignment
- Add canonical pipeline for scraper v2
- Update hydration and sync orchestrator
- Enhance multi-state query service

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-09 00:05:34 -07:00
parent 9711d594db
commit 2f483b3084
83 changed files with 16700 additions and 1277 deletions

View File

@@ -0,0 +1,385 @@
#!/usr/bin/env npx tsx
/**
* Discover All States - Sequential State-by-State Dutchie Discovery
*
* This script discovers Dutchie dispensaries for every jurisdiction in the
* US_STATES list below (US states, plus DC, with legal cannabis), processing
* one state at a time with delays between states.
*
* Progress is automatically saved to /tmp/discovery-progress.json
* so the script can resume from where it left off if interrupted.
*
* Usage:
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --dry-run
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --start-from CA
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --resume
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --reset # Clear progress, start fresh
*
* Options:
* --dry-run Don't save to database, just show what would happen
* --start-from Start from a specific state (skip earlier states)
* --states Comma-separated list of specific states to run (e.g., AZ,CA,CO)
* --verbose Show detailed output
* --resume Auto-resume from last saved progress (default if progress file exists)
* --reset Clear progress file and start fresh
*/
import { Pool } from 'pg';
import * as fs from 'fs';
import * as path from 'path';
const PROGRESS_FILE = '/tmp/discovery-progress.json';
/**
 * Shape of the JSON document persisted at PROGRESS_FILE so an interrupted
 * run can pick up where it stopped.
 */
interface ProgressData {
// Code of the state most recently finished without throwing; null before any state completes.
lastCompletedState: string | null;
// Position of lastCompletedState within US_STATES (-1 initially, or when the code is not in that list).
lastCompletedIndex: number;
// ISO-8601 timestamp taken when this progress record was first created.
startedAt: string;
// ISO-8601 timestamp refreshed by saveProgress() on every write.
updatedAt: string;
// Every state code that has completed so far, without duplicates.
completedStates: string[];
}
/**
 * Runtime guard for data read back from the progress file.
 *
 * The file lives in /tmp and may be truncated, hand-edited, or written by an
 * older version of this script, so its contents cannot be trusted to match
 * ProgressData just because they parsed as JSON.
 */
function isProgressData(value: unknown): value is ProgressData {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  return (
    (candidate.lastCompletedState === null || typeof candidate.lastCompletedState === 'string') &&
    typeof candidate.lastCompletedIndex === 'number' &&
    Number.isInteger(candidate.lastCompletedIndex) &&
    typeof candidate.startedAt === 'string' &&
    typeof candidate.updatedAt === 'string' &&
    Array.isArray(candidate.completedStates) &&
    candidate.completedStates.every((code) => typeof code === 'string')
  );
}

/**
 * Reads the saved progress record from PROGRESS_FILE.
 *
 * @returns The parsed progress, or null when the file does not exist, cannot
 *          be read/parsed, or does not have the expected shape. Failures are
 *          logged as warnings and never thrown, so a bad progress file only
 *          costs a fresh start rather than aborting the run.
 */
function loadProgress(): ProgressData | null {
  try {
    if (!fs.existsSync(PROGRESS_FILE)) {
      return null;
    }
    const parsed: unknown = JSON.parse(fs.readFileSync(PROGRESS_FILE, 'utf-8'));
    if (isProgressData(parsed)) {
      return parsed;
    }
    // Valid JSON but not a progress record: returning it would make callers
    // compute NaN indices or crash on missing arrays, so treat it as absent.
    console.warn('[Progress] Ignoring progress file with unexpected contents:', PROGRESS_FILE);
  } catch (e) {
    console.warn('[Progress] Could not load progress file:', e);
  }
  return null;
}
/**
 * Persists the progress record to PROGRESS_FILE as pretty-printed JSON.
 *
 * Stamps `progress.updatedAt` with the current time before writing (the
 * caller's object is mutated). Write failures are logged and swallowed so
 * that a progress-file problem never interrupts discovery.
 */
function saveProgress(progress: ProgressData): void {
  try {
    progress.updatedAt = new Date().toISOString();
    const serialized = JSON.stringify(progress, null, 2);
    fs.writeFileSync(PROGRESS_FILE, serialized);
  } catch (writeError) {
    console.warn('[Progress] Could not save progress:', writeError);
  }
}
/**
 * Deletes PROGRESS_FILE if it exists.
 *
 * Does nothing (and logs nothing) when there is no file. Deletion failures
 * are logged as warnings and not rethrown.
 */
function clearProgress(): void {
  try {
    if (!fs.existsSync(PROGRESS_FILE)) {
      return;
    }
    fs.unlinkSync(PROGRESS_FILE);
    console.log('[Progress] Cleared progress file');
  } catch (removeError) {
    console.warn('[Progress] Could not clear progress:', removeError);
  }
}
import { discoverState } from '../discovery';
// US states with legal cannabis (medical or recreational), plus DC.
// Ordered roughly by market size / likelihood of Dutchie presence.
//
// NOTE: the order is load-bearing. Saved progress stores an index into this
// array (ProgressData.lastCompletedIndex) and auto-resume restarts at
// US_STATES[lastCompletedIndex + 1], so inserting or reordering entries
// invalidates any existing progress file.
const US_STATES = [
'AZ', // Arizona
'CA', // California
'CO', // Colorado
'FL', // Florida
'IL', // Illinois
'MA', // Massachusetts
'MI', // Michigan
'NV', // Nevada
'NJ', // New Jersey
'NY', // New York
'OH', // Ohio
'OR', // Oregon
'PA', // Pennsylvania
'WA', // Washington
'MD', // Maryland
'MO', // Missouri
'CT', // Connecticut
'NM', // New Mexico
'ME', // Maine
'VT', // Vermont
'MT', // Montana
'AK', // Alaska
'OK', // Oklahoma
'AR', // Arkansas
'ND', // North Dakota
'SD', // South Dakota
'MN', // Minnesota
'NH', // New Hampshire
'RI', // Rhode Island
'DE', // Delaware
'HI', // Hawaii
'WV', // West Virginia
'LA', // Louisiana
'UT', // Utah
'VA', // Virginia
'DC', // District of Columbia
];
/**
 * Per-state outcome recorded by main() and used for the end-of-run summary.
 * A state whose discovery threw is recorded with all counters at 0 and the
 * thrown message as its only error.
 */
interface DiscoveryResult {
// Two-letter code of the state this row describes.
stateCode: string;
// Number of entries in the `locations` array returned by discoverState().
citiesCrawled: number;
// `totalLocationsFound` as reported by discoverState().
locationsFound: number;
// `totalLocationsUpserted` as reported by discoverState().
locationsUpserted: number;
// `durationMs` as reported by discoverState().
durationMs: number;
// Error messages gathered from each entry's `errors`, or the thrown message on failure.
errors: string[];
}
/**
 * Parses `process.argv` into a flag map.
 *
 * Supported forms:
 *   --key=value   -> { key: 'value' }  (everything after the FIRST '=' is the value)
 *   --key value   -> { key: 'value' }  (next token is consumed unless it starts with '--')
 *   --key         -> { key: true }
 *
 * Tokens that do not start with '--' and were not consumed as a value are ignored.
 *
 * @returns Map of flag name (without the leading dashes) to its string value,
 *          or `true` for bare flags.
 */
function parseArgs(): Record<string, string | boolean> {
  const args = process.argv.slice(2);
  const flags: Record<string, string | boolean> = {};

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (!arg.startsWith('--')) {
      continue;
    }

    const body = arg.slice(2);
    const eqIndex = body.indexOf('=');
    if (eqIndex !== -1) {
      // Split on the first '=' only, so values that themselves contain '='
      // (connection strings, filters, ...) are preserved intact.
      flags[body.slice(0, eqIndex)] = body.slice(eqIndex + 1);
      continue;
    }

    const next = args[i + 1];
    if (next && !next.startsWith('--')) {
      flags[body] = next;
      i++; // the following token was consumed as this flag's value
    } else {
      flags[body] = true;
    }
  }

  return flags;
}
/** Resolves after `ms` milliseconds; used to space out requests between states. */
function pause(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Prints the aggregate totals and the per-state table for this run. */
function printRunSummary(results: DiscoveryResult[], totalDurationMs: number): void {
  const totalLocations = results.reduce((sum, r) => sum + r.locationsFound, 0);
  const totalUpserted = results.reduce((sum, r) => sum + r.locationsUpserted, 0);
  const totalCities = results.reduce((sum, r) => sum + r.citiesCrawled, 0);
  const statesWithErrors = results.filter((r) => r.errors.length > 0);

  console.log('');
  console.log('='.repeat(70));
  console.log('DISCOVERY COMPLETE - SUMMARY');
  console.log('='.repeat(70));
  console.log(`Total states processed: ${results.length}`);
  console.log(`Total cities crawled: ${totalCities}`);
  console.log(`Total locations found: ${totalLocations}`);
  console.log(`Total locations upserted: ${totalUpserted}`);
  console.log(`Total duration: ${(totalDurationMs / 1000 / 60).toFixed(1)} minutes`);
  console.log('');

  if (statesWithErrors.length > 0) {
    console.log('States with errors:');
    statesWithErrors.forEach((r) => {
      console.log(` ${r.stateCode}: ${r.errors.length} error(s)`);
    });
    console.log('');
  }

  console.log('Per-state results:');
  console.log('-'.repeat(70));
  console.log('State\tCities\tFound\tUpserted\tDuration\tStatus');
  console.log('-'.repeat(70));
  results.forEach((r) => {
    const status = r.errors.length > 0 ? 'ERRORS' : 'OK';
    const duration = (r.durationMs / 1000).toFixed(1) + 's';
    console.log(
      `${r.stateCode}\t${r.citiesCrawled}\t${r.locationsFound}\t${r.locationsUpserted}\t\t${duration}\t\t${status}`
    );
  });
}

/** Queries and prints current row counts from the discovery and dispensary tables. */
async function printDatabaseTotals(pool: Pool): Promise<void> {
  // NOTE(review): counts are typed as string on the assumption that
  // node-postgres returns COUNT(*) (bigint) as text by default — confirm
  // against the project's pg type-parser configuration. They are only
  // interpolated into log lines here, so either representation prints fine.
  interface StateCountRow {
    state_code: string | null;
    count: string;
    discovered: string;
    promoted: string;
  }
  interface TotalRow {
    total: string;
  }

  console.log('');
  console.log('='.repeat(70));
  console.log('DATABASE TOTALS');
  console.log('='.repeat(70));

  const { rows: locationCounts } = await pool.query<StateCountRow>(`
    SELECT
      state_code,
      COUNT(*) as count,
      COUNT(CASE WHEN status = 'discovered' THEN 1 END) as discovered,
      COUNT(CASE WHEN status = 'promoted' THEN 1 END) as promoted
    FROM dutchie_discovery_locations
    WHERE active = TRUE
    GROUP BY state_code
    ORDER BY count DESC
  `);
  console.log('State\tTotal\tDiscovered\tPromoted');
  console.log('-'.repeat(50));
  locationCounts.forEach((row) => {
    console.log(`${row.state_code || 'N/A'}\t${row.count}\t${row.discovered}\t\t${row.promoted}`);
  });

  const { rows: totalRow } = await pool.query<TotalRow>(`
    SELECT COUNT(*) as total FROM dutchie_discovery_locations WHERE active = TRUE
  `);
  console.log('-'.repeat(50));
  console.log(`TOTAL: ${totalRow[0]?.total ?? '0'} locations in discovery table`);

  const { rows: dispRow } = await pool.query<TotalRow>(`
    SELECT COUNT(*) as total FROM dispensaries WHERE menu_type = 'dutchie'
  `);
  console.log(`DISPENSARIES: ${dispRow[0]?.total ?? '0'} Dutchie dispensaries in main table`);
}

/**
 * Script entry point: runs Dutchie discovery one state at a time.
 *
 * Flow:
 *  1. Parse CLI flags and decide which states to process (full list,
 *     --states subset, --start-from, or auto-resume from the progress file).
 *  2. For each state call discoverState(), record the outcome, and pause
 *     between states. A state that throws is recorded as failed and the loop
 *     continues with the next one.
 *  3. Print a run summary followed by current database totals.
 *
 * Progress file rules:
 *  - Only a live sweep of the ordered US_STATES list (optionally via
 *    --start-from / auto-resume) reads AND writes the progress file.
 *  - --dry-run and --states runs never write or clear it: a dry run saves
 *    nothing to the database, and a hand-picked subset has no meaningful
 *    position in the ordered sweep, so neither may move the resume point.
 *  - A state that fails is not retried on resume; rerun it with --states.
 *
 * Exits the process with code 1 on invalid flags or a missing DATABASE_URL,
 * and with code 0 when the saved progress shows every state already done.
 */
async function main() {
  const flags = parseArgs();
  const dryRun = Boolean(flags['dry-run']);
  const verbose = Boolean(flags.verbose);
  const reset = Boolean(flags.reset);
  // --resume is accepted for explicitness, but resuming is already the
  // default whenever a progress file exists, so the flag needs no handling.

  // A value-taking flag passed without a value parses to `true`; reject it
  // here instead of crashing later on a string method call.
  let startFrom: string | undefined;
  const startFromFlag = flags['start-from'];
  if (typeof startFromFlag === 'string') {
    startFrom = startFromFlag;
  } else if (startFromFlag !== undefined) {
    console.error('ERROR: --start-from requires a state code (e.g., --start-from CA)');
    process.exit(1);
  }

  let specificStates: string[] | null = null;
  const statesFlag = flags.states;
  if (typeof statesFlag === 'string') {
    specificStates = statesFlag
      .split(',')
      .map((s) => s.trim().toUpperCase())
      .filter((s) => s.length > 0); // tolerate "AZ,,CA" and trailing commas
    if (specificStates.length === 0) {
      console.error('ERROR: --states requires a comma-separated list of state codes (e.g., --states AZ,CA,CO)');
      process.exit(1);
    }
  } else if (statesFlag !== undefined) {
    console.error('ERROR: --states requires a comma-separated list of state codes (e.g., --states AZ,CA,CO)');
    process.exit(1);
  }

  // Handle reset flag
  if (reset) {
    clearProgress();
  }

  // Determine which states to process
  let statesToProcess: string[] = specificStates ?? US_STATES;

  // Check for saved progress (auto-resume unless --reset, --start-from or
  // --states is specified). With --reset the old record is ignored even if
  // deleting the file failed, so stale state can never leak into this run.
  const savedProgress = reset ? null : loadProgress();
  if (savedProgress && !startFrom && !specificStates) {
    const nextIndex = savedProgress.lastCompletedIndex + 1;
    if (nextIndex < US_STATES.length) {
      startFrom = US_STATES[nextIndex];
      console.log(`[Progress] Resuming from saved progress`);
      console.log(`[Progress] Last completed: ${savedProgress.lastCompletedState} (${savedProgress.completedStates.length} states done)`);
      console.log(`[Progress] Started at: ${savedProgress.startedAt}`);
      console.log(`[Progress] Last update: ${savedProgress.updatedAt}`);
      console.log('');
    } else {
      console.log(`[Progress] All states already completed! Use --reset to start over.`);
      process.exit(0);
    }
  }

  if (startFrom) {
    const startIndex = statesToProcess.indexOf(startFrom.toUpperCase());
    if (startIndex === -1) {
      console.error(`ERROR: State ${startFrom} not found in list`);
      process.exit(1);
    }
    statesToProcess = statesToProcess.slice(startIndex);
    console.log(`Starting from ${startFrom}, ${statesToProcess.length} states remaining`);
  }

  // Initialize progress tracking
  const progress: ProgressData = savedProgress ?? {
    lastCompletedState: null,
    lastCompletedIndex: -1,
    startedAt: new Date().toISOString(),
    updatedAt: new Date().toISOString(),
    completedStates: [],
  };
  // See "Progress file rules" in the function header.
  const trackProgress = !dryRun && !specificStates;

  console.log('='.repeat(70));
  console.log('DUTCHIE ALL-STATES DISCOVERY');
  console.log('='.repeat(70));
  console.log(`Mode: ${dryRun ? 'DRY RUN' : 'LIVE'}`);
  console.log(`States to process: ${statesToProcess.length}`);
  console.log(`States: ${statesToProcess.join(', ')}`);
  console.log('');

  // Create database pool
  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.error('ERROR: DATABASE_URL environment variable is required');
    process.exit(1);
  }
  const pool = new Pool({ connectionString });

  const results: DiscoveryResult[] = [];
  const startTime = Date.now();

  try {
    for (let i = 0; i < statesToProcess.length; i++) {
      const stateCode = statesToProcess[i];
      console.log('');
      console.log('─'.repeat(70));
      console.log(`[${i + 1}/${statesToProcess.length}] Discovering ${stateCode}...`);
      console.log('─'.repeat(70));

      try {
        const result = await discoverState(pool, stateCode, {
          dryRun,
          verbose,
          cityLimit: 200, // Allow up to 200 cities per state
        });

        const discoveryResult: DiscoveryResult = {
          stateCode,
          citiesCrawled: result.locations.length,
          locationsFound: result.totalLocationsFound,
          locationsUpserted: result.totalLocationsUpserted,
          durationMs: result.durationMs,
          errors: [],
        };
        // Collect errors from city results
        result.locations.forEach((loc) => {
          if (loc.errors && loc.errors.length > 0) {
            discoveryResult.errors.push(...loc.errors);
          }
        });
        results.push(discoveryResult);

        // Save progress after each successful state (tracked sweeps only)
        if (trackProgress) {
          progress.lastCompletedState = stateCode;
          progress.lastCompletedIndex = US_STATES.indexOf(stateCode);
          if (!progress.completedStates.includes(stateCode)) {
            progress.completedStates.push(stateCode);
          }
          saveProgress(progress);
        }

        console.log(`\n[${stateCode}] COMPLETE:`);
        console.log(` Cities crawled: ${discoveryResult.citiesCrawled}`);
        console.log(` Locations found: ${discoveryResult.locationsFound}`);
        console.log(` Locations upserted: ${discoveryResult.locationsUpserted}`);
        console.log(` Duration: ${(discoveryResult.durationMs / 1000).toFixed(1)}s`);
        if (trackProgress) {
          console.log(` Progress saved (${progress.completedStates.length}/${US_STATES.length} states)`);
        }
        if (discoveryResult.errors.length > 0) {
          console.log(` Errors: ${discoveryResult.errors.length}`);
        }

        // Delay between states to avoid rate limiting
        if (i < statesToProcess.length - 1) {
          const delaySeconds = 5;
          console.log(`\n Waiting ${delaySeconds}s before next state...`);
          await pause(delaySeconds * 1000);
        }
      } catch (error: unknown) {
        // Anything can be thrown; only Error instances carry a `.message`.
        const message = error instanceof Error ? error.message : String(error);
        console.error(`\n[${stateCode}] ERROR: ${message}`);
        results.push({
          stateCode,
          citiesCrawled: 0,
          locationsFound: 0,
          locationsUpserted: 0,
          durationMs: 0,
          errors: [message],
        });
        // Continue to next state even on error
        await pause(3000);
      }
    }

    // Print summary
    printRunSummary(results, Date.now() - startTime);

    // Final count from database
    await printDatabaseTotals(pool);

    // Clear progress file on successful completion of all states
    const sweepFinished =
      results.length === US_STATES.length ||
      (savedProgress !== null && progress.completedStates.length === US_STATES.length);
    if (trackProgress && sweepFinished) {
      clearProgress();
      console.log('\n[Progress] All states completed! Progress file cleared.');
    }
  } finally {
    await pool.end();
  }
}
// Kick off the script; any rejection that escapes main() is reported and
// turned into a non-zero exit code.
main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});