feat: SEO template library, discovery pipeline, and orchestrator enhancements
## SEO Template Library - Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration) - Add Template Library tab in SEO Orchestrator with accordion-based editors - Add template preview, validation, and variable injection engine - Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate ## Discovery Pipeline - Add promotion.ts for discovery location validation and promotion - Add discover-all-states.ts script for multi-state discovery - Add promotion log migration (067) - Enhance discovery routes and types ## Orchestrator & Admin - Add crawl_enabled filter to stores page - Add API permissions page - Add job queue management - Add price analytics routes - Add markets and intelligence routes - Enhance dashboard and worker monitoring ## Infrastructure - Add migrations for worker definitions, SEO settings, field alignment - Add canonical pipeline for scraper v2 - Update hydration and sync orchestrator - Enhance multi-state query service 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
385
backend/src/scripts/discover-all-states.ts
Normal file
385
backend/src/scripts/discover-all-states.ts
Normal file
@@ -0,0 +1,385 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Discover All States - Sequential State-by-State Dutchie Discovery
|
||||
*
|
||||
* This script discovers all Dutchie dispensaries for every US state,
|
||||
* processing one state at a time with delays between states.
|
||||
*
|
||||
* Progress is automatically saved to /tmp/discovery-progress.json
|
||||
* so the script can resume from where it left off if interrupted.
|
||||
*
|
||||
* Usage:
|
||||
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts
|
||||
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --dry-run
|
||||
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --start-from CA
|
||||
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --resume
|
||||
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --reset # Clear progress, start fresh
|
||||
*
|
||||
* Options:
|
||||
* --dry-run Don't save to database, just show what would happen
|
||||
* --start-from Start from a specific state (skip earlier states)
|
||||
* --states Comma-separated list of specific states to run (e.g., AZ,CA,CO)
|
||||
* --verbose Show detailed output
|
||||
* --resume Auto-resume from last saved progress (default if progress file exists)
|
||||
* --reset Clear progress file and start fresh
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
const PROGRESS_FILE = '/tmp/discovery-progress.json';
|
||||
|
||||
/**
 * Checkpoint persisted to PROGRESS_FILE so an interrupted run can resume.
 */
interface ProgressData {
  /** ISO-8601 timestamp taken when the checkpoint object was first created. */
  startedAt: string;
  /** ISO-8601 timestamp refreshed each time the checkpoint is written. */
  updatedAt: string;
  /** Code of the most recently completed state, or null if none has finished. */
  lastCompletedState: string | null;
  /** Position of `lastCompletedState` within US_STATES (-1 before any state finishes). */
  lastCompletedIndex: number;
  /** Every state code that has completed successfully so far. */
  completedStates: string[];
}
|
||||
|
||||
/**
 * Load the saved discovery checkpoint, if one exists.
 *
 * The file contents are validated before being trusted: a hand-edited or
 * partially written file must not crash the caller, which dereferences
 * `completedStates` and `lastCompletedIndex` directly.
 *
 * @returns The parsed checkpoint, or `null` when the file is missing,
 *          unreadable, not valid JSON, or does not have the expected shape.
 */
function loadProgress(): ProgressData | null {
  try {
    if (!fs.existsSync(PROGRESS_FILE)) {
      return null;
    }

    const parsed: unknown = JSON.parse(fs.readFileSync(PROGRESS_FILE, 'utf-8'));
    if (typeof parsed !== 'object' || parsed === null) {
      console.warn('[Progress] Ignoring progress file with unexpected contents');
      return null;
    }

    const candidate = parsed as Partial<ProgressData>;
    const hasExpectedShape =
      (candidate.lastCompletedState === null || typeof candidate.lastCompletedState === 'string') &&
      typeof candidate.lastCompletedIndex === 'number' &&
      Number.isInteger(candidate.lastCompletedIndex) &&
      typeof candidate.startedAt === 'string' &&
      typeof candidate.updatedAt === 'string' &&
      Array.isArray(candidate.completedStates) &&
      candidate.completedStates.every((code) => typeof code === 'string');

    if (!hasExpectedShape) {
      console.warn('[Progress] Ignoring progress file with unexpected contents');
      return null;
    }

    return candidate as ProgressData;
  } catch (e) {
    // Best effort: a broken checkpoint only means the run starts from scratch.
    console.warn('[Progress] Could not load progress file:', e);
  }
  return null;
}
|
||||
|
||||
/**
 * Persist the discovery checkpoint to PROGRESS_FILE.
 *
 * The checkpoint is written to a sibling temp file and then renamed over the
 * real file, so an interruption mid-write cannot leave a truncated checkpoint
 * behind. Failures are logged and otherwise ignored (best effort).
 *
 * @param progress Checkpoint to store; its `updatedAt` field is refreshed in place.
 */
function saveProgress(progress: ProgressData): void {
  try {
    progress.updatedAt = new Date().toISOString();

    const tempFile = `${PROGRESS_FILE}.tmp`;
    fs.writeFileSync(tempFile, JSON.stringify(progress, null, 2));
    fs.renameSync(tempFile, PROGRESS_FILE);
  } catch (e) {
    console.warn('[Progress] Could not save progress:', e);
  }
}
|
||||
|
||||
/**
 * Delete the checkpoint file if it exists.
 *
 * Failures are logged as warnings and never thrown.
 */
function clearProgress(): void {
  try {
    const checkpointExists = fs.existsSync(PROGRESS_FILE);
    if (!checkpointExists) {
      return;
    }

    fs.unlinkSync(PROGRESS_FILE);
    console.log('[Progress] Cleared progress file');
  } catch (e) {
    console.warn('[Progress] Could not clear progress:', e);
  }
}
|
||||
import { discoverState } from '../discovery';
|
||||
|
||||
// US states with legal cannabis (medical or recreational).
// Ordered roughly by market size / likelihood of Dutchie presence; the order
// matters because saved progress stores an index into this list.
const US_STATES = [
  'AZ', 'CA', 'CO', 'FL', // Arizona, California, Colorado, Florida
  'IL', 'MA', 'MI', 'NV', // Illinois, Massachusetts, Michigan, Nevada
  'NJ', 'NY', 'OH', 'OR', // New Jersey, New York, Ohio, Oregon
  'PA', 'WA', 'MD', 'MO', // Pennsylvania, Washington, Maryland, Missouri
  'CT', 'NM', 'ME', 'VT', // Connecticut, New Mexico, Maine, Vermont
  'MT', 'AK', 'OK', 'AR', // Montana, Alaska, Oklahoma, Arkansas
  'ND', 'SD', 'MN', 'NH', // North Dakota, South Dakota, Minnesota, New Hampshire
  'RI', 'DE', 'HI', 'WV', // Rhode Island, Delaware, Hawaii, West Virginia
  'LA', 'UT', 'VA', 'DC', // Louisiana, Utah, Virginia, District of Columbia
];
|
||||
|
||||
/**
 * Outcome of discovering a single state, as collected by main() for the
 * end-of-run summary.
 */
interface DiscoveryResult {
  /** Two-letter code of the state that was processed. */
  stateCode: string;
  /** Wall-clock duration reported for the state, in milliseconds (0 on failure). */
  durationMs: number;
  /** Number of per-city entries returned for the state. */
  citiesCrawled: number;
  /** Total locations reported as found. */
  locationsFound: number;
  /** Total locations reported as upserted. */
  locationsUpserted: number;
  /** Error messages gathered for the state (empty when everything succeeded). */
  errors: string[];
}
|
||||
|
||||
/**
 * Parse `--flag`, `--flag value` and `--flag=value` style arguments from
 * `process.argv`.
 *
 * - `--key=value` stores everything after the FIRST `=` (so values may
 *   themselves contain `=`).
 * - `--key value` stores the following argument when it does not start with `--`.
 * - A bare `--key` is stored as `true`.
 * - Arguments that do not start with `--` (and were not consumed as a value)
 *   are ignored.
 *
 * @returns Map from flag name (without the leading dashes) to its value.
 */
function parseArgs(): Record<string, string | boolean> {
  const args = process.argv.slice(2);
  const flags: Record<string, string | boolean> = {};

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (!arg.startsWith('--')) {
      continue;
    }

    const body = arg.slice(2);
    const equalsAt = body.indexOf('=');
    if (equalsAt !== -1) {
      // Split on the first '=' only; the rest belongs to the value.
      flags[body.slice(0, equalsAt)] = body.slice(equalsAt + 1);
      continue;
    }

    const next = args[i + 1];
    if (next && !next.startsWith('--')) {
      flags[body] = next;
      i++; // the value has been consumed
    } else {
      flags[body] = true;
    }
  }

  return flags;
}
|
||||
|
||||
/**
 * Print aggregate totals, the states that reported errors, and the per-state
 * results table for this run.
 */
function printRunSummary(results: DiscoveryResult[], totalDurationMs: number): void {
  const totalLocations = results.reduce((sum, r) => sum + r.locationsFound, 0);
  const totalUpserted = results.reduce((sum, r) => sum + r.locationsUpserted, 0);
  const totalCities = results.reduce((sum, r) => sum + r.citiesCrawled, 0);
  const statesWithErrors = results.filter((r) => r.errors.length > 0);

  console.log('');
  console.log('='.repeat(70));
  console.log('DISCOVERY COMPLETE - SUMMARY');
  console.log('='.repeat(70));
  console.log(`Total states processed: ${results.length}`);
  console.log(`Total cities crawled: ${totalCities}`);
  console.log(`Total locations found: ${totalLocations}`);
  console.log(`Total locations upserted: ${totalUpserted}`);
  console.log(`Total duration: ${(totalDurationMs / 1000 / 60).toFixed(1)} minutes`);
  console.log('');

  if (statesWithErrors.length > 0) {
    console.log('States with errors:');
    statesWithErrors.forEach((r) => {
      console.log(` ${r.stateCode}: ${r.errors.length} error(s)`);
    });
    console.log('');
  }

  console.log('Per-state results:');
  console.log('-'.repeat(70));
  console.log('State\tCities\tFound\tUpserted\tDuration\tStatus');
  console.log('-'.repeat(70));

  results.forEach((r) => {
    const status = r.errors.length > 0 ? 'ERRORS' : 'OK';
    const duration = (r.durationMs / 1000).toFixed(1) + 's';
    console.log(
      `${r.stateCode}\t${r.citiesCrawled}\t${r.locationsFound}\t${r.locationsUpserted}\t\t${duration}\t\t${status}`
    );
  });
}

/**
 * Print current row counts from the discovery table (per state and overall)
 * and the number of Dutchie dispensaries in the main table.
 */
async function printDatabaseTotals(pool: Pool): Promise<void> {
  console.log('');
  console.log('='.repeat(70));
  console.log('DATABASE TOTALS');
  console.log('='.repeat(70));

  // node-postgres returns COUNT(*) (a bigint) as a string by default.
  const { rows: locationCounts } = await pool.query<{
    state_code: string | null;
    count: string;
    discovered: string;
    promoted: string;
  }>(`
    SELECT
      state_code,
      COUNT(*) as count,
      COUNT(CASE WHEN status = 'discovered' THEN 1 END) as discovered,
      COUNT(CASE WHEN status = 'promoted' THEN 1 END) as promoted
    FROM dutchie_discovery_locations
    WHERE active = TRUE
    GROUP BY state_code
    ORDER BY count DESC
  `);

  console.log('State\tTotal\tDiscovered\tPromoted');
  console.log('-'.repeat(50));
  locationCounts.forEach((row) => {
    console.log(`${row.state_code || 'N/A'}\t${row.count}\t${row.discovered}\t\t${row.promoted}`);
  });

  const { rows: totalRow } = await pool.query<{ total: string }>(`
    SELECT COUNT(*) as total FROM dutchie_discovery_locations WHERE active = TRUE
  `);
  console.log('-'.repeat(50));
  console.log(`TOTAL: ${totalRow[0].total} locations in discovery table`);

  const { rows: dispRow } = await pool.query<{ total: string }>(`
    SELECT COUNT(*) as total FROM dispensaries WHERE menu_type = 'dutchie'
  `);
  console.log(`DISPENSARIES: ${dispRow[0].total} Dutchie dispensaries in main table`);
}

/**
 * Script entry point: discover every requested state sequentially, print a
 * summary, and report database totals.
 *
 * The resume checkpoint is only read and written for live, full-sweep runs.
 * Dry runs and `--states` runs never write it, because they would otherwise
 * mark states as completed that a later live sweep still has to process.
 * Resuming happens automatically when a checkpoint exists, so `--resume`
 * needs no handling of its own.
 */
async function main(): Promise<void> {
  const flags = parseArgs();
  const dryRun = Boolean(flags['dry-run']);
  const verbose = Boolean(flags.verbose);
  const reset = Boolean(flags.reset);

  // parseArgs() yields `true` for a flag given without a value; these two need one.
  for (const name of ['start-from', 'states']) {
    if (flags[name] === true) {
      console.error(`ERROR: --${name} requires a value`);
      process.exit(1);
    }
  }

  const startFromFlag = flags['start-from'];
  let startFrom = typeof startFromFlag === 'string' && startFromFlag ? startFromFlag : undefined;

  const statesFlag = flags.states;
  const specificStates =
    typeof statesFlag === 'string' && statesFlag
      ? statesFlag
          .split(',')
          .map((s) => s.trim().toUpperCase())
          .filter((s) => s.length > 0)
      : null;

  // Only a live sweep over US_STATES may update the checkpoint.
  const trackProgress = !dryRun && specificStates === null;

  // Handle reset flag
  if (reset) {
    clearProgress();
  }

  // Determine which states to process
  let statesToProcess = specificStates || US_STATES;

  // Check for saved progress (auto-resume unless --reset or --start-from specified)
  const savedProgress = loadProgress();
  if (savedProgress && !reset && !startFrom && !specificStates) {
    const nextIndex = savedProgress.lastCompletedIndex + 1;
    if (nextIndex < US_STATES.length) {
      startFrom = US_STATES[nextIndex];
      console.log(`[Progress] Resuming from saved progress`);
      console.log(`[Progress] Last completed: ${savedProgress.lastCompletedState} (${savedProgress.completedStates.length} states done)`);
      console.log(`[Progress] Started at: ${savedProgress.startedAt}`);
      console.log(`[Progress] Last update: ${savedProgress.updatedAt}`);
      console.log('');
    } else {
      console.log(`[Progress] All states already completed! Use --reset to start over.`);
      process.exit(0);
    }
  }

  if (startFrom) {
    const startIndex = statesToProcess.indexOf(startFrom.toUpperCase());
    if (startIndex === -1) {
      console.error(`ERROR: State ${startFrom} not found in list`);
      process.exit(1);
    }
    statesToProcess = statesToProcess.slice(startIndex);
    console.log(`Starting from ${startFrom}, ${statesToProcess.length} states remaining`);
  }

  // Initialize progress tracking
  const progress: ProgressData = savedProgress || {
    lastCompletedState: null,
    lastCompletedIndex: -1,
    startedAt: new Date().toISOString(),
    updatedAt: new Date().toISOString(),
    completedStates: [],
  };

  console.log('='.repeat(70));
  console.log('DUTCHIE ALL-STATES DISCOVERY');
  console.log('='.repeat(70));
  console.log(`Mode: ${dryRun ? 'DRY RUN' : 'LIVE'}`);
  console.log(`States to process: ${statesToProcess.length}`);
  console.log(`States: ${statesToProcess.join(', ')}`);
  console.log('');

  // Create database pool
  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.error('ERROR: DATABASE_URL environment variable is required');
    process.exit(1);
  }
  const pool = new Pool({ connectionString });

  const results: DiscoveryResult[] = [];
  const startTime = Date.now();

  try {
    for (let i = 0; i < statesToProcess.length; i++) {
      const stateCode = statesToProcess[i];

      console.log('');
      console.log('─'.repeat(70));
      console.log(`[${i + 1}/${statesToProcess.length}] Discovering ${stateCode}...`);
      console.log('─'.repeat(70));

      try {
        const result = await discoverState(pool, stateCode, {
          dryRun,
          verbose,
          cityLimit: 200, // Allow up to 200 cities per state
        });

        const discoveryResult: DiscoveryResult = {
          stateCode,
          citiesCrawled: result.locations.length,
          locationsFound: result.totalLocationsFound,
          locationsUpserted: result.totalLocationsUpserted,
          durationMs: result.durationMs,
          errors: [],
        };

        // Collect errors from city results
        result.locations.forEach((loc) => {
          if (loc.errors && loc.errors.length > 0) {
            discoveryResult.errors.push(...loc.errors);
          }
        });

        results.push(discoveryResult);

        // Save progress after each successful state (live full sweeps only)
        if (trackProgress) {
          progress.lastCompletedState = stateCode;
          progress.lastCompletedIndex = US_STATES.indexOf(stateCode);
          if (!progress.completedStates.includes(stateCode)) {
            progress.completedStates.push(stateCode);
          }
          saveProgress(progress);
        }

        console.log(`\n[${stateCode}] COMPLETE:`);
        console.log(` Cities crawled: ${discoveryResult.citiesCrawled}`);
        console.log(` Locations found: ${discoveryResult.locationsFound}`);
        console.log(` Locations upserted: ${discoveryResult.locationsUpserted}`);
        console.log(` Duration: ${(discoveryResult.durationMs / 1000).toFixed(1)}s`);
        if (trackProgress) {
          console.log(` Progress saved (${progress.completedStates.length}/${US_STATES.length} states)`);
        }

        if (discoveryResult.errors.length > 0) {
          console.log(` Errors: ${discoveryResult.errors.length}`);
        }

        // Delay between states to avoid rate limiting
        if (i < statesToProcess.length - 1) {
          const delaySeconds = 5;
          console.log(`\n Waiting ${delaySeconds}s before next state...`);
          await new Promise((r) => setTimeout(r, delaySeconds * 1000));
        }
      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        console.error(`\n[${stateCode}] ERROR: ${message}`);
        results.push({
          stateCode,
          citiesCrawled: 0,
          locationsFound: 0,
          locationsUpserted: 0,
          durationMs: 0,
          errors: [message],
        });

        // Continue to next state even on error
        await new Promise((r) => setTimeout(r, 3000));
      }
    }

    printRunSummary(results, Date.now() - startTime);
    await printDatabaseTotals(pool);

    // Clear progress file on successful completion of all states
    if (
      trackProgress &&
      (results.length === US_STATES.length ||
        (savedProgress && progress.completedStates.length === US_STATES.length))
    ) {
      clearProgress();
      console.log('\n[Progress] All states completed! Progress file cleared.');
    }
  } finally {
    await pool.end();
  }
}

main().catch((error: unknown) => {
  console.error('Fatal error:', error);
  process.exit(1);
});
|
||||
173
backend/src/scripts/estimate-bandwidth.ts
Normal file
173
backend/src/scripts/estimate-bandwidth.ts
Normal file
@@ -0,0 +1,173 @@
|
||||
import axios from 'axios';
|
||||
import { Pool } from 'pg';
|
||||
|
||||
const DUTCHIE_GRAPHQL_URL = 'https://dutchie.com/graphql';
|
||||
|
||||
const MENU_PRODUCTS_QUERY = `
|
||||
query FilteredProducts($productsFilter: ProductFilterInput!) {
|
||||
filteredProducts(productsFilter: $productsFilter) {
|
||||
products {
|
||||
id
|
||||
name
|
||||
brand
|
||||
category
|
||||
subcategory
|
||||
strainType
|
||||
description
|
||||
image
|
||||
images {
|
||||
id
|
||||
url
|
||||
}
|
||||
posId
|
||||
potencyCbd {
|
||||
formatted
|
||||
range
|
||||
unit
|
||||
}
|
||||
potencyThc {
|
||||
formatted
|
||||
range
|
||||
unit
|
||||
}
|
||||
variants {
|
||||
id
|
||||
option
|
||||
price
|
||||
priceMed
|
||||
priceRec
|
||||
quantity
|
||||
specialPrice
|
||||
}
|
||||
status
|
||||
}
|
||||
}
|
||||
}
|
||||
`;
|
||||
|
||||
/**
 * Format a byte count as a human-readable string using binary units
 * (1 KB = 1024 B).
 *
 * Values of 1 KB and above are shown with two decimals. Below 1 KB, whole
 * numbers are shown as-is and fractional values (e.g. a per-product average)
 * are shown with two decimals instead of the raw floating-point expansion.
 *
 * @param bytes Size in bytes; may be fractional.
 * @returns The formatted size, e.g. `"512 B"`, `"1.50 KB"`, `"5.00 MB"`.
 */
function formatBytes(bytes: number): string {
  const KB = 1024;
  const MB = KB * 1024;
  const GB = MB * 1024;

  if (bytes < KB) {
    const shown = Number.isInteger(bytes) ? String(bytes) : bytes.toFixed(2);
    return `${shown} B`;
  }
  if (bytes < MB) return `${(bytes / KB).toFixed(2)} KB`;
  if (bytes < GB) return `${(bytes / MB).toFixed(2)} MB`;
  return `${(bytes / GB).toFixed(2)} GB`;
}
|
||||
|
||||
/**
 * Issue one FilteredProducts request for a dispensary and measure its size.
 *
 * Mode 'A' filters on `Status: 'Active'`; mode 'B' sends `Status: null`.
 * The response size is the UTF-8 length of the re-serialized JSON body.
 *
 * @param dispensaryId Platform dispensary id sent in the products filter.
 * @param mode         Which status filter to use.
 * @returns Request/response sizes in bytes and the number of products
 *          returned; on failure the response size and product count are 0 and
 *          an `error` message is included.
 */
async function measureRequest(dispensaryId: string, mode: 'A' | 'B') {
  const statusFilter = mode === 'A' ? 'Active' : null;
  const payload = JSON.stringify({
    query: MENU_PRODUCTS_QUERY,
    variables: {
      productsFilter: {
        dispensaryId,
        pricingType: 'rec',
        Status: statusFilter,
      },
    },
  });
  const requestSize = Buffer.byteLength(payload, 'utf8');

  try {
    const response = await axios.post(DUTCHIE_GRAPHQL_URL, payload, {
      headers: {
        'Content-Type': 'application/json',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Origin': 'https://dutchie.com',
      },
      timeout: 30000,
    });

    const serialized = JSON.stringify(response.data);
    const responseSize = Buffer.byteLength(serialized, 'utf8');
    const products = response.data?.data?.filteredProducts?.products;
    const productCount = products?.length || 0;

    // Debug aid: an empty result usually means the response is worth inspecting.
    if (productCount === 0) {
      console.log(` Response preview: ${serialized.slice(0, 300)}...`);
    }

    return { requestSize, responseSize, productCount };
  } catch (error: any) {
    console.error(` Error: ${error.message}`);
    const failedResponse = error.response;
    if (failedResponse) {
      console.error(` Status: ${failedResponse.status}`);
      console.error(` Data: ${JSON.stringify(failedResponse.data).slice(0, 200)}`);
    }
    return { requestSize, responseSize: 0, productCount: 0, error: error.message };
  }
}
|
||||
|
||||
/**
 * Measure the GraphQL bandwidth used to crawl one store in both modes and
 * print per-store and per-product estimates.
 *
 * The database pool is always closed, including when the query or a request
 * throws, so the process does not linger on open connections.
 */
async function main(): Promise<void> {
  const pool = new Pool({ connectionString: process.env.DATABASE_URL });

  try {
    // Pick the store with the most rows in store_products among stores that
    // have a platform id.
    const { rows } = await pool.query(`
      SELECT d.platform_dispensary_id, d.name, COUNT(sp.id) as product_count
      FROM dispensaries d
      LEFT JOIN store_products sp ON d.id = sp.dispensary_id
      WHERE d.platform_dispensary_id IS NOT NULL
      GROUP BY d.id
      ORDER BY product_count DESC
      LIMIT 1
    `);

    if (rows.length === 0) {
      console.log('No crawlable stores found');
      return;
    }

    const store = rows[0];
    console.log('=== Dutchie GraphQL Bandwidth for One Store ===\n');
    console.log(`Store: ${store.name}`);
    console.log(`Platform ID: ${store.platform_dispensary_id}`);
    console.log(`Products in DB: ${store.product_count || 'unknown'}\n`);

    // Mode A (Active products with pricing)
    console.log('Fetching Mode A (Active products)...');
    const modeA = await measureRequest(store.platform_dispensary_id, 'A');

    // Mode B (All products)
    console.log('Fetching Mode B (All products)...');
    const modeB = await measureRequest(store.platform_dispensary_id, 'B');

    console.log('\n=== Results for ONE STORE ===');
    console.log('\nMode A (Active products with pricing):');
    console.log(` Request size: ${formatBytes(modeA.requestSize)}`);
    console.log(` Response size: ${formatBytes(modeA.responseSize)}`);
    console.log(` Products: ${modeA.productCount}`);
    if (modeA.productCount > 0) {
      console.log(` Per product: ${formatBytes(modeA.responseSize / modeA.productCount)}`);
    }

    console.log('\nMode B (All products incl. OOS):');
    console.log(` Request size: ${formatBytes(modeB.requestSize)}`);
    console.log(` Response size: ${formatBytes(modeB.responseSize)}`);
    console.log(` Products: ${modeB.productCount}`);
    if (modeB.productCount > 0) {
      console.log(` Per product: ${formatBytes(modeB.responseSize / modeB.productCount)}`);
    }

    console.log('\nDual-Mode Crawl (what we actually do):');
    const totalRequest = modeA.requestSize + modeB.requestSize;
    const totalResponse = modeA.responseSize + modeB.responseSize;
    const totalBandwidth = totalRequest + totalResponse;
    console.log(` Total request: ${formatBytes(totalRequest)}`);
    console.log(` Total response: ${formatBytes(totalResponse)}`);
    console.log(` TOTAL BANDWIDTH: ${formatBytes(totalBandwidth)}`);

    // Per-product average: combined response bytes of both modes divided by
    // the larger of the two product counts.
    const avgProducts = Math.max(modeA.productCount, modeB.productCount);
    const bytesPerProduct = avgProducts > 0 ? totalResponse / avgProducts : 0;

    console.log('\n=== Quick Reference ===');
    console.log(`Average bytes per product: ~${formatBytes(bytesPerProduct)}`);
    console.log(`\nTypical store sizes:`);
    console.log(` Small (100 products): ~${formatBytes(bytesPerProduct * 100 + totalRequest)}`);
    console.log(` Medium (300 products): ~${formatBytes(bytesPerProduct * 300 + totalRequest)}`);
    console.log(` Large (500 products): ~${formatBytes(bytesPerProduct * 500 + totalRequest)}`);
  } finally {
    await pool.end();
  }
}

main().catch((error: unknown) => {
  console.error(error);
  // Signal failure to the caller without cutting off pending output.
  process.exitCode = 1;
});
|
||||
137
backend/src/scripts/retry-platform-ids.ts
Normal file
137
backend/src/scripts/retry-platform-ids.ts
Normal file
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Retry resolving platform IDs for Dutchie stores that have menu_url but no platform_dispensary_id
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx src/scripts/retry-platform-ids.ts
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import dotenv from 'dotenv';
|
||||
import { resolveDispensaryIdWithDetails } from '../platforms/dutchie/queries';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
// Prefer an explicit DATABASE_URL; otherwise assemble a URL from the
// CANNAIQ_DB_* variables, falling back to local-development defaults.
// User and password are percent-encoded so that reserved characters
// (e.g. '@', ':' or '/') cannot corrupt the connection URL.
const pool = new Pool({
  connectionString:
    process.env.DATABASE_URL ||
    `postgresql://${encodeURIComponent(process.env.CANNAIQ_DB_USER || 'dutchie')}:${encodeURIComponent(process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass')}@${process.env.CANNAIQ_DB_HOST || 'localhost'}:${process.env.CANNAIQ_DB_PORT || '54320'}/${process.env.CANNAIQ_DB_NAME || 'dutchie_menus'}`,
});
|
||||
|
||||
/** Columns selected from `dispensaries` for each store that needs a platform id. */
interface DispensaryRow {
  /** Primary key of the dispensary row; used for the UPDATE. */
  id: number;
  /** Menu URL the slug is extracted from (the query excludes NULL and empty values). */
  menu_url: string;
  /** Display name, used for logging only. */
  name: string;
}
|
||||
|
||||
/**
 * Extract the dispensary slug from a Dutchie menu URL.
 *
 * Recognized forms (host match is case-insensitive):
 *   https://dutchie.com/stores/Nirvana-North-Phoenix
 *   https://dutchie.com/dispensary/curaleaf-dispensary-peoria
 *   https://dutchie.com/embedded-menu/some-slug
 *
 * The slug ends at the next `/`, `?` or `#`, so trailing path segments, query
 * strings and fragments are not included.
 *
 * @param menuUrl URL to inspect.
 * @returns The slug, or `null` when the URL matches none of the known forms.
 */
function extractSlugFromUrl(menuUrl: string): string | null {
  // Checked in priority order: stores, then dispensary, then embedded-menu.
  const patterns = [
    /dutchie\.com\/stores\/([^/?#]+)/i,
    /dutchie\.com\/dispensary\/([^/?#]+)/i,
    /dutchie\.com\/embedded-menu\/([^/?#]+)/i,
  ];

  for (const pattern of patterns) {
    const match = pattern.exec(menuUrl);
    if (match) {
      return match[1];
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Find Dutchie dispensaries that have a menu URL but no platform id, try to
 * resolve each one from its URL slug, store the resolved ids, and print a
 * summary of successes and failures.
 */
async function main() {
  const banner = '='.repeat(60);
  console.log(banner);
  console.log('Retry Platform ID Resolution');
  console.log(banner);
  console.log('');

  // Dutchie dispensaries with a menu_url but no platform_dispensary_id
  const { rows: stores } = await pool.query<DispensaryRow>(`
    SELECT id, name, menu_url
    FROM dispensaries
    WHERE menu_type = 'dutchie'
    AND menu_url IS NOT NULL AND menu_url != ''
    AND (platform_dispensary_id IS NULL OR platform_dispensary_id = '')
    ORDER BY name
  `);

  console.log(`Found ${stores.length} stores to retry\n`);

  if (stores.length === 0) {
    console.log('No stores need platform ID resolution.');
    await pool.end();
    return;
  }

  const resolved: { id: number; name: string; platformId: string }[] = [];
  const unresolved: { id: number; name: string; slug: string | null; error: string }[] = [];

  for (const store of stores) {
    const { id, name } = store;
    console.log(`\n[${id}] ${name}`);
    console.log(` URL: ${store.menu_url}`);

    const slug = extractSlugFromUrl(store.menu_url);
    if (!slug) {
      console.log(` ❌ Could not extract slug from URL`);
      unresolved.push({ id, name, slug: null, error: 'Could not extract slug' });
      continue;
    }

    console.log(` Slug: ${slug}`);

    try {
      const outcome = await resolveDispensaryIdWithDetails(slug);
      const platformId = outcome.dispensaryId;

      if (!platformId) {
        const reason = outcome.error || 'Unknown error';
        console.log(` ❌ Failed: ${reason}`);
        unresolved.push({ id, name, slug, error: reason });
      } else {
        console.log(` ✅ Resolved: ${platformId}`);

        // Persist the resolved id
        await pool.query(
          'UPDATE dispensaries SET platform_dispensary_id = $1 WHERE id = $2',
          [platformId, id]
        );
        console.log(` 💾 Updated database`);

        resolved.push({ id, name, platformId });
      }
    } catch (error: any) {
      console.log(` ❌ Error: ${error.message}`);
      unresolved.push({ id, name, slug, error: error.message });
    }

    // Small pause between resolution requests
    await new Promise((done) => setTimeout(done, 500));
  }

  console.log('\n' + banner);
  console.log('SUMMARY');
  console.log(banner);

  console.log(`\n✅ Successes (${resolved.length}):`);
  resolved.forEach((entry) => {
    console.log(` [${entry.id}] ${entry.name} -> ${entry.platformId}`);
  });

  console.log(`\n❌ Failures (${unresolved.length}):`);
  unresolved.forEach((entry) => {
    console.log(` [${entry.id}] ${entry.name} (slug: ${entry.slug || 'N/A'})`);
    console.log(` ${entry.error}`);
  });

  await pool.end();
}

main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});
|
||||
@@ -30,8 +30,8 @@ import {
|
||||
discoverState,
|
||||
getDiscoveryStats,
|
||||
seedKnownCities,
|
||||
ARIZONA_CITIES,
|
||||
} from '../discovery';
|
||||
import { getCitiesForState } from '../discovery/location-discovery';
|
||||
|
||||
// Parse command line arguments
|
||||
function parseArgs() {
|
||||
@@ -204,16 +204,22 @@ async function main() {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
let cities: any[] = [];
|
||||
if (stateCode.toUpperCase() === 'AZ') {
|
||||
cities = ARIZONA_CITIES;
|
||||
} else {
|
||||
console.error(`No predefined cities for state: ${stateCode}`);
|
||||
console.error('Add cities to city-discovery.ts ARIZONA_CITIES array (or add new state arrays)');
|
||||
// Dynamically fetch cities from Dutchie
|
||||
console.log(`\nFetching cities for ${stateCode} from Dutchie...\n`);
|
||||
const cityNames = await getCitiesForState(stateCode.toUpperCase());
|
||||
|
||||
if (cityNames.length === 0) {
|
||||
console.error(`No cities found for state: ${stateCode}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log(`\nSeeding ${cities.length} cities for ${stateCode}...\n`);
|
||||
const cities = cityNames.map(name => ({
|
||||
name,
|
||||
slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
|
||||
stateCode: stateCode.toUpperCase(),
|
||||
}));
|
||||
|
||||
console.log(`Seeding ${cities.length} cities for ${stateCode}...\n`);
|
||||
const result = await seedKnownCities(pool, cities);
|
||||
console.log(`Created: ${result.created} new cities`);
|
||||
console.log(`Updated: ${result.updated} existing cities`);
|
||||
|
||||
271
backend/src/scripts/test-crawl-to-canonical.ts
Normal file
271
backend/src/scripts/test-crawl-to-canonical.ts
Normal file
@@ -0,0 +1,271 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Test Script: Crawl a single dispensary and write to canonical tables
|
||||
*
|
||||
* This script:
|
||||
* 1. Fetches products from Dutchie GraphQL
|
||||
* 2. Normalizes via DutchieNormalizer
|
||||
* 3. Writes to store_products, product_variants, snapshots via hydrateToCanonical
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>
|
||||
* npx tsx src/scripts/test-crawl-to-canonical.ts 235
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import dotenv from 'dotenv';
|
||||
import {
|
||||
executeGraphQL,
|
||||
GRAPHQL_HASHES,
|
||||
DUTCHIE_CONFIG,
|
||||
} from '../platforms/dutchie';
|
||||
import {
|
||||
DutchieNormalizer,
|
||||
hydrateToCanonical,
|
||||
} from '../hydration';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
// ============================================================
|
||||
// DATABASE CONNECTION
|
||||
// ============================================================
|
||||
|
||||
/**
 * Resolve the PostgreSQL connection string for this script.
 *
 * Precedence: `CANNAIQ_DB_URL`, then `DATABASE_URL`, then a URL assembled from
 * the individual `CANNAIQ_DB_*` variables with local-development defaults.
 * When assembling, the user and password are percent-encoded so reserved
 * characters (e.g. '@', ':' or '/') cannot corrupt the URL.
 *
 * @returns A `postgresql://` connection URL.
 */
function getConnectionString(): string {
  const explicitUrl = process.env.CANNAIQ_DB_URL || process.env.DATABASE_URL;
  if (explicitUrl) {
    return explicitUrl;
  }

  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = encodeURIComponent(process.env.CANNAIQ_DB_USER || 'dutchie');
  const pass = encodeURIComponent(process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass');

  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
|
||||
|
||||
const pool = new Pool({ connectionString: getConnectionString() });
|
||||
|
||||
// ============================================================
|
||||
// FETCH PRODUCTS FROM DUTCHIE
|
||||
// ============================================================
|
||||
|
||||
/** Aggregate outcome of paging through a dispensary's products. */
interface FetchResult {
  /** Total product count as last reported by the API's `queryInfo.totalCount` (0 if absent). */
  totalProducts: number;
  /** Page count derived from `totalProducts` and the configured page size. */
  totalPages: number;
  /** Raw product payloads collected across all fetched pages. */
  products: any[];
}
|
||||
|
||||
/**
 * Page through the FilteredProducts query for one dispensary and collect all
 * returned products.
 *
 * Paging stops when the reported page count or `DUTCHIE_CONFIG.maxPages` is
 * reached, when a page comes back without data, or when a request throws; in
 * the latter two cases whatever was collected so far is returned.
 *
 * @param platformDispensaryId Dutchie platform id used in the products filter.
 * @param cName                Passed through to executeGraphQL as the `cName` option.
 * @returns The collected products plus the last known page and product totals.
 */
async function fetchAllProducts(platformDispensaryId: string, cName: string): Promise<FetchResult> {
  const collected: any[] = [];
  let totalPages = 1;
  let totalProducts = 0;

  // Request variables for a given zero-based page number.
  const variablesFor = (pageNumber: number) => ({
    includeEnterpriseSpecials: false,
    productsFilter: {
      dispensaryId: platformDispensaryId,
      pricingType: 'rec',
      Status: 'Active', // 'Active' = in-stock products with pricing
      types: [],
      useCache: true,
      isDefaultSort: true,
      sortBy: 'popularSortIdx',
      sortDirection: 1,
      bypassOnlineThresholds: true,
      isKioskMenu: false,
      removeProductsBelowOptionThresholds: false,
    },
    page: pageNumber,
    perPage: DUTCHIE_CONFIG.perPage,
  });

  console.log(`[Fetch] Starting fetch for ${platformDispensaryId} (cName: ${cName})`);

  for (let page = 0; page < totalPages && page < DUTCHIE_CONFIG.maxPages; ) {
    try {
      const response = await executeGraphQL(
        'FilteredProducts',
        variablesFor(page),
        GRAPHQL_HASHES.FilteredProducts,
        { cName, maxRetries: 3 }
      );

      const payload = response?.data?.filteredProducts;
      if (!payload) {
        console.error(`[Fetch] No data returned for page ${page}`);
        break;
      }

      const batch = payload.products || [];
      // Totals are refreshed from every page's queryInfo.
      totalProducts = payload.queryInfo?.totalCount || 0;
      totalPages = Math.ceil(totalProducts / DUTCHIE_CONFIG.perPage);

      collected.push(...batch);
      console.log(`[Fetch] Page ${page + 1}/${totalPages}: ${batch.length} products (total so far: ${collected.length})`);

      page += 1;

      // Pause only when another page is expected.
      if (page < totalPages) {
        await new Promise((resume) => setTimeout(resume, DUTCHIE_CONFIG.pageDelayMs));
      }
    } catch (error: any) {
      console.error(`[Fetch] Error on page ${page}: ${error.message}`);
      break;
    }
  }

  return { products: collected, totalPages, totalProducts };
}
|
||||
|
||||
// ============================================================
|
||||
// MAIN
|
||||
// ============================================================
|
||||
|
||||
/**
 * Script entry point: crawl one dispensary and hydrate the canonical tables.
 *
 * Reads the dispensary id from `process.argv[2]`, then:
 *   1. loads the dispensary row,
 *   2. fetches its products via `fetchAllProducts`,
 *   3. validates and normalizes them with `DutchieNormalizer`,
 *   4. writes the result through `hydrateToCanonical`,
 *   5. prints row counts from the canonical tables.
 *
 * Failure paths inside the `try` set `process.exitCode` and return instead of
 * calling `process.exit()`, because `process.exit()` terminates the process
 * before the `finally` block can close the connection pool.
 */
async function main() {
  const dispensaryId = parseInt(process.argv[2], 10);

  // Rejects a missing/non-numeric argument (NaN), 0, and negative ids.
  if (!Number.isInteger(dispensaryId) || dispensaryId <= 0) {
    console.error('Usage: npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>');
    console.error('Example: npx tsx src/scripts/test-crawl-to-canonical.ts 235');
    process.exit(1);
  }

  console.log('============================================================');
  console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
  console.log('============================================================\n');

  try {
    // Step 1: Get dispensary info
    console.log('[Step 1] Getting dispensary info...');
    const dispResult = await pool.query(`
      SELECT id, name, platform_dispensary_id, menu_url
      FROM dispensaries
      WHERE id = $1
    `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(`  Name: ${disp.name}`);
    console.log(`  Platform ID: ${disp.platform_dispensary_id}`);
    console.log(`  Menu URL: ${disp.menu_url}`);

    if (!disp.platform_dispensary_id) {
      throw new Error('Dispensary does not have a platform_dispensary_id');
    }

    // Extract cName from menu_url: the path segment following
    // "/embedded-menu/" or "/dispensary/"; falls back to 'dispensary'.
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
    console.log(`  cName: ${cName}\n`);

    // Step 2: Fetch products from Dutchie
    console.log('[Step 2] Fetching products from Dutchie GraphQL...');
    const fetchResult = await fetchAllProducts(disp.platform_dispensary_id, cName);
    console.log(`  Total products fetched: ${fetchResult.products.length}\n`);

    if (fetchResult.products.length === 0) {
      console.log('No products fetched. Exiting.');
      // Plain return (exit code stays 0) so the finally block still runs.
      return;
    }

    // Step 3: Normalize
    console.log('[Step 3] Normalizing products...');
    const normalizer = new DutchieNormalizer();

    // Construct a RawPayload structure that the normalizer expects
    // The normalizer.normalize() expects: { raw_json, dispensary_id, ... }
    const rawPayloadForValidation = {
      products: fetchResult.products,
      queryInfo: {
        totalCount: fetchResult.totalProducts,
      },
    };

    const validation = normalizer.validatePayload(rawPayloadForValidation);
    if (!validation.valid) {
      console.error(`  Validation failed: ${validation.errors?.join(', ')}`);
      // Signal failure without bypassing pool cleanup in finally.
      process.exitCode = 1;
      return;
    }
    console.log(`  Validation: PASS`);

    // Build proper RawPayload for normalize()
    const rawPayload = {
      id: `test-${Date.now()}`,
      dispensary_id: dispensaryId,
      crawl_run_id: null,
      platform: 'dutchie',
      payload_version: 1,
      raw_json: rawPayloadForValidation,
      product_count: fetchResult.totalProducts,
      pricing_type: 'rec',
      crawl_mode: 'active',
      fetched_at: new Date(),
      processed: false,
      normalized_at: null,
      hydration_error: null,
      hydration_attempts: 0,
      created_at: new Date(),
    };

    const normResult = normalizer.normalize(rawPayload);
    console.log(`  Normalized products: ${normResult.products.length}`);
    console.log(`  Brands extracted: ${normResult.brands.length}`);
    console.log(`  Sample product: ${normResult.products[0]?.name}\n`);

    // Step 4: Write to canonical tables
    console.log('[Step 4] Writing to canonical tables via hydrateToCanonical...');
    const hydrateResult = await hydrateToCanonical(
      pool,
      dispensaryId,
      normResult,
      null // no crawl_run_id for this test
    );

    console.log(`  Products upserted: ${hydrateResult.productsUpserted}`);
    console.log(`  Products new: ${hydrateResult.productsNew}`);
    console.log(`  Snapshots created: ${hydrateResult.snapshotsCreated}`);
    console.log(`  Variants upserted: ${hydrateResult.variantsUpserted}`);
    console.log(`  Brands created: ${hydrateResult.brandsCreated}\n`);

    // Step 5: Verify
    console.log('[Step 5] Verifying data in canonical tables...');

    const productCount = await pool.query(`
      SELECT COUNT(*) as count FROM store_products WHERE dispensary_id = $1
    `, [dispensaryId]);
    console.log(`  store_products count: ${productCount.rows[0].count}`);

    const variantCount = await pool.query(`
      SELECT COUNT(*) as count FROM product_variants WHERE dispensary_id = $1
    `, [dispensaryId]);
    console.log(`  product_variants count: ${variantCount.rows[0].count}`);

    const snapshotCount = await pool.query(`
      SELECT COUNT(*) as count FROM store_product_snapshots WHERE dispensary_id = $1
    `, [dispensaryId]);
    console.log(`  store_product_snapshots count: ${snapshotCount.rows[0].count}`);

    console.log('\n============================================================');
    console.log('SUCCESS - Crawl and hydration complete!');
    console.log('============================================================');
  } catch (error: unknown) {
    console.error('\n============================================================');
    console.error('ERROR:', error instanceof Error ? error.message : String(error));
    console.error('============================================================');
    if (error instanceof Error && error.stack) {
      console.error(error.stack);
    }
    // Report failure via the exit code; the finally block below still runs.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
|
||||
|
||||
// Kick off the script; the returned promise is intentionally not awaited.
void main();
|
||||
Reference in New Issue
Block a user