feat: SEO template library, discovery pipeline, and orchestrator enhancements

## SEO Template Library
- Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration)
- Add Template Library tab in SEO Orchestrator with accordion-based editors
- Add template preview, validation, and variable injection engine
- Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate

## Discovery Pipeline
- Add promotion.ts for discovery location validation and promotion
- Add discover-all-states.ts script for multi-state discovery
- Add promotion log migration (067)
- Enhance discovery routes and types

## Orchestrator & Admin
- Add crawl_enabled filter to stores page
- Add API permissions page
- Add job queue management
- Add price analytics routes
- Add markets and intelligence routes
- Enhance dashboard and worker monitoring

## Infrastructure
- Add migrations for worker definitions, SEO settings, field alignment
- Add canonical pipeline for scraper v2
- Update hydration and sync orchestrator
- Enhance multi-state query service

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-09 00:05:34 -07:00
parent 9711d594db
commit 2f483b3084
83 changed files with 16700 additions and 1277 deletions

View File

@@ -0,0 +1,385 @@
#!/usr/bin/env npx tsx
/**
* Discover All States - Sequential State-by-State Dutchie Discovery
*
* This script discovers all Dutchie dispensaries for every US state,
* processing one state at a time with delays between states.
*
* Progress is automatically saved to /tmp/discovery-progress.json
* so the script can resume from where it left off if interrupted.
*
* Usage:
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --dry-run
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --start-from CA
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --resume
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --reset # Clear progress, start fresh
*
* Options:
* --dry-run Don't save to database, just show what would happen
* --start-from Start from a specific state (skip earlier states)
* --states Comma-separated list of specific states to run (e.g., AZ,CA,CO)
* --verbose Show detailed output
* --resume Auto-resume from last saved progress (default if progress file exists)
* --reset Clear progress file and start fresh
*/
import { Pool } from 'pg';
import * as fs from 'fs';
import * as path from 'path';
// Location of the JSON file used to persist discovery progress between runs.
// Read by loadProgress, written by saveProgress and deleted by clearProgress.
const PROGRESS_FILE = '/tmp/discovery-progress.json';
/**
 * Shape of the resume state stored in PROGRESS_FILE.
 */
interface ProgressData {
// State code of the most recently completed state, or null before any state has finished.
lastCompletedState: string | null;
// Index of that state within US_STATES (-1 before any state has finished); resuming starts at this index + 1.
lastCompletedIndex: number;
// ISO-8601 timestamp taken when this progress record was first created.
startedAt: string;
// ISO-8601 timestamp refreshed by saveProgress on every write.
updatedAt: string;
// State codes that have completed successfully, without duplicates.
completedStates: string[];
}
/**
 * Read previously saved discovery progress from PROGRESS_FILE.
 *
 * The parsed JSON is checked against the ProgressData shape before it is
 * returned: callers dereference fields such as `completedStates.length`
 * directly, so a truncated or hand-edited file would otherwise crash the
 * resume logic instead of simply starting fresh.
 *
 * @returns The saved progress, or null when the file is missing, unreadable,
 *          not valid JSON, or does not have the expected shape.
 */
function loadProgress(): ProgressData | null {
  try {
    if (!fs.existsSync(PROGRESS_FILE)) {
      return null;
    }

    const parsed: unknown = JSON.parse(fs.readFileSync(PROGRESS_FILE, 'utf-8'));
    if (typeof parsed !== 'object' || parsed === null) {
      console.warn('[Progress] Ignoring progress file with unexpected contents');
      return null;
    }

    const candidate = parsed as Partial<Record<keyof ProgressData, unknown>>;
    const completed = candidate.completedStates;
    const hasExpectedShape =
      (candidate.lastCompletedState === null || typeof candidate.lastCompletedState === 'string') &&
      Number.isInteger(candidate.lastCompletedIndex) &&
      typeof candidate.startedAt === 'string' &&
      typeof candidate.updatedAt === 'string' &&
      Array.isArray(completed) &&
      completed.every((entry) => typeof entry === 'string');

    if (!hasExpectedShape) {
      console.warn('[Progress] Ignoring progress file with unexpected contents');
      return null;
    }

    return parsed as ProgressData;
  } catch (e) {
    // Best effort: an unreadable progress file must not stop the script.
    console.warn('[Progress] Could not load progress file:', e);
  }
  return null;
}
/**
 * Persist the given progress record to PROGRESS_FILE as pretty-printed JSON.
 *
 * The record's `updatedAt` field is overwritten with the current time before
 * writing (the caller's object is mutated). Failures are logged as warnings
 * and otherwise ignored.
 */
function saveProgress(progress: ProgressData): void {
  try {
    const now = new Date();
    progress.updatedAt = now.toISOString();

    const serialized = JSON.stringify(progress, null, 2);
    fs.writeFileSync(PROGRESS_FILE, serialized);
  } catch (err) {
    console.warn('[Progress] Could not save progress:', err);
  }
}
/**
 * Delete PROGRESS_FILE if it exists, logging a confirmation when it does.
 *
 * Failures are logged as warnings and otherwise ignored.
 */
function clearProgress(): void {
  try {
    const hasProgressFile = fs.existsSync(PROGRESS_FILE);
    if (!hasProgressFile) {
      return;
    }

    fs.unlinkSync(PROGRESS_FILE);
    console.log('[Progress] Cleared progress file');
  } catch (err) {
    console.warn('[Progress] Could not clear progress:', err);
  }
}
import { discoverState } from '../discovery';
// US states with legal cannabis (medical or recreational)
// Ordered roughly by market size / likelihood of Dutchie presence
//
// The order is significant: main() stores the index of the last completed
// state in the progress file and resumes at index + 1, so reordering or
// inserting entries invalidates any existing progress file.
const US_STATES = [
'AZ', // Arizona
'CA', // California
'CO', // Colorado
'FL', // Florida
'IL', // Illinois
'MA', // Massachusetts
'MI', // Michigan
'NV', // Nevada
'NJ', // New Jersey
'NY', // New York
'OH', // Ohio
'OR', // Oregon
'PA', // Pennsylvania
'WA', // Washington
'MD', // Maryland
'MO', // Missouri
'CT', // Connecticut
'NM', // New Mexico
'ME', // Maine
'VT', // Vermont
'MT', // Montana
'AK', // Alaska
'OK', // Oklahoma
'AR', // Arkansas
'ND', // North Dakota
'SD', // South Dakota
'MN', // Minnesota
'NH', // New Hampshire
'RI', // Rhode Island
'DE', // Delaware
'HI', // Hawaii
'WV', // West Virginia
'LA', // Louisiana
'UT', // Utah
'VA', // Virginia
'DC', // District of Columbia
];
/**
 * Per-state outcome collected by main() for the end-of-run summary.
 */
interface DiscoveryResult {
// State code that was processed.
stateCode: string;
// Length of the `locations` array returned by discoverState (0 when the state failed outright).
citiesCrawled: number;
// `totalLocationsFound` as reported by discoverState.
locationsFound: number;
// `totalLocationsUpserted` as reported by discoverState.
locationsUpserted: number;
// `durationMs` as reported by discoverState (0 when the state failed outright).
durationMs: number;
// Error messages gathered from each returned location entry, or the thrown error's message.
errors: string[];
}
/**
 * Parse `--flag` style command line arguments from process.argv.
 *
 * Supported forms:
 *   --key=value   -> { key: 'value' }  (everything after the FIRST '=' is the
 *                    value, so values may themselves contain '=')
 *   --key value   -> { key: 'value' }  (the next argument is consumed when it
 *                    does not itself start with '--')
 *   --key         -> { key: true }
 *
 * Arguments that do not start with '--' and are not consumed as a value are
 * ignored.
 *
 * @returns A map from flag name (without the leading dashes) to its value.
 */
function parseArgs(): Record<string, string | boolean> {
  const args = process.argv.slice(2);
  const flags: Record<string, string | boolean> = {};

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined || !arg.startsWith('--')) {
      continue;
    }

    const body = arg.slice(2);
    const equalsAt = body.indexOf('=');
    if (equalsAt !== -1) {
      // Split on the first '=' only so the value is not truncated.
      flags[body.slice(0, equalsAt)] = body.slice(equalsAt + 1);
      continue;
    }

    const next = args[i + 1];
    if (next && !next.startsWith('--')) {
      flags[body] = next;
      i++; // the value has been consumed
    } else {
      flags[body] = true;
    }
  }

  return flags;
}
/**
 * Script entry point: run Dutchie discovery for each selected state in sequence.
 *
 * Flow:
 *  1. Parse CLI flags and decide which states to process: an explicit
 *     --states list, a --start-from offset, or (by default, when a progress
 *     file exists) the state after the last one recorded as completed.
 *  2. Call discoverState for each state, recording a DiscoveryResult. After
 *     every success the progress file is updated. A state that throws is
 *     logged, recorded with its error, and the loop moves on.
 *  3. Print a run summary, then totals queried from the database.
 *
 * Progress is only written or cleared on live runs: a dry run persists
 * nothing to the database, so marking its states as completed would make a
 * later real run skip them. States that are not part of US_STATES are also
 * not tracked, because the resume pointer is an index into that list.
 *
 * Exits with code 1 when DATABASE_URL is missing, when --start-from names an
 * unknown state, or when --start-from / --states is given without a value.
 */
async function main(): Promise<void> {
  const flags = parseArgs();
  const dryRun = Boolean(flags['dry-run']);
  const verbose = Boolean(flags.verbose);
  const reset = Boolean(flags.reset);
  // --resume needs no dedicated handling: resuming is already the default
  // whenever a progress file exists and no overriding flag is given.

  // A flag passed without a value is parsed as `true`; reject it here instead
  // of crashing later on a string method call.
  const startFromFlag = flags['start-from'];
  const statesFlag = flags.states;
  if (startFromFlag === true || statesFlag === true) {
    console.error('ERROR: --start-from and --states each require a value');
    process.exit(1);
  }

  let startFrom: string | undefined = typeof startFromFlag === 'string' ? startFromFlag : undefined;
  const specificStates =
    typeof statesFlag === 'string' && statesFlag
      ? statesFlag.split(',').map((s) => s.trim().toUpperCase())
      : null;

  // Handle reset flag
  if (reset) {
    clearProgress();
  }

  // Determine which states to process
  let statesToProcess = specificStates || US_STATES;

  // Check for saved progress (auto-resume unless --reset, --start-from or --states is specified)
  const savedProgress = loadProgress();
  if (savedProgress && !reset && !startFrom && !specificStates) {
    const nextIndex = savedProgress.lastCompletedIndex + 1;
    if (nextIndex < US_STATES.length) {
      startFrom = US_STATES[nextIndex];
      console.log(`[Progress] Resuming from saved progress`);
      console.log(`[Progress] Last completed: ${savedProgress.lastCompletedState} (${savedProgress.completedStates.length} states done)`);
      console.log(`[Progress] Started at: ${savedProgress.startedAt}`);
      console.log(`[Progress] Last update: ${savedProgress.updatedAt}`);
      console.log('');
    } else {
      console.log(`[Progress] All states already completed! Use --reset to start over.`);
      process.exit(0);
    }
  }

  if (startFrom) {
    const startIndex = statesToProcess.indexOf(startFrom.toUpperCase());
    if (startIndex === -1) {
      console.error(`ERROR: State ${startFrom} not found in list`);
      process.exit(1);
    }
    statesToProcess = statesToProcess.slice(startIndex);
    console.log(`Starting from ${startFrom}, ${statesToProcess.length} states remaining`);
  }

  // Initialize progress tracking (continue the saved record when there is one)
  const progress: ProgressData = savedProgress || {
    lastCompletedState: null,
    lastCompletedIndex: -1,
    startedAt: new Date().toISOString(),
    updatedAt: new Date().toISOString(),
    completedStates: [],
  };

  console.log('='.repeat(70));
  console.log('DUTCHIE ALL-STATES DISCOVERY');
  console.log('='.repeat(70));
  console.log(`Mode: ${dryRun ? 'DRY RUN' : 'LIVE'}`);
  console.log(`States to process: ${statesToProcess.length}`);
  console.log(`States: ${statesToProcess.join(', ')}`);
  console.log('');

  // Create database pool
  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.error('ERROR: DATABASE_URL environment variable is required');
    process.exit(1);
  }
  const pool = new Pool({ connectionString });

  const results: DiscoveryResult[] = [];
  const startTime = Date.now();

  try {
    for (let i = 0; i < statesToProcess.length; i++) {
      const stateCode = statesToProcess[i];
      console.log('');
      console.log('─'.repeat(70));
      console.log(`[${i + 1}/${statesToProcess.length}] Discovering ${stateCode}...`);
      console.log('─'.repeat(70));

      try {
        const result = await discoverState(pool, stateCode, {
          dryRun,
          verbose,
          cityLimit: 200, // Allow up to 200 cities per state
        });

        const discoveryResult: DiscoveryResult = {
          stateCode,
          citiesCrawled: result.locations.length,
          locationsFound: result.totalLocationsFound,
          locationsUpserted: result.totalLocationsUpserted,
          durationMs: result.durationMs,
          errors: [],
        };

        // Collect errors from city results
        result.locations.forEach((loc) => {
          if (loc.errors && loc.errors.length > 0) {
            discoveryResult.errors.push(...loc.errors);
          }
        });
        results.push(discoveryResult);

        // Save progress after each successful state. Skipped for dry runs
        // (nothing was persisted) and for states outside US_STATES (an index
        // of -1 would reset the resume pointer to the beginning).
        const stateIndex = US_STATES.indexOf(stateCode);
        const progressTracked = !dryRun && stateIndex !== -1;
        if (progressTracked) {
          progress.lastCompletedState = stateCode;
          progress.lastCompletedIndex = stateIndex;
          if (!progress.completedStates.includes(stateCode)) {
            progress.completedStates.push(stateCode);
          }
          saveProgress(progress);
        }

        console.log(`\n[${stateCode}] COMPLETE:`);
        console.log(` Cities crawled: ${discoveryResult.citiesCrawled}`);
        console.log(` Locations found: ${discoveryResult.locationsFound}`);
        console.log(` Locations upserted: ${discoveryResult.locationsUpserted}`);
        console.log(` Duration: ${(discoveryResult.durationMs / 1000).toFixed(1)}s`);
        if (progressTracked) {
          console.log(` Progress saved (${progress.completedStates.length}/${US_STATES.length} states)`);
        } else {
          console.log(' Progress not saved (dry run or state outside the built-in list)');
        }
        if (discoveryResult.errors.length > 0) {
          console.log(` Errors: ${discoveryResult.errors.length}`);
        }

        // Delay between states to avoid rate limiting
        if (i < statesToProcess.length - 1) {
          const delaySeconds = 5;
          console.log(`\n Waiting ${delaySeconds}s before next state...`);
          await new Promise((r) => setTimeout(r, delaySeconds * 1000));
        }
      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        console.error(`\n[${stateCode}] ERROR: ${message}`);
        results.push({
          stateCode,
          citiesCrawled: 0,
          locationsFound: 0,
          locationsUpserted: 0,
          durationMs: 0,
          errors: [message],
        });
        // Continue to next state even on error
        await new Promise((r) => setTimeout(r, 3000));
      }
    }

    // Print summary
    const totalDuration = Date.now() - startTime;
    const totalLocations = results.reduce((sum, r) => sum + r.locationsFound, 0);
    const totalUpserted = results.reduce((sum, r) => sum + r.locationsUpserted, 0);
    const totalCities = results.reduce((sum, r) => sum + r.citiesCrawled, 0);
    const statesWithErrors = results.filter((r) => r.errors.length > 0);

    console.log('');
    console.log('='.repeat(70));
    console.log('DISCOVERY COMPLETE - SUMMARY');
    console.log('='.repeat(70));
    console.log(`Total states processed: ${results.length}`);
    console.log(`Total cities crawled: ${totalCities}`);
    console.log(`Total locations found: ${totalLocations}`);
    console.log(`Total locations upserted: ${totalUpserted}`);
    console.log(`Total duration: ${(totalDuration / 1000 / 60).toFixed(1)} minutes`);
    console.log('');

    if (statesWithErrors.length > 0) {
      console.log('States with errors:');
      statesWithErrors.forEach((r) => {
        console.log(` ${r.stateCode}: ${r.errors.length} error(s)`);
      });
      console.log('');
    }

    // Print per-state breakdown
    console.log('Per-state results:');
    console.log('-'.repeat(70));
    console.log('State\tCities\tFound\tUpserted\tDuration\tStatus');
    console.log('-'.repeat(70));
    results.forEach((r) => {
      const status = r.errors.length > 0 ? 'ERRORS' : 'OK';
      const duration = (r.durationMs / 1000).toFixed(1) + 's';
      console.log(
        `${r.stateCode}\t${r.citiesCrawled}\t${r.locationsFound}\t${r.locationsUpserted}\t\t${duration}\t\t${status}`
      );
    });

    // Final count from database
    console.log('');
    console.log('='.repeat(70));
    console.log('DATABASE TOTALS');
    console.log('='.repeat(70));

    const { rows: locationCounts } = await pool.query(`
SELECT
state_code,
COUNT(*) as count,
COUNT(CASE WHEN status = 'discovered' THEN 1 END) as discovered,
COUNT(CASE WHEN status = 'promoted' THEN 1 END) as promoted
FROM dutchie_discovery_locations
WHERE active = TRUE
GROUP BY state_code
ORDER BY count DESC
`);
    console.log('State\tTotal\tDiscovered\tPromoted');
    console.log('-'.repeat(50));
    locationCounts.forEach((row: any) => {
      console.log(`${row.state_code || 'N/A'}\t${row.count}\t${row.discovered}\t\t${row.promoted}`);
    });

    const { rows: totalRow } = await pool.query(`
SELECT COUNT(*) as total FROM dutchie_discovery_locations WHERE active = TRUE
`);
    console.log('-'.repeat(50));
    console.log(`TOTAL: ${totalRow[0].total} locations in discovery table`);

    const { rows: dispRow } = await pool.query(`
SELECT COUNT(*) as total FROM dispensaries WHERE menu_type = 'dutchie'
`);
    console.log(`DISPENSARIES: ${dispRow[0].total} Dutchie dispensaries in main table`);

    // Clear progress file on successful completion of all states (live runs only)
    const allStatesDone =
      results.length === US_STATES.length ||
      (savedProgress && progress.completedStates.length === US_STATES.length);
    if (!dryRun && allStatesDone) {
      clearProgress();
      console.log('\n[Progress] All states completed! Progress file cleared.');
    }
  } finally {
    await pool.end();
  }
}

main().catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});

View File

@@ -0,0 +1,173 @@
import axios from 'axios';
import { Pool } from 'pg';
// Endpoint that measureRequest POSTs the GraphQL request body to.
const DUTCHIE_GRAPHQL_URL = 'https://dutchie.com/graphql';
// GraphQL document sent as the `query` field of every request made by
// measureRequest. It takes a single `$productsFilter` variable and selects
// product fields together with images, potency and variant pricing.
const MENU_PRODUCTS_QUERY = `
query FilteredProducts($productsFilter: ProductFilterInput!) {
filteredProducts(productsFilter: $productsFilter) {
products {
id
name
brand
category
subcategory
strainType
description
image
images {
id
url
}
posId
potencyCbd {
formatted
range
unit
}
potencyThc {
formatted
range
unit
}
variants {
id
option
price
priceMed
priceRec
quantity
specialPrice
}
status
}
}
}
`;
/**
 * Format a byte count as a human-readable string using 1024-based units.
 *
 * Values below 1 KB are shown in bytes; whole numbers are printed as-is and
 * fractional values (callers pass averages such as size / count) are rounded
 * to two decimals instead of printing the full floating-point expansion.
 * Larger values are shown in KB, MB or GB with two decimals.
 *
 * @param bytes Size in bytes; may be fractional.
 * @returns The formatted size, e.g. "512 B", "1.50 KB", "2.00 MB".
 */
function formatBytes(bytes: number): string {
  const KB = 1024;
  const MB = KB * 1024;
  const GB = MB * 1024;

  if (bytes < KB) {
    const shown = Number.isInteger(bytes) ? String(bytes) : bytes.toFixed(2);
    return `${shown} B`;
  }
  if (bytes < MB) return `${(bytes / KB).toFixed(2)} KB`;
  if (bytes < GB) return `${(bytes / MB).toFixed(2)} MB`;
  return `${(bytes / GB).toFixed(2)} GB`;
}
/**
 * POST the products query for one dispensary and report payload sizes.
 *
 * Mode 'A' sends `Status: 'Active'` in the filter; mode 'B' sends
 * `Status: null`. The request size is the byte length of the JSON body that
 * is sent; the response size is the byte length of the parsed response
 * re-serialized as JSON.
 *
 * Errors are logged and reported in the result rather than thrown.
 *
 * @returns requestSize, responseSize and productCount, plus `error` with the
 *          failure message when the request failed.
 */
async function measureRequest(dispensaryId: string, mode: 'A' | 'B') {
  const statusFilter = mode === 'A' ? 'Active' : null;
  const variables: any = {
    productsFilter: {
      dispensaryId,
      pricingType: 'rec',
      Status: statusFilter,
    },
  };
  const requestBody = JSON.stringify({ query: MENU_PRODUCTS_QUERY, variables });
  const requestSize = Buffer.byteLength(requestBody, 'utf8');

  const requestConfig = {
    headers: {
      'Content-Type': 'application/json',
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
      'Origin': 'https://dutchie.com',
    },
    timeout: 30000,
  };

  try {
    const { data } = await axios.post(DUTCHIE_GRAPHQL_URL, requestBody, requestConfig);
    const serialized = JSON.stringify(data);
    const responseSize = Buffer.byteLength(serialized, 'utf8');
    const productCount = data?.data?.filteredProducts?.products?.length || 0;

    // Debug: show what we got
    if (productCount === 0) {
      console.log(` Response preview: ${serialized.slice(0, 300)}...`);
    }

    return { requestSize, responseSize, productCount };
  } catch (error: any) {
    console.error(` Error: ${error.message}`);
    const failedResponse = error.response;
    if (failedResponse) {
      console.error(` Status: ${failedResponse.status}`);
      console.error(` Data: ${JSON.stringify(failedResponse.data).slice(0, 200)}`);
    }
    return { requestSize, responseSize: 0, productCount: 0, error: error.message };
  }
}
/**
 * Measure GraphQL bandwidth for a single store and print a report.
 *
 * Picks the dispensary with a platform ID that has the most rows in
 * store_products, fetches its menu in mode A and mode B via measureRequest,
 * and prints request/response sizes plus rough per-product estimates.
 *
 * The pool is closed in a `finally` block so it is released even when a
 * query or request throws.
 */
async function main(): Promise<void> {
  const pool = new Pool({ connectionString: process.env.DATABASE_URL });

  try {
    // Get one store with products (use a known good ID)
    const { rows } = await pool.query(`
SELECT d.platform_dispensary_id, d.name, COUNT(sp.id) as product_count
FROM dispensaries d
LEFT JOIN store_products sp ON d.id = sp.dispensary_id
WHERE d.platform_dispensary_id IS NOT NULL
GROUP BY d.id
ORDER BY product_count DESC
LIMIT 1
`);

    if (rows.length === 0) {
      console.log('No crawlable stores found');
      return;
    }

    const store = rows[0];
    console.log('=== Dutchie GraphQL Bandwidth for One Store ===\n');
    console.log(`Store: ${store.name}`);
    console.log(`Platform ID: ${store.platform_dispensary_id}`);
    console.log(`Products in DB: ${store.product_count || 'unknown'}\n`);

    // Mode A (Active products with pricing)
    console.log('Fetching Mode A (Active products)...');
    const modeA = await measureRequest(store.platform_dispensary_id, 'A');

    // Mode B (All products)
    console.log('Fetching Mode B (All products)...');
    const modeB = await measureRequest(store.platform_dispensary_id, 'B');

    console.log('\n=== Results for ONE STORE ===');
    console.log('\nMode A (Active products with pricing):');
    console.log(` Request size: ${formatBytes(modeA.requestSize)}`);
    console.log(` Response size: ${formatBytes(modeA.responseSize)}`);
    console.log(` Products: ${modeA.productCount}`);
    if (modeA.productCount > 0) {
      console.log(` Per product: ${formatBytes(modeA.responseSize / modeA.productCount)}`);
    }

    console.log('\nMode B (All products incl. OOS):');
    console.log(` Request size: ${formatBytes(modeB.requestSize)}`);
    console.log(` Response size: ${formatBytes(modeB.responseSize)}`);
    console.log(` Products: ${modeB.productCount}`);
    if (modeB.productCount > 0) {
      console.log(` Per product: ${formatBytes(modeB.responseSize / modeB.productCount)}`);
    }

    console.log('\nDual-Mode Crawl (what we actually do):');
    const totalRequest = modeA.requestSize + modeB.requestSize;
    const totalResponse = modeA.responseSize + modeB.responseSize;
    const totalBandwidth = totalRequest + totalResponse;
    console.log(` Total request: ${formatBytes(totalRequest)}`);
    console.log(` Total response: ${formatBytes(totalResponse)}`);
    console.log(` TOTAL BANDWIDTH: ${formatBytes(totalBandwidth)}`);

    // Per-product average: combined response bytes of both modes divided by
    // the larger of the two product counts.
    const avgProducts = Math.max(modeA.productCount, modeB.productCount);
    const bytesPerProduct = avgProducts > 0 ? totalResponse / avgProducts : 0;

    console.log('\n=== Quick Reference ===');
    console.log(`Average bytes per product: ~${formatBytes(bytesPerProduct)}`);
    console.log(`\nTypical store sizes:`);
    console.log(` Small (100 products): ~${formatBytes(bytesPerProduct * 100 + totalRequest)}`);
    console.log(` Medium (300 products): ~${formatBytes(bytesPerProduct * 300 + totalRequest)}`);
    console.log(` Large (500 products): ~${formatBytes(bytesPerProduct * 500 + totalRequest)}`);
  } finally {
    await pool.end();
  }
}

main().catch((error) => {
  console.error(error);
  // Report failure to the caller instead of exiting with status 0.
  process.exitCode = 1;
});

View File

@@ -0,0 +1,137 @@
#!/usr/bin/env npx tsx
/**
* Retry resolving platform IDs for Dutchie stores that have menu_url but no platform_dispensary_id
*
* Usage:
* npx tsx src/scripts/retry-platform-ids.ts
*/
import { Pool } from 'pg';
import dotenv from 'dotenv';
import { resolveDispensaryIdWithDetails } from '../platforms/dutchie/queries';
dotenv.config();
// Shared connection pool for this script. DATABASE_URL wins when set;
// otherwise a URL is assembled from the CANNAIQ_DB_* variables with
// local-development defaults. User and password are percent-encoded so that
// characters such as '@', ':' or '/' cannot corrupt the URL.
const pool = new Pool({
  connectionString:
    process.env.DATABASE_URL ||
    `postgresql://${encodeURIComponent(process.env.CANNAIQ_DB_USER || 'dutchie')}:${encodeURIComponent(process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass')}@${process.env.CANNAIQ_DB_HOST || 'localhost'}:${process.env.CANNAIQ_DB_PORT || '54320'}/${process.env.CANNAIQ_DB_NAME || 'dutchie_menus'}`,
});
/**
 * Row shape selected from the `dispensaries` table by main().
 */
interface DispensaryRow {
// Primary key used for logging and for the UPDATE that stores the resolved platform ID.
id: number;
// Store name, used for logging only.
name: string;
// Menu URL from which the slug is extracted.
menu_url: string;
}
/**
 * Extract the store slug from a Dutchie menu URL.
 *
 * Recognised URL forms (matched case-insensitively, tried in this order):
 *   https://dutchie.com/stores/Nirvana-North-Phoenix
 *   https://dutchie.com/dispensary/curaleaf-dispensary-peoria
 *   https://dutchie.com/embedded-menu/some-slug
 *
 * The slug ends at the next '/', '?' or '#', so trailing path segments,
 * query strings and fragments are not included.
 *
 * @param menuUrl The URL to inspect.
 * @returns The slug exactly as written in the URL, or null when the URL
 *          matches none of the recognised forms.
 */
function extractSlugFromUrl(menuUrl: string): string | null {
  const patterns = [
    /dutchie\.com\/stores\/([^/?#]+)/i,
    /dutchie\.com\/dispensary\/([^/?#]+)/i,
    /dutchie\.com\/embedded-menu\/([^/?#]+)/i,
  ];

  for (const pattern of patterns) {
    const slug = menuUrl.match(pattern)?.[1];
    if (slug) {
      return slug;
    }
  }
  return null;
}
/**
 * Retry platform ID resolution for Dutchie stores that have a menu URL but
 * no platform_dispensary_id.
 *
 * For each such store the slug is extracted from the menu URL and passed to
 * resolveDispensaryIdWithDetails; a resolved ID is written back to the
 * dispensaries table. Every attempted resolution is followed by a 500 ms
 * pause. A summary of successes and failures is printed at the end.
 */
async function main() {
  const divider = '='.repeat(60);
  console.log(divider);
  console.log('Retry Platform ID Resolution');
  console.log(divider);
  console.log('');

  // Get Dutchie dispensaries with menu_url but no platform_dispensary_id
  const { rows: pending } = await pool.query<DispensaryRow>(`
SELECT id, name, menu_url
FROM dispensaries
WHERE menu_type = 'dutchie'
AND menu_url IS NOT NULL AND menu_url != ''
AND (platform_dispensary_id IS NULL OR platform_dispensary_id = '')
ORDER BY name
`);
  console.log(`Found ${pending.length} stores to retry\n`);

  if (pending.length === 0) {
    console.log('No stores need platform ID resolution.');
    await pool.end();
    return;
  }

  const successes: { id: number; name: string; platformId: string }[] = [];
  const failures: { id: number; name: string; slug: string | null; error: string }[] = [];

  for (const store of pending) {
    const { id, name } = store;
    console.log(`\n[${id}] ${name}`);
    console.log(` URL: ${store.menu_url}`);

    const slug = extractSlugFromUrl(store.menu_url);
    if (slug) {
      console.log(` Slug: ${slug}`);
      try {
        const outcome = await resolveDispensaryIdWithDetails(slug);
        const platformId = outcome.dispensaryId;
        if (platformId) {
          console.log(` ✅ Resolved: ${platformId}`);
          // Update database
          await pool.query(
            'UPDATE dispensaries SET platform_dispensary_id = $1 WHERE id = $2',
            [platformId, id]
          );
          console.log(` 💾 Updated database`);
          successes.push({ id, name, platformId });
        } else {
          const errorMsg = outcome.error || 'Unknown error';
          console.log(` ❌ Failed: ${errorMsg}`);
          failures.push({ id, name, slug, error: errorMsg });
        }
      } catch (error: any) {
        console.log(` ❌ Error: ${error.message}`);
        failures.push({ id, name, slug, error: error.message });
      }
      // Small delay between requests
      await new Promise(resolve => setTimeout(resolve, 500));
    } else {
      console.log(` ❌ Could not extract slug from URL`);
      failures.push({ id, name, slug: null, error: 'Could not extract slug' });
    }
  }

  console.log(`\n${divider}`);
  console.log('SUMMARY');
  console.log(divider);

  console.log(`\n✅ Successes (${successes.length}):`);
  successes.forEach((entry) => {
    console.log(` [${entry.id}] ${entry.name} -> ${entry.platformId}`);
  });

  console.log(`\n❌ Failures (${failures.length}):`);
  failures.forEach((entry) => {
    console.log(` [${entry.id}] ${entry.name} (slug: ${entry.slug || 'N/A'})`);
    console.log(` ${entry.error}`);
  });

  await pool.end();
}

main().catch(fatal => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});

View File

@@ -30,8 +30,8 @@ import {
discoverState,
getDiscoveryStats,
seedKnownCities,
ARIZONA_CITIES,
} from '../discovery';
import { getCitiesForState } from '../discovery/location-discovery';
// Parse command line arguments
function parseArgs() {
@@ -204,16 +204,22 @@ async function main() {
process.exit(1);
}
let cities: any[] = [];
if (stateCode.toUpperCase() === 'AZ') {
cities = ARIZONA_CITIES;
} else {
console.error(`No predefined cities for state: ${stateCode}`);
console.error('Add cities to city-discovery.ts ARIZONA_CITIES array (or add new state arrays)');
// Dynamically fetch cities from Dutchie
console.log(`\nFetching cities for ${stateCode} from Dutchie...\n`);
const cityNames = await getCitiesForState(stateCode.toUpperCase());
if (cityNames.length === 0) {
console.error(`No cities found for state: ${stateCode}`);
process.exit(1);
}
console.log(`\nSeeding ${cities.length} cities for ${stateCode}...\n`);
const cities = cityNames.map(name => ({
name,
slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
stateCode: stateCode.toUpperCase(),
}));
console.log(`Seeding ${cities.length} cities for ${stateCode}...\n`);
const result = await seedKnownCities(pool, cities);
console.log(`Created: ${result.created} new cities`);
console.log(`Updated: ${result.updated} existing cities`);

View File

@@ -0,0 +1,271 @@
#!/usr/bin/env npx tsx
/**
* Test Script: Crawl a single dispensary and write to canonical tables
*
* This script:
* 1. Fetches products from Dutchie GraphQL
* 2. Normalizes via DutchieNormalizer
* 3. Writes to store_products, product_variants, snapshots via hydrateToCanonical
*
* Usage:
* npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>
* npx tsx src/scripts/test-crawl-to-canonical.ts 235
*/
import { Pool } from 'pg';
import dotenv from 'dotenv';
import {
executeGraphQL,
GRAPHQL_HASHES,
DUTCHIE_CONFIG,
} from '../platforms/dutchie';
import {
DutchieNormalizer,
hydrateToCanonical,
} from '../hydration';
dotenv.config();
// ============================================================
// DATABASE CONNECTION
// ============================================================
/**
 * Build the PostgreSQL connection string for this script.
 *
 * Precedence: CANNAIQ_DB_URL, then DATABASE_URL, then a URL assembled from
 * the individual CANNAIQ_DB_* variables (each falling back to a
 * local-development default when unset or empty).
 *
 * When the URL is assembled, the user and password are percent-encoded so
 * that characters such as '@', ':' or '/' in a credential cannot change how
 * the URL is parsed.
 *
 * @returns A `postgresql://` connection URL.
 */
function getConnectionString(): string {
  const explicitUrl = process.env.CANNAIQ_DB_URL || process.env.DATABASE_URL;
  if (explicitUrl) {
    return explicitUrl;
  }

  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = encodeURIComponent(process.env.CANNAIQ_DB_USER || 'dutchie');
  const pass = encodeURIComponent(process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass');
  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
const pool = new Pool({ connectionString: getConnectionString() });
// ============================================================
// FETCH PRODUCTS FROM DUTCHIE
// ============================================================
/**
 * Result of fetchAllProducts.
 */
interface FetchResult {
// Products accumulated across all pages that were fetched successfully.
products: any[];
// Page count derived from the last reported total count and DUTCHIE_CONFIG.perPage (1 if no page was processed).
totalPages: number;
// `queryInfo.totalCount` from the last page processed (0 if absent or no page was processed).
totalProducts: number;
}
/**
 * Fetch a dispensary's active products from Dutchie, page by page.
 *
 * Pages are requested through executeGraphQL until the reported total has
 * been covered or DUTCHIE_CONFIG.maxPages is reached, pausing
 * DUTCHIE_CONFIG.pageDelayMs between pages. Paging stops early, keeping
 * whatever was collected so far, when a page returns no data or a request
 * throws.
 *
 * @param platformDispensaryId Dutchie's ID for the dispensary.
 * @param cName Store slug passed through to executeGraphQL.
 * @returns The collected products together with the last known page and
 *          product totals.
 */
async function fetchAllProducts(platformDispensaryId: string, cName: string): Promise<FetchResult> {
  const collected: any[] = [];
  let totalPages = 1;
  let totalProducts = 0;

  console.log(`[Fetch] Starting fetch for ${platformDispensaryId} (cName: ${cName})`);

  // Builds a fresh variables object for every request.
  const buildVariables = (pageNumber: number) => ({
    includeEnterpriseSpecials: false,
    productsFilter: {
      dispensaryId: platformDispensaryId,
      pricingType: 'rec',
      Status: 'Active', // 'Active' = in-stock products with pricing
      types: [],
      useCache: true,
      isDefaultSort: true,
      sortBy: 'popularSortIdx',
      sortDirection: 1,
      bypassOnlineThresholds: true,
      isKioskMenu: false,
      removeProductsBelowOptionThresholds: false,
    },
    page: pageNumber,
    perPage: DUTCHIE_CONFIG.perPage,
  });

  let page = 0;
  while (page < totalPages && page < DUTCHIE_CONFIG.maxPages) {
    const variables = buildVariables(page);
    try {
      const response = await executeGraphQL(
        'FilteredProducts',
        variables,
        GRAPHQL_HASHES.FilteredProducts,
        { cName, maxRetries: 3 }
      );

      const payload = response?.data?.filteredProducts;
      if (!payload) {
        console.error(`[Fetch] No data returned for page ${page}`);
        break;
      }

      const pageProducts = payload.products || [];
      totalProducts = payload.queryInfo?.totalCount || 0;
      totalPages = Math.ceil(totalProducts / DUTCHIE_CONFIG.perPage);
      collected.push(...pageProducts);

      console.log(`[Fetch] Page ${page + 1}/${totalPages}: ${pageProducts.length} products (total so far: ${collected.length})`);

      page += 1;
      const morePagesRemain = page < totalPages;
      if (morePagesRemain) {
        await new Promise(resolve => setTimeout(resolve, DUTCHIE_CONFIG.pageDelayMs));
      }
    } catch (error: any) {
      console.error(`[Fetch] Error on page ${page}: ${error.message}`);
      break;
    }
  }

  return { products: collected, totalPages, totalProducts };
}
// ============================================================
// MAIN
// ============================================================
/**
 * Crawl one dispensary and write the result to the canonical tables.
 *
 * Steps: look up the dispensary, fetch its products from Dutchie, validate
 * and normalize them with DutchieNormalizer, write them via
 * hydrateToCanonical, then print row counts from the canonical tables.
 *
 * Early exits and failures record an exit code instead of calling
 * process.exit() inside the try/catch: process.exit() terminates the process
 * immediately, which would skip the `finally` block and leave the pool
 * unclosed. The recorded code is applied after the pool has been closed.
 *
 * Exit codes: 1 for a missing/invalid argument, validation failure or any
 * thrown error; 0 when no products were fetched; natural exit on success.
 */
async function main(): Promise<void> {
  const dispensaryId = parseInt(process.argv[2] ?? '', 10);
  if (!dispensaryId) {
    console.error('Usage: npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>');
    console.error('Example: npx tsx src/scripts/test-crawl-to-canonical.ts 235');
    process.exit(1);
  }

  console.log('============================================================');
  console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
  console.log('============================================================\n');

  // Set when the script should terminate with an explicit status once the
  // pool has been closed; left undefined on the normal success path.
  let exitCode: number | undefined;

  try {
    // Step 1: Get dispensary info
    console.log('[Step 1] Getting dispensary info...');
    const dispResult = await pool.query(`
SELECT id, name, platform_dispensary_id, menu_url
FROM dispensaries
WHERE id = $1
`, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(` Name: ${disp.name}`);
    console.log(` Platform ID: ${disp.platform_dispensary_id}`);
    console.log(` Menu URL: ${disp.menu_url}`);

    if (!disp.platform_dispensary_id) {
      throw new Error('Dispensary does not have a platform_dispensary_id');
    }

    // Extract cName from menu_url
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
    console.log(` cName: ${cName}\n`);

    // Step 2: Fetch products from Dutchie
    console.log('[Step 2] Fetching products from Dutchie GraphQL...');
    const fetchResult = await fetchAllProducts(disp.platform_dispensary_id, cName);
    console.log(` Total products fetched: ${fetchResult.products.length}\n`);

    if (fetchResult.products.length === 0) {
      console.log('No products fetched. Exiting.');
      exitCode = 0;
      return;
    }

    // Step 3: Normalize
    console.log('[Step 3] Normalizing products...');
    const normalizer = new DutchieNormalizer();

    // Construct a RawPayload structure that the normalizer expects
    // The normalizer.normalize() expects: { raw_json, dispensary_id, ... }
    const rawPayloadForValidation = {
      products: fetchResult.products,
      queryInfo: {
        totalCount: fetchResult.totalProducts,
      },
    };

    const validation = normalizer.validatePayload(rawPayloadForValidation);
    if (!validation.valid) {
      console.error(` Validation failed: ${validation.errors?.join(', ')}`);
      exitCode = 1;
      return;
    }
    console.log(` Validation: PASS`);

    // Build proper RawPayload for normalize()
    const rawPayload = {
      id: `test-${Date.now()}`,
      dispensary_id: dispensaryId,
      crawl_run_id: null,
      platform: 'dutchie',
      payload_version: 1,
      raw_json: rawPayloadForValidation,
      product_count: fetchResult.totalProducts,
      pricing_type: 'rec',
      crawl_mode: 'active',
      fetched_at: new Date(),
      processed: false,
      normalized_at: null,
      hydration_error: null,
      hydration_attempts: 0,
      created_at: new Date(),
    };

    const normResult = normalizer.normalize(rawPayload);
    console.log(` Normalized products: ${normResult.products.length}`);
    console.log(` Brands extracted: ${normResult.brands.length}`);
    console.log(` Sample product: ${normResult.products[0]?.name}\n`);

    // Step 4: Write to canonical tables
    console.log('[Step 4] Writing to canonical tables via hydrateToCanonical...');
    const hydrateResult = await hydrateToCanonical(
      pool,
      dispensaryId,
      normResult,
      null // no crawl_run_id for this test
    );
    console.log(` Products upserted: ${hydrateResult.productsUpserted}`);
    console.log(` Products new: ${hydrateResult.productsNew}`);
    console.log(` Snapshots created: ${hydrateResult.snapshotsCreated}`);
    console.log(` Variants upserted: ${hydrateResult.variantsUpserted}`);
    console.log(` Brands created: ${hydrateResult.brandsCreated}\n`);

    // Step 5: Verify
    console.log('[Step 5] Verifying data in canonical tables...');
    const productCount = await pool.query(`
SELECT COUNT(*) as count FROM store_products WHERE dispensary_id = $1
`, [dispensaryId]);
    console.log(` store_products count: ${productCount.rows[0].count}`);

    const variantCount = await pool.query(`
SELECT COUNT(*) as count FROM product_variants WHERE dispensary_id = $1
`, [dispensaryId]);
    console.log(` product_variants count: ${variantCount.rows[0].count}`);

    const snapshotCount = await pool.query(`
SELECT COUNT(*) as count FROM store_product_snapshots WHERE dispensary_id = $1
`, [dispensaryId]);
    console.log(` store_product_snapshots count: ${snapshotCount.rows[0].count}`);

    console.log('\n============================================================');
    console.log('SUCCESS - Crawl and hydration complete!');
    console.log('============================================================');
  } catch (error: unknown) {
    const failure = error instanceof Error ? error : new Error(String(error));
    console.error('\n============================================================');
    console.error('ERROR:', failure.message);
    console.error('============================================================');
    if (failure.stack) {
      console.error(failure.stack);
    }
    exitCode = 1;
  } finally {
    // Close the pool first, then apply any recorded exit status.
    await pool.end();
    if (exitCode !== undefined) {
      process.exit(exitCode);
    }
  }
}

main().catch((error) => {
  // Only reachable if closing the pool itself fails.
  console.error('Fatal error:', error);
  process.exit(1);
});