feat: Add v2 architecture with multi-state support and orchestrator services

Major additions:
- Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare
- Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator
- Discovery system: dutchie discovery service, geo validation, city seeding scripts
- Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages
- Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram)
- Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata

Frontend pages added:
- Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores
- StateHeatmap, CrossStateCompare, SyncInfoPanel

Components added:
- StateSelector, OrchestratorTraceModal, WorkflowStepper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-07 11:30:57 -07:00
parent 8ac64ba077
commit b4a2fb7d03
248 changed files with 60714 additions and 666 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -11,7 +11,7 @@
* npx tsx src/scripts/backfill-store-dispensary.ts --verbose # Show all match details
*/
import { pool } from '../db/migrate';
import { pool } from '../db/pool';
import { logger } from '../services/logger';
const args = process.argv.slice(2);

View File

@@ -14,7 +14,7 @@
* npx tsx src/scripts/bootstrap-discovery.ts --status # Show current status only
*/
import { pool } from '../db/migrate';
import { pool } from '../db/pool';
import {
ensureAllDispensariesHaveSchedules,
runDispensaryOrchestrator,

View File

@@ -0,0 +1,101 @@
/**
* LOCAL-ONLY Admin Bootstrap Script
*
* Creates or resets a local admin user for development.
* This script is ONLY for local development - never use in production.
*
* Usage:
* cd backend
* npx tsx src/scripts/bootstrap-local-admin.ts
*
* Default credentials:
* Email: admin@local.test
* Password: admin123
*/
import bcrypt from 'bcrypt';
import { query, closePool } from '../dutchie-az/db/connection';
// Local admin credentials - deterministic for dev
const LOCAL_ADMIN_EMAIL = 'admin@local.test';
const LOCAL_ADMIN_PASSWORD = 'admin123';
const LOCAL_ADMIN_ROLE = 'admin'; // Match existing schema (admin, not superadmin)
/**
 * Create the local development admin account, or reset it if it already exists.
 *
 * Looks up LOCAL_ADMIN_EMAIL in the `users` table. When a row exists, its
 * password hash and role are overwritten; otherwise a new row is inserted.
 * The plain-text credentials are printed to stdout on success.
 *
 * Failures are reported on stderr and recorded with `process.exitCode = 1`
 * instead of `process.exit(1)`: calling `process.exit()` inside the `catch`
 * terminates the process immediately, so the `finally` block that closes the
 * connection pool would never run.
 */
async function bootstrapLocalAdmin(): Promise<void> {
  const banner = '='.repeat(60);

  console.log(banner);
  console.log('LOCAL ADMIN BOOTSTRAP');
  console.log(banner);
  console.log('');
  console.log('This script creates/resets a local admin user for development.');
  console.log('');

  try {
    // Hash the password with bcrypt (10 rounds, matching existing code)
    const passwordHash = await bcrypt.hash(LOCAL_ADMIN_PASSWORD, 10);

    // Check if user exists
    const existing = await query<{ id: number; email: string }>(
      'SELECT id, email FROM users WHERE email = $1',
      [LOCAL_ADMIN_EMAIL]
    );

    if (existing.rows.length > 0) {
      // User exists - update password and role
      console.log(`User "${LOCAL_ADMIN_EMAIL}" already exists (id=${existing.rows[0].id})`);
      console.log('Resetting password and ensuring admin role...');
      await query(
        `UPDATE users
SET password_hash = $1,
role = $2,
updated_at = NOW()
WHERE email = $3`,
        [passwordHash, LOCAL_ADMIN_ROLE, LOCAL_ADMIN_EMAIL]
      );
      console.log('User updated successfully.');
    } else {
      // User doesn't exist - create new
      console.log(`Creating new admin user: ${LOCAL_ADMIN_EMAIL}`);
      const result = await query<{ id: number }>(
        `INSERT INTO users (email, password_hash, role, created_at, updated_at)
VALUES ($1, $2, $3, NOW(), NOW())
RETURNING id`,
        [LOCAL_ADMIN_EMAIL, passwordHash, LOCAL_ADMIN_ROLE]
      );
      console.log(`User created successfully (id=${result.rows[0].id})`);
    }

    console.log('');
    console.log(banner);
    console.log('LOCAL ADMIN READY');
    console.log(banner);
    console.log('');
    console.log('Login credentials:');
    console.log(` Email: ${LOCAL_ADMIN_EMAIL}`);
    console.log(` Password: ${LOCAL_ADMIN_PASSWORD}`);
    console.log('');
    console.log('Admin UI: http://localhost:8080/admin');
    console.log('');
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances are guaranteed a message.
    const message = error instanceof Error ? error.message : String(error);

    console.error('');
    console.error('ERROR: Failed to bootstrap local admin');
    console.error(message);
    if (message.includes('relation "users" does not exist')) {
      console.error('');
      console.error('The "users" table does not exist.');
      console.error('Run migrations first: npm run migrate');
    }

    // Defer the non-zero exit so the finally block below still runs.
    process.exitCode = 1;
  } finally {
    await closePool();
  }
}

// Run the bootstrap; a rejection here can only come from closePool().
bootstrapLocalAdmin().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,86 @@
#!/usr/bin/env npx tsx
/**
* Dutchie City Discovery CLI Runner
*
* Discovers cities from Dutchie's /cities page and upserts to dutchie_discovery_cities.
*
* Usage:
* npm run discovery:dutchie:cities
* npx tsx src/scripts/discovery-dutchie-cities.ts
*
* Environment:
* DATABASE_URL - PostgreSQL connection string (required)
*/
import { Pool } from 'pg';
import { DutchieCityDiscovery } from '../dutchie-az/discovery/DutchieCityDiscovery';
/**
 * CLI entry point for Dutchie city discovery.
 *
 * Reads DATABASE_URL, opens a pg pool, runs DutchieCityDiscovery, prints a
 * summary plus current statistics, and exits with status 1 when the run
 * reported errors or threw, 0 otherwise.
 *
 * The exit status is collected in a local variable and `process.exit()` is
 * called only after the `finally` block: `process.exit()` inside `try`/`catch`
 * ends the process on the spot, so `pool.end()` in `finally` would never run.
 */
async function main(): Promise<void> {
  const banner = '='.repeat(60);

  console.log(banner);
  console.log('DUTCHIE CITY DISCOVERY');
  console.log(banner);

  // Get database URL from environment
  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.error('ERROR: DATABASE_URL environment variable is required');
    console.error('');
    console.error('Usage:');
    console.error(' DATABASE_URL="postgresql://..." npm run discovery:dutchie:cities');
    // No pool exists yet, so exiting immediately leaks nothing.
    process.exit(1);
  }

  // Create pool
  const pool = new Pool({ connectionString });
  let exitCode = 0;

  try {
    // Test connection
    await pool.query('SELECT 1');
    console.log('[CLI] Database connection established');

    // Run discovery
    const discovery = new DutchieCityDiscovery(pool);
    const result = await discovery.run();

    // Print summary
    console.log('');
    console.log(banner);
    console.log('DISCOVERY COMPLETE');
    console.log(banner);
    console.log(`Cities found: ${result.citiesFound}`);
    console.log(`Cities inserted: ${result.citiesInserted}`);
    console.log(`Cities updated: ${result.citiesUpdated}`);
    console.log(`Errors: ${result.errors.length}`);
    console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);

    if (result.errors.length > 0) {
      console.log('');
      console.log('Errors:');
      for (const e of result.errors) {
        console.log(` - ${e}`);
      }
    }

    // Show stats
    console.log('');
    console.log('Current Statistics:');
    const stats = await discovery.getStats();
    console.log(` Total cities: ${stats.total}`);
    console.log(` Crawl enabled: ${stats.crawlEnabled}`);
    console.log(` Never crawled: ${stats.neverCrawled}`);
    console.log('');
    console.log('By Country:');
    for (const c of stats.byCountry) {
      console.log(` ${c.countryCode}: ${c.count}`);
    }
    console.log('');
    console.log('By State (top 10):');
    for (const s of stats.byState.slice(0, 10)) {
      console.log(` ${s.stateCode} (${s.countryCode}): ${s.count}`);
    }

    exitCode = result.errors.length > 0 ? 1 : 0;
  } catch (error: unknown) {
    if (error instanceof Error) {
      console.error('FATAL ERROR:', error.message);
      console.error(error.stack);
    } else {
      console.error('FATAL ERROR:', error);
    }
    exitCode = 1;
  } finally {
    await pool.end();
  }

  // Force the exit (as the original did) now that the pool is closed.
  process.exit(exitCode);
}

main().catch((error: unknown) => {
  // Only reachable if pool.end() itself rejects.
  console.error('FATAL ERROR:', error);
  process.exit(1);
});

View File

@@ -0,0 +1,189 @@
#!/usr/bin/env npx tsx
/**
* Dutchie Location Discovery CLI Runner
*
* Discovers store locations for cities and upserts to dutchie_discovery_locations.
*
* Usage:
* npm run discovery:dutchie:locations -- --all-enabled
* npm run discovery:dutchie:locations -- --city-slug=phoenix
* npm run discovery:dutchie:locations -- --all-enabled --limit=10
*
* npx tsx src/scripts/discovery-dutchie-locations.ts --all-enabled
* npx tsx src/scripts/discovery-dutchie-locations.ts --city-slug=phoenix
*
* Options:
* --city-slug=<slug> Run for a single city by its slug
* --all-enabled Run for all cities where crawl_enabled = TRUE
* --limit=<n> Limit the number of cities to process
* --delay=<ms> Delay between cities in ms (default: 2000)
*
* Environment:
* DATABASE_URL - PostgreSQL connection string (required)
*/
import { Pool } from 'pg';
import { DutchieLocationDiscovery } from '../dutchie-az/discovery/DutchieLocationDiscovery';
// Parse command line arguments
/**
 * Parse the CLI flags from `process.argv`.
 *
 * Recognised flags: `--city-slug=<slug>`, `--all-enabled`, `--limit=<n>`,
 * `--delay=<ms>`. Unrecognised arguments are ignored.
 *
 * @returns The parsed options; `delay` defaults to 2000 and `limit` to
 *   `undefined` when the flags are absent.
 * @throws Error when `--limit` or `--delay` is not a non-negative integer.
 *   Without this check a typo would become `NaN`, and a `NaN` delay
 *   would remove the pause between cities.
 */
function parseArgs(): {
  citySlug: string | null;
  allEnabled: boolean;
  limit: number | undefined;
  delay: number;
} {
  // Everything after the FIRST "=" is the value, so values may contain "=".
  const valueOf = (arg: string): string => arg.slice(arg.indexOf('=') + 1);

  const toNonNegativeInt = (flag: string, raw: string): number => {
    if (!/^\d+$/.test(raw)) {
      throw new Error(`Invalid value for ${flag}: "${raw}" (expected a non-negative integer)`);
    }
    return Number.parseInt(raw, 10);
  };

  let citySlug: string | null = null;
  let allEnabled = false;
  let limit: number | undefined = undefined;
  let delay = 2000;

  for (const arg of process.argv.slice(2)) {
    if (arg.startsWith('--city-slug=')) {
      citySlug = valueOf(arg);
    } else if (arg === '--all-enabled') {
      allEnabled = true;
    } else if (arg.startsWith('--limit=')) {
      limit = toNonNegativeInt('--limit', valueOf(arg));
    } else if (arg.startsWith('--delay=')) {
      delay = toNonNegativeInt('--delay', valueOf(arg));
    }
  }

  return { citySlug, allEnabled, limit, delay };
}
/**
 * Print the CLI help text (usage, options, examples, environment) to stdout
 * with a single console.log call.
 */
function printUsage() {
  const helpLines = [
    'Dutchie Location Discovery CLI',
    'Usage:',
    'npx tsx src/scripts/discovery-dutchie-locations.ts [options]',
    'Options:',
    '--city-slug=<slug> Run for a single city by its slug',
    '--all-enabled Run for all cities where crawl_enabled = TRUE',
    '--limit=<n> Limit the number of cities to process',
    '--delay=<ms> Delay between cities in ms (default: 2000)',
    'Examples:',
    'npx tsx src/scripts/discovery-dutchie-locations.ts --all-enabled',
    'npx tsx src/scripts/discovery-dutchie-locations.ts --city-slug=phoenix',
    'npx tsx src/scripts/discovery-dutchie-locations.ts --all-enabled --limit=5',
    'Environment:',
    'DATABASE_URL - PostgreSQL connection string (required)',
  ];

  // The help text is framed by one leading and one trailing newline.
  const helpText = '\n' + helpLines.join('\n') + '\n';
  console.log(helpText);
}
/**
 * CLI entry point for Dutchie location discovery.
 *
 * Runs discovery either for one city (`--city-slug`) or for every enabled
 * city (`--all-enabled`), prints a summary, and exits with status 1 when the
 * arguments are invalid, the city is unknown, the run reported errors, or an
 * exception was thrown; 0 otherwise.
 *
 * Once the pool exists, the exit status is collected in a local variable and
 * `process.exit()` is called only after `finally`: `process.exit()` inside
 * `try`/`catch` ends the process on the spot, so `pool.end()` would never run.
 */
async function main(): Promise<void> {
  const { citySlug, allEnabled, limit, delay } = parseArgs();

  if (!citySlug && !allEnabled) {
    console.error('ERROR: Must specify either --city-slug=<slug> or --all-enabled');
    printUsage();
    process.exit(1);
  }

  const banner = '='.repeat(60);

  console.log(banner);
  console.log('DUTCHIE LOCATION DISCOVERY');
  console.log(banner);
  if (citySlug) {
    console.log(`Mode: Single city (${citySlug})`);
  } else {
    console.log(`Mode: All enabled cities${limit ? ` (limit: ${limit})` : ''}`);
  }
  console.log(`Delay between cities: ${delay}ms`);
  console.log('');

  // Get database URL from environment
  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.error('ERROR: DATABASE_URL environment variable is required');
    console.error('');
    console.error('Usage:');
    console.error(' DATABASE_URL="postgresql://..." npx tsx src/scripts/discovery-dutchie-locations.ts --all-enabled');
    // No pool exists yet, so exiting immediately leaks nothing.
    process.exit(1);
  }

  // Create pool
  const pool = new Pool({ connectionString });
  let exitCode = 0;

  try {
    // Test connection
    await pool.query('SELECT 1');
    console.log('[CLI] Database connection established');

    const discovery = new DutchieLocationDiscovery(pool);

    if (citySlug) {
      // Single city mode
      const city = await discovery.getCityBySlug(citySlug);

      if (!city) {
        console.error(`ERROR: City not found: ${citySlug}`);
        console.error('');
        console.error('Make sure you have run city discovery first:');
        console.error(' npm run discovery:dutchie:cities');
        exitCode = 1;
      } else {
        const result = await discovery.discoverForCity(city);

        console.log('');
        console.log(banner);
        console.log('DISCOVERY COMPLETE');
        console.log(banner);
        console.log(`City: ${city.cityName}, ${city.stateCode}`);
        console.log(`Locations found: ${result.locationsFound}`);
        console.log(`Inserted: ${result.locationsInserted}`);
        console.log(`Updated: ${result.locationsUpdated}`);
        console.log(`Skipped (protected): ${result.locationsSkipped}`);
        console.log(`Errors: ${result.errors.length}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);

        if (result.errors.length > 0) {
          console.log('');
          console.log('Errors:');
          for (const e of result.errors) {
            console.log(` - ${e}`);
          }
        }

        exitCode = result.errors.length > 0 ? 1 : 0;
      }
    } else {
      // All enabled cities mode
      const result = await discovery.discoverAllEnabled({ limit, delayMs: delay });

      console.log('');
      console.log(banner);
      console.log('DISCOVERY COMPLETE');
      console.log(banner);
      console.log(`Total cities processed: ${result.totalCities}`);
      console.log(`Total locations found: ${result.totalLocationsFound}`);
      console.log(`Total inserted: ${result.totalInserted}`);
      console.log(`Total updated: ${result.totalUpdated}`);
      console.log(`Total skipped: ${result.totalSkipped}`);
      console.log(`Total errors: ${result.errors.length}`);
      console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);

      // Print at most the first 20 errors so a bad run cannot flood the terminal.
      if (result.errors.length > 0) {
        console.log('');
        console.log(
          result.errors.length <= 20 ? 'Errors:' : `First 20 of ${result.errors.length} errors:`
        );
        for (const e of result.errors.slice(0, 20)) {
          console.log(` - ${e}`);
        }
      }

      exitCode = result.errors.length > 0 ? 1 : 0;
    }
  } catch (error: unknown) {
    if (error instanceof Error) {
      console.error('FATAL ERROR:', error.message);
      console.error(error.stack);
    } else {
      console.error('FATAL ERROR:', error);
    }
    exitCode = 1;
  } finally {
    await pool.end();
  }

  // Force the exit (as the original did) now that the pool is closed.
  process.exit(exitCode);
}

main().catch((error: unknown) => {
  // Reachable if parseArgs() throws or pool.end() rejects.
  console.error('FATAL ERROR:', error);
  process.exit(1);
});

View File

@@ -0,0 +1,833 @@
/**
* ETL Script: 042 Legacy Import
*
* Copies data from legacy dutchie_legacy database into canonical CannaiQ tables
* in the dutchie_menus database.
*
* CRITICAL DATABASE ARCHITECTURE:
* - SOURCE (READ-ONLY): dutchie_legacy - Contains legacy dutchie_* tables
* - DESTINATION (WRITE): dutchie_menus - Contains canonical CannaiQ tables
*
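* IMPORTANT:
* - This script is ADDITIVE and IDEMPOTENT: it inserts new rows and back-fills
*   link columns (state_id, chain_id, brand_id, store_product_id) that are still NULL
* - Uses ON CONFLICT DO NOTHING for all inserts; every UPDATE is restricted to rows
*   whose target column IS NULL, so existing values are never overwritten
* - NO deletes, NO truncates, NO schema changes
* - Legacy database is READ-ONLY - never modified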
*
* Run manually with:
* cd backend
* npx tsx src/scripts/etl/042_legacy_import.ts
*
* Prerequisites:
* - Migration 041_cannaiq_canonical_schema.sql must be run on dutchie_menus FIRST
* - Both CANNAIQ_DB_* and LEGACY_DB_* env vars must be set
*/
import { Pool } from 'pg';
// =====================================================
// DATABASE CONNECTIONS - DUAL POOL ARCHITECTURE
// =====================================================
/**
 * Get connection string for CannaiQ database (dutchie_menus).
 * This is the DESTINATION - where we WRITE canonical data.
 *
 * CANNAIQ_DB_URL, when set, is returned verbatim. Otherwise the URI is built
 * from CANNAIQ_DB_HOST / _PORT / _NAME / _USER / _PASS. The user and password
 * are percent-encoded so that characters such as "@", ":", "/" or "#" in a
 * credential cannot be mistaken for URI delimiters; supply them in plain
 * (un-encoded) form.
 *
 * @returns A `postgresql://` connection URI.
 * @throws Error listing the missing variables when CANNAIQ_DB_URL is unset
 *   and any of the individual parts is unset or empty.
 */
function getCannaiqConnectionString(): string {
  const directUrl = process.env.CANNAIQ_DB_URL;
  if (directUrl) {
    return directUrl;
  }

  const required = ['CANNAIQ_DB_HOST', 'CANNAIQ_DB_PORT', 'CANNAIQ_DB_NAME', 'CANNAIQ_DB_USER', 'CANNAIQ_DB_PASS'];
  const missing = required.filter((key) => !process.env[key]);
  if (missing.length > 0) {
    throw new Error(
      `[042_legacy_import] Missing required CannaiQ env vars: ${missing.join(', ')}\n` +
        `Set either CANNAIQ_DB_URL or all of: CANNAIQ_DB_HOST, CANNAIQ_DB_PORT, CANNAIQ_DB_NAME, CANNAIQ_DB_USER, CANNAIQ_DB_PASS`
    );
  }

  // Every key was verified non-empty above; the fallback only satisfies the type checker.
  const read = (key: string): string => process.env[key] ?? '';

  const host = read('CANNAIQ_DB_HOST');
  const port = read('CANNAIQ_DB_PORT');
  const name = read('CANNAIQ_DB_NAME');
  const user = encodeURIComponent(read('CANNAIQ_DB_USER'));
  const pass = encodeURIComponent(read('CANNAIQ_DB_PASS'));

  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
/**
 * Get connection string for Legacy database (dutchie_legacy).
 * This is the SOURCE - where we READ legacy data (READ-ONLY).
 *
 * LEGACY_DB_URL, when set, is returned verbatim. Otherwise the URI is built
 * from LEGACY_DB_HOST / _PORT / _NAME / _USER / _PASS. The user and password
 * are percent-encoded so that characters such as "@", ":", "/" or "#" in a
 * credential cannot be mistaken for URI delimiters; supply them in plain
 * (un-encoded) form.
 *
 * @returns A `postgresql://` connection URI.
 * @throws Error listing the missing variables when LEGACY_DB_URL is unset
 *   and any of the individual parts is unset or empty.
 */
function getLegacyConnectionString(): string {
  const directUrl = process.env.LEGACY_DB_URL;
  if (directUrl) {
    return directUrl;
  }

  const required = ['LEGACY_DB_HOST', 'LEGACY_DB_PORT', 'LEGACY_DB_NAME', 'LEGACY_DB_USER', 'LEGACY_DB_PASS'];
  const missing = required.filter((key) => !process.env[key]);
  if (missing.length > 0) {
    throw new Error(
      `[042_legacy_import] Missing required Legacy env vars: ${missing.join(', ')}\n` +
        `Set either LEGACY_DB_URL or all of: LEGACY_DB_HOST, LEGACY_DB_PORT, LEGACY_DB_NAME, LEGACY_DB_USER, LEGACY_DB_PASS`
    );
  }

  // Every key was verified non-empty above; the fallback only satisfies the type checker.
  const read = (key: string): string => process.env[key] ?? '';

  const host = read('LEGACY_DB_HOST');
  const port = read('LEGACY_DB_PORT');
  const name = read('LEGACY_DB_NAME');
  const user = encodeURIComponent(read('LEGACY_DB_USER'));
  const pass = encodeURIComponent(read('LEGACY_DB_PASS'));

  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
// Create both pools
// Both connection-string helpers run at module load and throw when their
// environment variables are missing, so a misconfigured environment aborts
// the script here, before any query is issued.
// Destination pool: the ETL steps write canonical rows through this one.
const cannaiqPool = new Pool({ connectionString: getCannaiqConnectionString() });
// Source pool: the steps visible in this file only issue SELECTs against it.
const legacyPool = new Pool({ connectionString: getLegacyConnectionString() });
// =====================================================
// LOGGING HELPERS
// =====================================================
/**
 * Row counters returned by every ETL step and printed by logStats().
 */
interface Stats {
// Rows the step looked at; the exact meaning varies per step (source rows,
// known chains, or rows touched by an UPDATE).
read: number;
// Rows the step actually wrote (inserted, or updated for the link steps).
inserted: number;
// Rows the step deliberately did not write.
skipped: number;
}
/**
 * Stats with the skip reasons broken out, used by the product and snapshot
 * import steps.
 */
interface StoreProductStats extends Stats {
// Skipped because dispensary_id was empty, not present in the canonical
// dispensaries table, or the insert hit a foreign-key violation (23503).
skipped_missing_store: number;
// Skipped because the INSERT ... ON CONFLICT DO NOTHING returned no row.
skipped_duplicate: number;
}
/**
 * Write one line to stdout, tagged with this script's name so its output is
 * easy to pick out of combined logs.
 *
 * @param message Text to print after the tag.
 */
function log(message: string) {
  const tag = '[042_legacy_import]';
  console.log(tag + ' ' + message);
}
/**
 * Log the read / inserted / skipped counters of one ETL step on a single line.
 *
 * @param table Label for the line (table or column the step worked on).
 * @param stats Counters to print.
 */
function logStats(table: string, stats: Stats) {
  const { read, inserted, skipped } = stats;
  const counters = ['read=' + read, 'inserted=' + inserted, 'skipped=' + skipped];
  log(' ' + table + ': ' + counters.join(', '));
}
/**
 * Log the counters of a product-style import step, including the per-reason
 * skip counts, on a single line.
 *
 * @param stats Counters to print.
 * @param table Label for the line. Defaults to 'store_products' so existing
 *   callers are unaffected; pass another label (for example
 *   'store_product_snapshots') when reporting a different table.
 */
function logStoreProductStats(stats: StoreProductStats, table = 'store_products') {
  log(` ${table}: read=${stats.read}, inserted=${stats.inserted}, skipped_missing_store=${stats.skipped_missing_store}, skipped_duplicate=${stats.skipped_duplicate}`);
}
// =====================================================
// CATEGORY NORMALIZATION HELPER
// =====================================================
// Legacy dutchie_products has only 'subcategory', not 'category'.
// We derive a canonical category from the subcategory value.
// Keys are lower-case; the insertion order matters because the partial-match
// fallback in deriveCategory() returns the FIRST key found in the input.
const SUBCATEGORY_TO_CATEGORY: Record<string, string> = {
  // Flower
  'flower': 'Flower',
  'pre-rolls': 'Flower',
  'pre-roll': 'Flower',
  'preroll': 'Flower',
  'prerolls': 'Flower',
  'shake': 'Flower',
  'smalls': 'Flower',
  'popcorn': 'Flower',
  // Concentrates
  'concentrates': 'Concentrates',
  'concentrate': 'Concentrates',
  'live resin': 'Concentrates',
  'live-resin': 'Concentrates',
  'rosin': 'Concentrates',
  'shatter': 'Concentrates',
  'wax': 'Concentrates',
  'badder': 'Concentrates',
  'crumble': 'Concentrates',
  'diamonds': 'Concentrates',
  'sauce': 'Concentrates',
  'hash': 'Concentrates',
  'kief': 'Concentrates',
  'rso': 'Concentrates',
  'distillate': 'Concentrates',
  // Edibles
  'edibles': 'Edibles',
  'edible': 'Edibles',
  'gummies': 'Edibles',
  'gummy': 'Edibles',
  'chocolates': 'Edibles',
  'chocolate': 'Edibles',
  'baked goods': 'Edibles',
  'beverages': 'Edibles',
  'drinks': 'Edibles',
  'candy': 'Edibles',
  'mints': 'Edibles',
  'capsules': 'Edibles',
  'tablets': 'Edibles',
  // Vapes
  'vapes': 'Vapes',
  'vape': 'Vapes',
  'vaporizers': 'Vapes',
  'cartridges': 'Vapes',
  'cartridge': 'Vapes',
  'carts': 'Vapes',
  'cart': 'Vapes',
  'pods': 'Vapes',
  'disposables': 'Vapes',
  'disposable': 'Vapes',
  'pax': 'Vapes',
  // Topicals
  'topicals': 'Topicals',
  'topical': 'Topicals',
  'lotions': 'Topicals',
  'balms': 'Topicals',
  'salves': 'Topicals',
  'patches': 'Topicals',
  'bath': 'Topicals',
  // Tinctures
  'tinctures': 'Tinctures',
  'tincture': 'Tinctures',
  'oils': 'Tinctures',
  'sublinguals': 'Tinctures',
  // Accessories
  'accessories': 'Accessories',
  'gear': 'Accessories',
  'papers': 'Accessories',
  'grinders': 'Accessories',
  'pipes': 'Accessories',
  'bongs': 'Accessories',
  'batteries': 'Accessories',
};

/**
 * Derive a canonical category from the legacy subcategory field.
 *
 * The input is lower-cased and trimmed, then looked up exactly; if that
 * fails, the first table key that occurs as a substring of the input wins.
 *
 * @param subcategory Raw legacy subcategory value.
 * @returns The canonical category, or null if subcategory is null/empty or
 *   cannot be mapped. Callers that want a fallback supply it themselves.
 */
function deriveCategory(subcategory: string | null | undefined): string | null {
  if (!subcategory) return null;

  const normalized = subcategory.toLowerCase().trim();

  // Direct lookup. Only OWN keys count: a plain `table[normalized]` would also
  // return inherited members such as `constructor` or `__proto__`, which are
  // truthy but are not category strings.
  if (Object.prototype.hasOwnProperty.call(SUBCATEGORY_TO_CATEGORY, normalized)) {
    return SUBCATEGORY_TO_CATEGORY[normalized];
  }

  // Partial match - check if any key is contained in the subcategory
  for (const [key, category] of Object.entries(SUBCATEGORY_TO_CATEGORY)) {
    if (normalized.includes(key)) {
      return category;
    }
  }

  // No match
  return null;
}
// =====================================================
// STEP 1: Backfill dispensaries.state_id (on cannaiq db)
// =====================================================
/**
 * Step 1: fill dispensaries.state_id by matching the upper-cased
 * dispensaries.state against states.code. Only rows whose state_id is still
 * NULL are touched.
 *
 * @returns Stats where both `read` and `inserted` are the number of rows updated.
 */
async function backfillStateIds(): Promise<Stats> {
  log('Step 1: Backfill dispensaries.state_id from states table...');

  const { rowCount } = await cannaiqPool.query(`
UPDATE dispensaries d
SET state_id = s.id
FROM states s
WHERE UPPER(d.state) = s.code
AND d.state_id IS NULL
RETURNING d.id
`);

  const updatedRows = rowCount || 0;
  const summary: Stats = { read: updatedRows, inserted: updatedRows, skipped: 0 };

  logStats('dispensaries.state_id', summary);
  return summary;
}
// =====================================================
// STEP 2: Insert known chains (on cannaiq db)
// =====================================================
/**
 * Step 2: seed the chains table with a fixed list of known chains.
 * Each chain is inserted with ON CONFLICT (slug) DO NOTHING, so chains that
 * already exist are counted as skipped.
 *
 * @returns Stats with `read` = size of the list, `inserted` = new rows,
 *   `skipped` = the remainder.
 */
async function insertChains(): Promise<Stats> {
  log('Step 2: Insert known chains...');

  // [name, slug, website] - the same order as the INSERT placeholders below.
  const seedChains: Array<[string, string, string]> = [
    ['Curaleaf', 'curaleaf', 'https://curaleaf.com'],
    ['Trulieve', 'trulieve', 'https://trulieve.com'],
    ['Harvest', 'harvest', 'https://harvesthoc.com'],
    ['Nirvana Center', 'nirvana-center', 'https://nirvanacannabis.com'],
    ['Sol Flower', 'sol-flower', 'https://solflower.com'],
    ['Mint Cannabis', 'mint-cannabis', 'https://mintcannabis.com'],
    ['JARS Cannabis', 'jars-cannabis', 'https://jarscannabis.com'],
    ['Zen Leaf', 'zen-leaf', 'https://zenleafdispensaries.com'],
    ["Nature's Medicines", 'natures-medicines', 'https://naturesmedicines.com'],
    ['The Mint', 'the-mint', 'https://themintdispensary.com'],
    ['Giving Tree', 'giving-tree', 'https://givingtreeaz.com'],
    ['Health for Life', 'health-for-life', 'https://healthforlifeaz.com'],
    ['Oasis Cannabis', 'oasis-cannabis', 'https://oasiscannabis.com'],
  ];

  const insertSql = `
INSERT INTO chains (name, slug, website_url)
VALUES ($1, $2, $3)
ON CONFLICT (slug) DO NOTHING
RETURNING id
`;

  let newRows = 0;
  for (const [name, slug, website] of seedChains) {
    const outcome = await cannaiqPool.query(insertSql, [name, slug, website]);
    // RETURNING yields a row only when the insert actually happened.
    const wasInserted = Boolean(outcome.rowCount && outcome.rowCount > 0);
    if (wasInserted) {
      newRows += 1;
    }
  }

  const summary: Stats = {
    read: seedChains.length,
    inserted: newRows,
    skipped: seedChains.length - newRows,
  };
  logStats('chains', summary);
  return summary;
}
// =====================================================
// STEP 3: Link dispensaries to chains by name pattern (on cannaiq db)
// =====================================================
/**
 * Step 3: set dispensaries.chain_id for dispensaries whose name or dba_name
 * contains a chain's name (case-insensitive). Only rows whose chain_id is
 * still NULL are touched, so the first chain to match a dispensary wins.
 *
 * Chains are processed longest name first (ties broken by id) so that the
 * outcome is deterministic and the more specific name wins when several
 * chain names match the same dispensary. LIKE wildcards in a chain name are
 * escaped so the name is matched literally.
 *
 * @returns Stats with `read` = number of chains and `inserted` = number of
 *   dispensaries linked.
 */
async function linkDispensariesToChains(): Promise<Stats> {
  log('Step 3: Link dispensaries to chains by name pattern...');

  // Get all chains from cannaiq
  const chainsResult = await cannaiqPool.query(
    'SELECT id, name, slug FROM chains ORDER BY LENGTH(name) DESC, id'
  );
  const chains = chainsResult.rows;

  let totalUpdated = 0;
  for (const chain of chains) {
    // Backslash is the default LIKE escape character in PostgreSQL; escape it
    // and the two wildcards so "%" / "_" in a name are not treated as patterns.
    const literalName = String(chain.name).replace(/[\\%_]/g, '\\$&');

    // Match by name pattern (case-insensitive)
    const result = await cannaiqPool.query(
      `
UPDATE dispensaries
SET chain_id = $1
WHERE (name ILIKE $2 OR dba_name ILIKE $2)
AND chain_id IS NULL
RETURNING id
`,
      [chain.id, `%${literalName}%`]
    );

    if (result.rowCount && result.rowCount > 0) {
      log(` Linked ${result.rowCount} dispensaries to chain: ${chain.name}`);
      totalUpdated += result.rowCount;
    }
  }

  const stats: Stats = {
    read: chains.length,
    inserted: totalUpdated,
    skipped: 0,
  };
  logStats('dispensaries.chain_id', stats);
  return stats;
}
// =====================================================
// STEP 4: Insert brands from legacy dutchie_products
// =====================================================
/**
 * Step 4: copy the distinct, non-empty brand names from legacy
 * dutchie_products into the canonical brands table.
 *
 * A slug is derived from each name; the insert uses ON CONFLICT (slug) DO
 * NOTHING. A brand counts as skipped when its slug already exists or when the
 * name contains no ASCII letters or digits and therefore yields an empty
 * slug, so `read` always equals `inserted + skipped`.
 *
 * @returns Stats for the brands table.
 */
async function insertBrands(): Promise<Stats> {
  log('Step 4: Insert brands from legacy dutchie_products -> cannaiq brands...');

  // READ from legacy database
  const brandsResult = await legacyPool.query(`
SELECT DISTINCT TRIM(brand_name) AS brand_name
FROM dutchie_products
WHERE brand_name IS NOT NULL
AND TRIM(brand_name) != ''
ORDER BY brand_name
`);

  const stats: Stats = {
    read: brandsResult.rowCount || 0,
    inserted: 0,
    skipped: 0,
  };

  // Rows are inserted one at a time; the batch size only paces the progress log.
  const BATCH_SIZE = 100;
  const brands = brandsResult.rows;

  for (let i = 0; i < brands.length; i += BATCH_SIZE) {
    const batch = brands.slice(i, i + BATCH_SIZE);

    for (const row of batch) {
      const brandName = row.brand_name.trim();

      // Create slug: lowercase, replace non-alphanumeric runs with a hyphen,
      // strip leading/trailing hyphens, cap the length at 250 characters.
      const slug = brandName
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, '-')
        .replace(/^-+|-+$/g, '')
        .substring(0, 250);

      if (!slug) {
        // Nothing usable to key the brand on; record it instead of dropping it silently.
        stats.skipped++;
        continue;
      }

      // WRITE to cannaiq database
      const result = await cannaiqPool.query(
        `
INSERT INTO brands (name, slug)
VALUES ($1, $2)
ON CONFLICT (slug) DO NOTHING
RETURNING id
`,
        [brandName, slug]
      );

      if (result.rowCount && result.rowCount > 0) {
        stats.inserted++;
      } else {
        stats.skipped++;
      }
    }

    log(` Processed ${Math.min(i + BATCH_SIZE, brands.length)}/${brands.length} brands...`);
  }

  logStats('brands', stats);
  return stats;
}
// =====================================================
// STEP 5: Insert store_products from legacy dutchie_products
// =====================================================
/**
 * Step 5: copy legacy dutchie_products rows into canonical store_products.
 *
 * Rows are read from the legacy database in pages ordered by id and inserted
 * one by one with ON CONFLICT (dispensary_id, provider, provider_product_id)
 * DO NOTHING. A row is skipped when its dispensary is unknown to the
 * canonical database, when the insert reports a foreign-key violation, or
 * when the conflict clause suppresses the insert.
 *
 * @returns Counters for the step; `read` is the legacy row count taken
 *   before the first page is fetched.
 */
async function insertStoreProducts(): Promise<StoreProductStats> {
  log('Step 5: Insert store_products from legacy dutchie_products -> cannaiq store_products...');

  // Step 5a: Preload valid dispensary IDs from canonical database
  log(' Loading valid dispensary IDs from canonical database...');
  const dispensaryRows = await cannaiqPool.query('SELECT id FROM dispensaries');
  const knownDispensaries = new Set<number>(dispensaryRows.rows.map((r) => r.id));
  log(` Found ${knownDispensaries.size} valid dispensaries in canonical database`);

  // Count total in legacy
  const legacyCount = await legacyPool.query('SELECT COUNT(*) FROM dutchie_products');
  const totalCount = parseInt(legacyCount.rows[0].count, 10);

  const stats: StoreProductStats = {
    read: totalCount,
    inserted: 0,
    skipped: 0,
    skipped_missing_store: 0,
    skipped_duplicate: 0,
  };

  // ONLY use columns that actually exist in dutchie_products:
  // id, dispensary_id, external_product_id, name, brand_name,
  // subcategory, stock_status, primary_image_url, created_at
  // Missing columns: category, first_seen_at, last_seen_at, updated_at, thc_content, cbd_content
  const selectPageSql = `
SELECT
dp.id,
dp.dispensary_id,
dp.external_product_id,
dp.name,
dp.brand_name,
dp.subcategory,
dp.stock_status,
dp.primary_image_url,
dp.created_at
FROM dutchie_products dp
ORDER BY dp.id
LIMIT $1 OFFSET $2
`;

  const insertSql = `
INSERT INTO store_products (
dispensary_id,
provider,
provider_product_id,
name_raw,
brand_name_raw,
category_raw,
subcategory_raw,
stock_status,
is_in_stock,
image_url,
first_seen_at,
last_seen_at,
created_at,
updated_at
) VALUES (
$1, 'dutchie', $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13
)
ON CONFLICT (dispensary_id, provider, provider_product_id) DO NOTHING
RETURNING id
`;

  const BATCH_SIZE = 200;

  for (let offset = 0; offset < totalCount; offset += BATCH_SIZE) {
    // READ batch from legacy database
    const page = await legacyPool.query(selectPageSql, [BATCH_SIZE, offset]);

    for (const product of page.rows) {
      // Skip if dispensary_id is missing or not in canonical database
      const hasKnownStore = Boolean(product.dispensary_id) && knownDispensaries.has(product.dispensary_id);
      if (!hasKnownStore) {
        stats.skipped_missing_store++;
        stats.skipped++;
        continue;
      }

      // Prefer the canonical category; otherwise keep the raw subcategory.
      const categoryRaw = deriveCategory(product.subcategory) || product.subcategory || null;

      // The legacy table has no seen/updated timestamps, so created_at (or
      // the current time) stands in for all four timestamp columns.
      const seenAt = product.created_at || new Date();
      const isInStock = product.stock_status !== 'out_of_stock';

      const params = [
        product.dispensary_id,
        product.external_product_id,
        product.name,
        product.brand_name,
        categoryRaw,
        product.subcategory || null,
        product.stock_status || 'in_stock',
        isInStock,
        product.primary_image_url || null,
        seenAt, // first_seen_at = created_at or NOW()
        seenAt, // last_seen_at = created_at or NOW()
        seenAt, // created_at
        seenAt, // updated_at
      ];

      // WRITE to cannaiq database
      try {
        const outcome = await cannaiqPool.query(insertSql, params);
        const wasInserted = Boolean(outcome.rowCount && outcome.rowCount > 0);
        if (wasInserted) {
          stats.inserted++;
        } else {
          stats.skipped_duplicate++;
          stats.skipped++;
        }
      } catch (err: any) {
        // Only a foreign-key violation (23503) is tolerated; anything else is fatal.
        if (err.code !== '23503') {
          throw err;
        }
        stats.skipped_missing_store++;
        stats.skipped++;
      }
    }

    log(` Processed ${Math.min(offset + BATCH_SIZE, totalCount)}/${totalCount} products...`);
  }

  logStoreProductStats(stats);
  return stats;
}
// =====================================================
// STEP 6: Link store_products to brands (on cannaiq db)
// =====================================================
/**
 * Step 6: set store_products.brand_id by comparing the trimmed, lower-cased
 * brand_name_raw with the lower-cased brands.name. Only rows whose brand_id
 * is still NULL are touched.
 *
 * @returns Stats where both `read` and `inserted` are the number of rows updated.
 */
async function linkStoreProductsToBrands(): Promise<Stats> {
  log('Step 6: Link store_products to brands by brand_name_raw...');

  const { rowCount } = await cannaiqPool.query(`
UPDATE store_products sp
SET brand_id = b.id
FROM brands b
WHERE LOWER(TRIM(sp.brand_name_raw)) = LOWER(b.name)
AND sp.brand_id IS NULL
RETURNING sp.id
`);

  const linkedRows = rowCount || 0;
  const summary: Stats = { read: linkedRows, inserted: linkedRows, skipped: 0 };

  logStats('store_products.brand_id', summary);
  return summary;
}
// =====================================================
// STEP 7: Insert store_product_snapshots from legacy dutchie_product_snapshots
// =====================================================
/**
 * Step 7: copy legacy dutchie_product_snapshots rows into canonical
 * store_product_snapshots.
 *
 * Each snapshot is joined to its dutchie_products row to obtain the provider
 * product id, name, brand, subcategory and image. Snapshots with no matching
 * product row cannot be migrated; they are counted up front, reported, and
 * included in `skipped`, so `read` (all legacy snapshots) always equals
 * `inserted + skipped`. Paging is bounded by the number of joinable rows.
 *
 * Pricing, THC/CBD and raw payload columns are written as NULL because the
 * legacy tables do not carry them.
 *
 * @returns Counters for the step.
 */
async function insertStoreProductSnapshots(): Promise<StoreProductStats> {
  log('Step 7: Insert store_product_snapshots from legacy -> cannaiq...');

  // Step 7a: Preload valid dispensary IDs from canonical database
  log(' Loading valid dispensary IDs from canonical database...');
  const dispensaryResult = await cannaiqPool.query('SELECT id FROM dispensaries');
  const validDispensaryIds = new Set<number>(dispensaryResult.rows.map((r) => r.id));
  log(` Found ${validDispensaryIds.size} valid dispensaries in canonical database`);

  // Count all snapshots and, separately, those that have a product row.
  // COUNT(dp.id) ignores the NULLs produced by the LEFT JOIN for orphans.
  const countResult = await legacyPool.query(`
SELECT
COUNT(*) AS total,
COUNT(dp.id) AS migratable
FROM dutchie_product_snapshots dps
LEFT JOIN dutchie_products dp ON dp.id = dps.dutchie_product_id
`);
  const totalCount = parseInt(countResult.rows[0].total, 10);
  const migratableCount = parseInt(countResult.rows[0].migratable, 10);
  const orphanedCount = totalCount - migratableCount;

  const stats: StoreProductStats = {
    read: totalCount,
    inserted: 0,
    skipped: orphanedCount,
    skipped_missing_store: 0,
    skipped_duplicate: 0,
  };

  if (totalCount === 0) {
    log(' No snapshots to migrate.');
    return stats;
  }

  if (orphanedCount > 0) {
    log(` Skipping ${orphanedCount} snapshots with no matching dutchie_products row`);
  }

  const BATCH_SIZE = 500;
  let offset = 0;

  while (offset < migratableCount) {
    // READ batch from legacy with join to get provider_product_id from dutchie_products
    // ONLY use columns that actually exist in dutchie_product_snapshots:
    // id, dispensary_id, dutchie_product_id, crawled_at, created_at
    // Missing columns: raw_product_data
    // We join to dutchie_products for: external_product_id, name, brand_name, subcategory, primary_image_url
    const batchResult = await legacyPool.query(
      `
SELECT
dps.id,
dps.dispensary_id,
dp.external_product_id AS provider_product_id,
dp.name,
dp.brand_name,
dp.subcategory,
dp.primary_image_url,
dps.crawled_at,
dps.created_at
FROM dutchie_product_snapshots dps
JOIN dutchie_products dp ON dp.id = dps.dutchie_product_id
ORDER BY dps.id
LIMIT $1 OFFSET $2
`,
      [BATCH_SIZE, offset]
    );

    for (const row of batchResult.rows) {
      // Skip if dispensary_id is missing or not in canonical database
      if (!row.dispensary_id || !validDispensaryIds.has(row.dispensary_id)) {
        stats.skipped_missing_store++;
        stats.skipped++;
        continue;
      }

      // Derive category from subcategory in TypeScript
      const categoryRaw = deriveCategory(row.subcategory) || row.subcategory || null;

      // Pricing/THC/CBD/stock data not available (raw_product_data doesn't exist in legacy)
      // These will be NULL for legacy snapshots - future crawls will populate them
      const timestamp = row.crawled_at || row.created_at || new Date();

      // WRITE to cannaiq database
      try {
        const result = await cannaiqPool.query(
          `
INSERT INTO store_product_snapshots (
dispensary_id,
provider,
provider_product_id,
captured_at,
name_raw,
brand_name_raw,
category_raw,
subcategory_raw,
price_rec,
price_med,
price_rec_special,
is_on_special,
is_in_stock,
stock_status,
thc_percent,
cbd_percent,
image_url,
raw_data,
created_at
) VALUES (
$1, 'dutchie', $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18
)
ON CONFLICT DO NOTHING
RETURNING id
`,
          [
            row.dispensary_id,
            row.provider_product_id,
            timestamp, // captured_at
            row.name,
            row.brand_name,
            categoryRaw,
            row.subcategory || null,
            null, // price_rec - not available
            null, // price_med - not available
            null, // price_rec_special - not available
            false, // is_on_special - default false
            true, // is_in_stock - default true (unknown)
            'unknown', // stock_status - unknown for legacy
            null, // thc_percent - not available
            null, // cbd_percent - not available
            row.primary_image_url || null, // image_url from legacy product
            null, // raw_data - not available
            row.created_at || timestamp,
          ]
        );

        if (result.rowCount && result.rowCount > 0) {
          stats.inserted++;
        } else {
          stats.skipped_duplicate++;
          stats.skipped++;
        }
      } catch (err: any) {
        // If somehow we still hit an FK error, skip gracefully
        if (err.code === '23503') {
          // FK violation
          stats.skipped_missing_store++;
          stats.skipped++;
        } else {
          throw err; // Re-throw unexpected errors
        }
      }
    }

    offset += BATCH_SIZE;
    log(` Processed ${Math.min(offset, migratableCount)}/${migratableCount} snapshots...`);
  }

  // Logged directly so the line carries the snapshot table's own label and
  // the orphan count, which the shared product-stats logger does not print.
  log(
    ` store_product_snapshots: read=${stats.read}, inserted=${stats.inserted}, ` +
      `skipped_missing_store=${stats.skipped_missing_store}, skipped_duplicate=${stats.skipped_duplicate}, ` +
      `skipped_orphaned=${orphanedCount}`
  );
  return stats;
}
// =====================================================
// STEP 8: Link store_product_snapshots to store_products (on cannaiq db)
// =====================================================
/**
 * Step 8: attach each unlinked snapshot to its parent store_products row.
 *
 * Rows are matched on (dispensary_id, provider, provider_product_id) and only
 * snapshots whose store_product_id is still NULL are updated, so a re-run only
 * touches snapshots that have not been linked yet.
 *
 * @returns Stats in which `read` and `inserted` both hold the number of
 *          snapshots linked by this run; `skipped` is always 0.
 */
async function linkSnapshotsToStoreProducts(): Promise<Stats> {
  log('Step 8: Link store_product_snapshots to store_products...');

  // No RETURNING clause: only the affected-row count is used below, and
  // returning every updated id would buffer one row per snapshot in memory.
  const result = await cannaiqPool.query(`
UPDATE store_product_snapshots sps
SET store_product_id = sp.id
FROM store_products sp
WHERE sps.dispensary_id = sp.dispensary_id
AND sps.provider = sp.provider
AND sps.provider_product_id = sp.provider_product_id
AND sps.store_product_id IS NULL
`);

  const linked = result.rowCount ?? 0;
  const stats: Stats = {
    read: linked,
    inserted: linked,
    skipped: 0,
  };
  logStats('store_product_snapshots.store_product_id', stats);
  return stats;
}
// =====================================================
// MAIN
// =====================================================
/**
 * Entry point for the legacy import ETL.
 *
 * Confirms that both pools respond and that they point at different
 * databases, runs the eight import/link steps in order, then prints row
 * counts for the destination tables.
 *
 * On failure the error is logged and the process exit code is set to 1.
 * Both pools are closed before the function returns, on success and failure.
 */
async function main() {
  log('='.repeat(60));
  log('CannaiQ Legacy Import ETL');
  log('='.repeat(60));
  log('');
  log('This script migrates data from dutchie_legacy -> dutchie_menus.');
  log('All operations are INSERT-ONLY and IDEMPOTENT.');
  log('');
  try {
    // Test both connections and show which databases we're connected to
    const cannaiqInfo = await cannaiqPool.query('SELECT current_database() as db, current_user as user');
    const legacyInfo = await legacyPool.query('SELECT current_database() as db, current_user as user');
    log(`DESTINATION (cannaiq): ${cannaiqInfo.rows[0].user}@${cannaiqInfo.rows[0].db}`);
    log(`SOURCE (legacy): ${legacyInfo.rows[0].user}@${legacyInfo.rows[0].db}`);
    log('');

    // Verify we're not writing to legacy
    if (legacyInfo.rows[0].db === cannaiqInfo.rows[0].db) {
      throw new Error(
        'SAFETY CHECK FAILED: Source and destination are the same database!\n' +
        'CANNAIQ_DB_NAME must be different from LEGACY_DB_NAME.'
      );
    }

    // Run steps
    await backfillStateIds();
    log('');
    await insertChains();
    log('');
    await linkDispensariesToChains();
    log('');
    await insertBrands();
    log('');
    await insertStoreProducts();
    log('');
    await linkStoreProductsToBrands();
    log('');
    await insertStoreProductSnapshots();
    log('');
    await linkSnapshotsToStoreProducts();
    log('');

    // Final summary (from cannaiq db)
    log('='.repeat(60));
    log('SUMMARY (from dutchie_menus)');
    log('='.repeat(60));
    const summaryQueries = [
      { table: 'states', query: 'SELECT COUNT(*) FROM states' },
      { table: 'chains', query: 'SELECT COUNT(*) FROM chains' },
      { table: 'brands', query: 'SELECT COUNT(*) FROM brands' },
      { table: 'dispensaries (with state_id)', query: 'SELECT COUNT(*) FROM dispensaries WHERE state_id IS NOT NULL' },
      { table: 'dispensaries (with chain_id)', query: 'SELECT COUNT(*) FROM dispensaries WHERE chain_id IS NOT NULL' },
      { table: 'store_products', query: 'SELECT COUNT(*) FROM store_products' },
      { table: 'store_products (with brand_id)', query: 'SELECT COUNT(*) FROM store_products WHERE brand_id IS NOT NULL' },
      { table: 'store_product_snapshots', query: 'SELECT COUNT(*) FROM store_product_snapshots' },
      { table: 'store_product_snapshots (with store_product_id)', query: 'SELECT COUNT(*) FROM store_product_snapshots WHERE store_product_id IS NOT NULL' },
    ];
    for (const sq of summaryQueries) {
      const result = await cannaiqPool.query(sq.query);
      log(` ${sq.table}: ${result.rows[0].count}`);
    }
    log('');
    log('Legacy import complete!');
  } catch (error: unknown) {
    log(`ERROR: ${error instanceof Error ? error.message : String(error)}`);
    console.error(error);
    // Record the failure instead of calling process.exit() here: exit() would
    // terminate immediately and the finally block below would never run.
    process.exitCode = 1;
  } finally {
    await cannaiqPool.end();
    await legacyPool.end();
  }
}
// Run
main().catch((err) => {
  // Reached only if cleanup itself fails; make sure the failure is visible.
  console.error(err);
  process.exit(1);
});

View File

@@ -0,0 +1,749 @@
/**
* Legacy Data Import ETL Script
*
* DEPRECATED: This script assumed a two-database architecture.
*
* CURRENT ARCHITECTURE (Single Database):
* - All data lives in ONE database: cannaiq (cannaiq-postgres container)
* - Legacy tables exist INSIDE this same database with namespaced prefixes (e.g., legacy_*)
* - The only database is: cannaiq (in cannaiq-postgres container)
*
* If you need to import legacy data:
* 1. Import into namespaced tables (legacy_dispensaries, legacy_products, etc.)
* inside the main cannaiq database
* 2. Use the canonical connection from src/dutchie-az/db/connection.ts
*
* SAFETY RULES:
* - INSERT-ONLY: No UPDATE, no DELETE, no TRUNCATE
* - ON CONFLICT DO NOTHING: Skip duplicates, never overwrite
* - Batch Processing: 500-1000 rows per batch
* - Manual Invocation Only: Requires explicit user execution
*/
import { Pool, PoolClient } from 'pg';
// ============================================================
// CONFIGURATION
// ============================================================
/** Number of rows fetched per LIMIT/OFFSET page by each import function in this script. */
const BATCH_SIZE = 500;
/** Run-time options for the ETL, built from the command line by parseArgs(). */
interface ETLConfig {
/** When true, import steps read and count rows but do not write anything. */
dryRun: boolean;
/** Names of the tables to import; main() runs an import step only if its table name is listed here. */
tables: string[];
}
/** Per-table counters returned by each import function and printed in the final summary. */
interface ETLStats {
/** Label of the table these counters belong to. */
table: string;
/** Rows fetched from the source table. */
read: number;
/** Rows written to the destination (in dry-run mode: rows that would have been attempted). */
inserted: number;
/** Rows not written: conflict with an existing row, or a missing parent row. */
skipped: number;
/** Rows whose insert threw an error. */
errors: number;
/** Wall-clock duration of the import step, in milliseconds. */
durationMs: number;
}
/**
 * Build the ETL configuration from `process.argv`.
 *
 * Recognised flags:
 *  - `--dry-run`       read and count rows without writing.
 *  - `--tables=a,b,c`  comma-separated list of tables to import; replaces the
 *                      default list.
 *
 * Table names are trimmed and empty entries are dropped, so input such as
 * `--tables=dispensaries, products,` yields `['dispensaries', 'products']`.
 * Any other argument is ignored.
 *
 * @returns The parsed configuration, with defaults for flags that were not given.
 */
function parseArgs(): ETLConfig {
  const tablesFlag = '--tables=';
  const config: ETLConfig = {
    dryRun: false,
    tables: ['dispensaries', 'products', 'dutchie_products', 'dutchie_product_snapshots'],
  };

  for (const arg of process.argv.slice(2)) {
    if (arg === '--dry-run') {
      config.dryRun = true;
    } else if (arg.startsWith(tablesFlag)) {
      config.tables = arg
        .slice(tablesFlag.length)
        .split(',')
        .map((name) => name.trim())
        .filter((name) => name.length > 0);
    }
  }

  return config;
}
// ============================================================
// DATABASE CONNECTIONS
// ============================================================
// DEPRECATED: Both pools point to the same database (cannaiq)
// Legacy tables exist inside the main database with namespaced prefixes
/**
 * Build the connection pool used for reading legacy tables.
 *
 * Every setting comes from a CANNAIQ_DB_* environment variable, falling back
 * to a local-development default when the variable is unset or empty.
 * At most 5 connections are opened.
 */
function createLegacyPool(): Pool {
  const env = process.env;
  const portText = env.CANNAIQ_DB_PORT || '54320';

  const settings = {
    host: env.CANNAIQ_DB_HOST || 'localhost',
    port: parseInt(portText),
    user: env.CANNAIQ_DB_USER || 'dutchie',
    password: env.CANNAIQ_DB_PASS || 'dutchie_local_pass',
    database: env.CANNAIQ_DB_NAME || 'cannaiq',
    max: 5,
  };
  return new Pool(settings);
}
/**
 * Build the connection pool used for writing to the cannaiq database.
 *
 * Every setting comes from a CANNAIQ_DB_* environment variable, falling back
 * to a local-development default when the variable is unset or empty.
 * At most 5 connections are opened.
 */
function createCannaiqPool(): Pool {
  const env = process.env;
  const portText = env.CANNAIQ_DB_PORT || '54320';

  const settings = {
    host: env.CANNAIQ_DB_HOST || 'localhost',
    port: parseInt(portText),
    user: env.CANNAIQ_DB_USER || 'dutchie',
    password: env.CANNAIQ_DB_PASS || 'dutchie_local_pass',
    database: env.CANNAIQ_DB_NAME || 'cannaiq',
    max: 5,
  };
  return new Pool(settings);
}
// ============================================================
// STAGING TABLE CREATION
// ============================================================
/**
 * DDL for the staging tables that receive legacy rows, executed as a single
 * multi-statement query by createStagingTables().
 *
 * Creates dispensaries_from_legacy, products_from_legacy and
 * price_history_legacy plus two lookup indexes. Every statement uses
 * IF NOT EXISTS, so running it against a database that already has these
 * objects leaves them unchanged.
 *
 * The UNIQUE(legacy_id) and UNIQUE(legacy_product_id) constraints are what the
 * ON CONFLICT clauses in importDispensaries() and importProducts() rely on.
 * NOTE(review): price_history_legacy has no uniqueness constraint, and no
 * import function in this script appears to populate it - confirm whether it
 * is still needed.
 */
const STAGING_TABLES_SQL = `
-- Staging table for legacy dispensaries
CREATE TABLE IF NOT EXISTS dispensaries_from_legacy (
id SERIAL PRIMARY KEY,
legacy_id INTEGER NOT NULL,
name VARCHAR(255) NOT NULL,
slug VARCHAR(255) NOT NULL,
city VARCHAR(100) NOT NULL,
state VARCHAR(10) NOT NULL,
postal_code VARCHAR(20),
address TEXT,
latitude DECIMAL(10,7),
longitude DECIMAL(10,7),
menu_url TEXT,
website TEXT,
legacy_metadata JSONB,
imported_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(legacy_id)
);
-- Staging table for legacy products
CREATE TABLE IF NOT EXISTS products_from_legacy (
id SERIAL PRIMARY KEY,
legacy_product_id INTEGER NOT NULL,
legacy_dispensary_id INTEGER,
external_product_id VARCHAR(255),
name VARCHAR(500) NOT NULL,
brand_name VARCHAR(255),
type VARCHAR(100),
subcategory VARCHAR(100),
strain_type VARCHAR(50),
thc DECIMAL(10,4),
cbd DECIMAL(10,4),
price_cents INTEGER,
original_price_cents INTEGER,
stock_status VARCHAR(20),
weight VARCHAR(100),
primary_image_url TEXT,
first_seen_at TIMESTAMPTZ,
last_seen_at TIMESTAMPTZ,
legacy_raw_payload JSONB,
imported_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(legacy_product_id)
);
-- Staging table for legacy price history
CREATE TABLE IF NOT EXISTS price_history_legacy (
id SERIAL PRIMARY KEY,
legacy_product_id INTEGER NOT NULL,
price_cents INTEGER,
recorded_at TIMESTAMPTZ,
imported_at TIMESTAMPTZ DEFAULT NOW()
);
-- Index for efficient lookups
CREATE INDEX IF NOT EXISTS idx_disp_legacy_slug ON dispensaries_from_legacy(slug, city, state);
CREATE INDEX IF NOT EXISTS idx_prod_legacy_ext_id ON products_from_legacy(external_product_id);
`;
/**
 * Ensure the legacy staging tables and their indexes exist.
 *
 * @param cannaiqPool Pool for the database in which the staging tables are created.
 * @param dryRun      When true, only logs what would happen; no connection is opened.
 */
async function createStagingTables(cannaiqPool: Pool, dryRun: boolean): Promise<void> {
  console.log('[ETL] Creating staging tables...');

  if (!dryRun) {
    const conn = await cannaiqPool.connect();
    try {
      await conn.query(STAGING_TABLES_SQL);
      console.log('[ETL] Staging tables created successfully');
    } finally {
      // Always hand the connection back, even if the DDL failed.
      conn.release();
    }
    return;
  }

  console.log('[ETL] DRY RUN: Would create staging tables');
}
// ============================================================
// ETL FUNCTIONS
// ============================================================
/**
 * Copy every row of the legacy `dispensaries` table into the
 * `dispensaries_from_legacy` staging table.
 *
 * Rows are read in pages of BATCH_SIZE ordered by id and inserted one at a
 * time with ON CONFLICT (legacy_id) DO NOTHING, so rows that were already
 * imported are counted as skipped rather than overwritten. A row whose insert
 * throws is logged and counted in `errors`; the import then continues.
 *
 * @param legacyPool  Pool the legacy rows are read from.
 * @param cannaiqPool Pool the staging rows are written to.
 * @param dryRun      When true, rows are read and counted as inserted but nothing is written.
 * @returns Counters and wall-clock duration for this table.
 */
async function importDispensaries(
  legacyPool: Pool,
  cannaiqPool: Pool,
  dryRun: boolean
): Promise<ETLStats> {
  const startTime = Date.now();
  const stats: ETLStats = {
    table: 'dispensaries',
    read: 0,
    inserted: 0,
    skipped: 0,
    errors: 0,
    durationMs: 0,
  };
  console.log('[ETL] Importing dispensaries...');

  const legacyClient = await legacyPool.connect();
  // If the second checkout fails, give the first client back so it is not leaked.
  const cannaiqClient = await cannaiqPool.connect().catch((err: unknown) => {
    legacyClient.release();
    throw err;
  });

  try {
    // Count total rows
    const countResult = await legacyClient.query('SELECT COUNT(*) FROM dispensaries');
    const totalRows = parseInt(countResult.rows[0].count, 10);
    console.log(`[ETL] Found ${totalRows} dispensaries in legacy database`);

    // Process in batches
    let offset = 0;
    while (offset < totalRows) {
      const batchResult = await legacyClient.query(`
SELECT
id, name, slug, city, state, zip, address,
latitude, longitude, menu_url, website, dba_name,
menu_provider, product_provider, provider_detection_data
FROM dispensaries
ORDER BY id
LIMIT $1 OFFSET $2
`, [BATCH_SIZE, offset]);
      stats.read += batchResult.rows.length;

      if (dryRun) {
        console.log(`[ETL] DRY RUN: Would insert batch of ${batchResult.rows.length} dispensaries`);
        stats.inserted += batchResult.rows.length;
      } else {
        for (const row of batchResult.rows) {
          try {
            // Columns with no staging equivalent are preserved as JSON metadata.
            const legacyMetadata = {
              dba_name: row.dba_name,
              menu_provider: row.menu_provider,
              product_provider: row.product_provider,
              provider_detection_data: row.provider_detection_data,
            };
            const insertResult = await cannaiqClient.query(`
INSERT INTO dispensaries_from_legacy
(legacy_id, name, slug, city, state, postal_code, address,
latitude, longitude, menu_url, website, legacy_metadata)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
ON CONFLICT (legacy_id) DO NOTHING
RETURNING id
`, [
              row.id,
              row.name,
              row.slug,
              row.city,
              row.state,
              row.zip,
              row.address,
              row.latitude,
              row.longitude,
              row.menu_url,
              row.website,
              JSON.stringify(legacyMetadata),
            ]);
            // rowCount may be null; treat that as "nothing inserted".
            if ((insertResult.rowCount ?? 0) > 0) {
              stats.inserted++;
            } else {
              stats.skipped++;
            }
          } catch (err: any) {
            stats.errors++;
            console.error(`[ETL] Error inserting dispensary ${row.id}:`, err.message);
          }
        }
      }

      offset += BATCH_SIZE;
      console.log(`[ETL] Processed ${Math.min(offset, totalRows)}/${totalRows} dispensaries`);
    }
  } finally {
    legacyClient.release();
    cannaiqClient.release();
  }

  stats.durationMs = Date.now() - startTime;
  return stats;
}
/**
 * Copy every row of the legacy `products` table into the
 * `products_from_legacy` staging table.
 *
 * Rows are read in pages of BATCH_SIZE ordered by id and inserted one at a
 * time with ON CONFLICT (legacy_product_id) DO NOTHING. Prices are converted
 * to integer cents and the `in_stock` flag to a stock-status string. A row
 * whose insert throws is logged and counted in `errors`; the import continues.
 *
 * @param legacyPool  Pool the legacy rows are read from.
 * @param cannaiqPool Pool the staging rows are written to.
 * @param dryRun      When true, rows are read and counted as inserted but nothing is written.
 * @returns Counters and wall-clock duration for this table.
 */
async function importProducts(
  legacyPool: Pool,
  cannaiqPool: Pool,
  dryRun: boolean
): Promise<ETLStats> {
  const startTime = Date.now();
  const stats: ETLStats = {
    table: 'products',
    read: 0,
    inserted: 0,
    skipped: 0,
    errors: 0,
    durationMs: 0,
  };
  console.log('[ETL] Importing legacy products...');

  // Convert a price to integer cents. Missing or unparseable values become
  // null; a genuine price of 0 stays 0 (a truthiness test would turn it into null).
  const toCents = (value: unknown): number | null => {
    if (value === null || value === undefined || value === '') {
      return null;
    }
    const amount = typeof value === 'number' ? value : parseFloat(String(value));
    return Number.isFinite(amount) ? Math.round(amount * 100) : null;
  };

  const legacyClient = await legacyPool.connect();
  // If the second checkout fails, give the first client back so it is not leaked.
  const cannaiqClient = await cannaiqPool.connect().catch((err: unknown) => {
    legacyClient.release();
    throw err;
  });

  try {
    const countResult = await legacyClient.query('SELECT COUNT(*) FROM products');
    const totalRows = parseInt(countResult.rows[0].count, 10);
    console.log(`[ETL] Found ${totalRows} products in legacy database`);

    let offset = 0;
    while (offset < totalRows) {
      const batchResult = await legacyClient.query(`
SELECT
id, dispensary_id, dutchie_product_id, name, brand,
subcategory, strain_type, thc_percentage, cbd_percentage,
price, original_price, in_stock, weight, image_url,
first_seen_at, last_seen_at, raw_data
FROM products
ORDER BY id
LIMIT $1 OFFSET $2
`, [BATCH_SIZE, offset]);
      stats.read += batchResult.rows.length;

      if (dryRun) {
        console.log(`[ETL] DRY RUN: Would insert batch of ${batchResult.rows.length} products`);
        stats.inserted += batchResult.rows.length;
      } else {
        for (const row of batchResult.rows) {
          try {
            // Anything other than an explicit true/false is recorded as unknown.
            const stockStatus = row.in_stock === true ? 'in_stock' :
              row.in_stock === false ? 'out_of_stock' : 'unknown';
            const priceCents = toCents(row.price);
            const originalPriceCents = toCents(row.original_price);
            const insertResult = await cannaiqClient.query(`
INSERT INTO products_from_legacy
(legacy_product_id, legacy_dispensary_id, external_product_id,
name, brand_name, subcategory, strain_type, thc, cbd,
price_cents, original_price_cents, stock_status, weight,
primary_image_url, first_seen_at, last_seen_at, legacy_raw_payload)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17)
ON CONFLICT (legacy_product_id) DO NOTHING
RETURNING id
`, [
              row.id,
              row.dispensary_id,
              row.dutchie_product_id,
              row.name,
              row.brand,
              row.subcategory,
              row.strain_type,
              row.thc_percentage,
              row.cbd_percentage,
              priceCents,
              originalPriceCents,
              stockStatus,
              row.weight,
              row.image_url,
              row.first_seen_at,
              row.last_seen_at,
              row.raw_data ? JSON.stringify(row.raw_data) : null,
            ]);
            // rowCount may be null; treat that as "nothing inserted".
            if ((insertResult.rowCount ?? 0) > 0) {
              stats.inserted++;
            } else {
              stats.skipped++;
            }
          } catch (err: any) {
            stats.errors++;
            console.error(`[ETL] Error inserting product ${row.id}:`, err.message);
          }
        }
      }

      offset += BATCH_SIZE;
      console.log(`[ETL] Processed ${Math.min(offset, totalRows)}/${totalRows} products`);
    }
  } finally {
    legacyClient.release();
    cannaiqClient.release();
  }

  stats.durationMs = Date.now() - startTime;
  return stats;
}
/**
 * Copy legacy `dutchie_products` rows into the canonical `dutchie_products`
 * table.
 *
 * Rows are read in pages of BATCH_SIZE ordered by id. A product is skipped
 * when its dispensary_id has no row in the destination `dispensaries` table,
 * or when (dispensary_id, external_product_id) already exists
 * (ON CONFLICT DO NOTHING). Insert failures are counted in `errors`; only the
 * first five are logged.
 *
 * NOTE(review): column values are passed through exactly as read. If any of
 * the columns are JSON-typed and hold arrays, confirm the driver serialises
 * them as intended.
 *
 * @param legacyPool  Pool the legacy rows are read from.
 * @param cannaiqPool Pool the canonical rows are written to.
 * @param dryRun      When true, rows are read and counted as inserted but nothing is written.
 * @returns Counters and wall-clock duration for this table.
 */
async function importDutchieProducts(
  legacyPool: Pool,
  cannaiqPool: Pool,
  dryRun: boolean
): Promise<ETLStats> {
  const startTime = Date.now();
  const stats: ETLStats = {
    table: 'dutchie_products',
    read: 0,
    inserted: 0,
    skipped: 0,
    errors: 0,
    durationMs: 0,
  };
  console.log('[ETL] Importing dutchie_products...');

  const legacyClient = await legacyPool.connect();
  // If the second checkout fails, give the first client back so it is not leaked.
  const cannaiqClient = await cannaiqPool.connect().catch((err: unknown) => {
    legacyClient.release();
    throw err;
  });

  try {
    const countResult = await legacyClient.query('SELECT COUNT(*) FROM dutchie_products');
    const totalRows = parseInt(countResult.rows[0].count, 10);
    console.log(`[ETL] Found ${totalRows} dutchie_products in legacy database`);

    // Products reference a dispensary, so dispensaries must already exist in
    // the destination. This function never inserts dispensaries, so each
    // existence check is cached per dispensary_id instead of repeated per product.
    const dispensaryExists = new Map<unknown, boolean>();

    let offset = 0;
    while (offset < totalRows) {
      const batchResult = await legacyClient.query(`
SELECT *
FROM dutchie_products
ORDER BY id
LIMIT $1 OFFSET $2
`, [BATCH_SIZE, offset]);
      stats.read += batchResult.rows.length;

      if (dryRun) {
        console.log(`[ETL] DRY RUN: Would insert batch of ${batchResult.rows.length} dutchie_products`);
        stats.inserted += batchResult.rows.length;
      } else {
        // For each row, attempt insert with ON CONFLICT DO NOTHING
        for (const row of batchResult.rows) {
          try {
            // Check (once per dispensary) that the dispensary exists in the canonical table
            let exists = dispensaryExists.get(row.dispensary_id);
            if (exists === undefined) {
              const dispCheck = await cannaiqClient.query(`
SELECT id FROM dispensaries WHERE id = $1
`, [row.dispensary_id]);
              exists = dispCheck.rows.length > 0;
              dispensaryExists.set(row.dispensary_id, exists);
            }
            if (!exists) {
              stats.skipped++;
              continue; // Skip products for dispensaries not yet imported
            }

            const insertResult = await cannaiqClient.query(`
INSERT INTO dutchie_products
(dispensary_id, platform, external_product_id, platform_dispensary_id,
c_name, name, brand_name, brand_id, brand_logo_url,
type, subcategory, strain_type, provider,
thc, thc_content, cbd, cbd_content, cannabinoids_v2, effects,
status, medical_only, rec_only, featured, coming_soon,
certificate_of_analysis_enabled,
is_below_threshold, is_below_kiosk_threshold,
options_below_threshold, options_below_kiosk_threshold,
stock_status, total_quantity_available,
primary_image_url, images, measurements, weight, past_c_names,
created_at_dutchie, updated_at_dutchie, latest_raw_payload)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38, $39)
ON CONFLICT (dispensary_id, external_product_id) DO NOTHING
RETURNING id
`, [
              row.dispensary_id,
              row.platform || 'dutchie',
              row.external_product_id,
              row.platform_dispensary_id,
              row.c_name,
              row.name,
              row.brand_name,
              row.brand_id,
              row.brand_logo_url,
              row.type,
              row.subcategory,
              row.strain_type,
              row.provider,
              row.thc,
              row.thc_content,
              row.cbd,
              row.cbd_content,
              row.cannabinoids_v2,
              row.effects,
              row.status,
              row.medical_only,
              row.rec_only,
              row.featured,
              row.coming_soon,
              row.certificate_of_analysis_enabled,
              row.is_below_threshold,
              row.is_below_kiosk_threshold,
              row.options_below_threshold,
              row.options_below_kiosk_threshold,
              row.stock_status,
              row.total_quantity_available,
              row.primary_image_url,
              row.images,
              row.measurements,
              row.weight,
              row.past_c_names,
              row.created_at_dutchie,
              row.updated_at_dutchie,
              row.latest_raw_payload,
            ]);
            // rowCount may be null; treat that as "nothing inserted".
            if ((insertResult.rowCount ?? 0) > 0) {
              stats.inserted++;
            } else {
              stats.skipped++;
            }
          } catch (err: any) {
            stats.errors++;
            // Log only the first few failures to keep the output readable.
            if (stats.errors <= 5) {
              console.error(`[ETL] Error inserting dutchie_product ${row.id}:`, err.message);
            }
          }
        }
      }

      offset += BATCH_SIZE;
      console.log(`[ETL] Processed ${Math.min(offset, totalRows)}/${totalRows} dutchie_products`);
    }
  } finally {
    legacyClient.release();
    cannaiqClient.release();
  }

  stats.durationMs = Date.now() - startTime;
  return stats;
}
/**
 * Copy legacy `dutchie_product_snapshots` rows into the canonical
 * `dutchie_product_snapshots` table.
 *
 * Each snapshot is re-pointed at the destination product whose
 * (dispensary_id, external_product_id) matches; snapshots with no matching
 * product are skipped. Rows are read in pages of BATCH_SIZE ordered by id.
 * Insert failures are counted in `errors`; only the first five are logged.
 *
 * The INSERT has no conflict clause, so running this import twice inserts the
 * snapshots twice.
 *
 * @param legacyPool  Pool the legacy rows are read from.
 * @param cannaiqPool Pool the canonical rows are written to.
 * @param dryRun      When true, rows are read and counted as inserted but nothing is written.
 * @returns Counters and wall-clock duration for this table.
 */
async function importDutchieSnapshots(
  legacyPool: Pool,
  cannaiqPool: Pool,
  dryRun: boolean
): Promise<ETLStats> {
  const startTime = Date.now();
  const stats: ETLStats = {
    table: 'dutchie_product_snapshots',
    read: 0,
    inserted: 0,
    skipped: 0,
    errors: 0,
    durationMs: 0,
  };
  console.log('[ETL] Importing dutchie_product_snapshots...');

  const legacyClient = await legacyPool.connect();
  // If the second checkout fails, give the first client back so it is not leaked.
  const cannaiqClient = await cannaiqPool.connect().catch((err: unknown) => {
    legacyClient.release();
    throw err;
  });

  try {
    const countResult = await legacyClient.query('SELECT COUNT(*) FROM dutchie_product_snapshots');
    const totalRows = parseInt(countResult.rows[0].count, 10);
    console.log(`[ETL] Found ${totalRows} dutchie_product_snapshots in legacy database`);

    // Build a lookup from "dispensary_id:external_product_id" to the
    // destination product id.
    console.log('[ETL] Building product ID mapping...');
    const mappingResult = await cannaiqClient.query(`
SELECT id, external_product_id, dispensary_id FROM dutchie_products
`);
    const productByKey = new Map<string, number>();
    for (const row of mappingResult.rows) {
      const key = `${row.dispensary_id}:${row.external_product_id}`;
      productByKey.set(key, row.id);
    }

    let offset = 0;
    while (offset < totalRows) {
      const batchResult = await legacyClient.query(`
SELECT *
FROM dutchie_product_snapshots
ORDER BY id
LIMIT $1 OFFSET $2
`, [BATCH_SIZE, offset]);
      stats.read += batchResult.rows.length;

      if (dryRun) {
        console.log(`[ETL] DRY RUN: Would insert batch of ${batchResult.rows.length} snapshots`);
        stats.inserted += batchResult.rows.length;
      } else {
        for (const row of batchResult.rows) {
          try {
            // Map legacy product ID to canonical product ID
            const key = `${row.dispensary_id}:${row.external_product_id}`;
            const canonicalProductId = productByKey.get(key);
            if (!canonicalProductId) {
              stats.skipped++;
              continue; // Skip snapshots for products not yet imported
            }

            // Insert snapshot (no conflict handling - all snapshots are historical)
            await cannaiqClient.query(`
INSERT INTO dutchie_product_snapshots
(dutchie_product_id, dispensary_id, platform_dispensary_id,
external_product_id, pricing_type, crawl_mode,
status, featured, special, medical_only, rec_only,
is_present_in_feed, stock_status,
rec_min_price_cents, rec_max_price_cents, rec_min_special_price_cents,
med_min_price_cents, med_max_price_cents, med_min_special_price_cents,
wholesale_min_price_cents,
total_quantity_available, total_kiosk_quantity_available,
manual_inventory, is_below_threshold, is_below_kiosk_threshold,
options, raw_payload, crawled_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28)
`, [
              canonicalProductId,
              row.dispensary_id,
              row.platform_dispensary_id,
              row.external_product_id,
              row.pricing_type,
              row.crawl_mode,
              row.status,
              row.featured,
              row.special,
              row.medical_only,
              row.rec_only,
              row.is_present_in_feed,
              row.stock_status,
              row.rec_min_price_cents,
              row.rec_max_price_cents,
              row.rec_min_special_price_cents,
              row.med_min_price_cents,
              row.med_max_price_cents,
              row.med_min_special_price_cents,
              row.wholesale_min_price_cents,
              row.total_quantity_available,
              row.total_kiosk_quantity_available,
              row.manual_inventory,
              row.is_below_threshold,
              row.is_below_kiosk_threshold,
              row.options,
              row.raw_payload,
              row.crawled_at,
            ]);
            stats.inserted++;
          } catch (err: any) {
            stats.errors++;
            // Log only the first few failures to keep the output readable.
            if (stats.errors <= 5) {
              console.error(`[ETL] Error inserting snapshot ${row.id}:`, err.message);
            }
          }
        }
      }

      offset += BATCH_SIZE;
      console.log(`[ETL] Processed ${Math.min(offset, totalRows)}/${totalRows} snapshots`);
    }
  } finally {
    legacyClient.release();
    cannaiqClient.release();
  }

  stats.durationMs = Date.now() - startTime;
  return stats;
}
// ============================================================
// MAIN
// ============================================================
/**
 * Entry point for the legacy staging ETL.
 *
 * Parses the command line, checks both database connections, creates the
 * staging tables, runs the import step for each requested table in a fixed
 * order (dispensaries, products, dutchie_products, dutchie_product_snapshots)
 * and prints a summary table.
 *
 * On a fatal error the message is logged and the process exit code is set
 * to 1. Both pools are closed before the function returns, on success and
 * on failure.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('LEGACY DATA IMPORT ETL');
  console.log('='.repeat(60));

  const config = parseArgs();
  console.log(`Mode: ${config.dryRun ? 'DRY RUN' : 'LIVE'}`);
  console.log(`Tables: ${config.tables.join(', ')}`);

  // Import steps in the order they must run (later steps depend on earlier ones).
  const importers: Array<{
    table: string;
    run: (legacy: Pool, cannaiq: Pool, dryRun: boolean) => Promise<ETLStats>;
  }> = [
    { table: 'dispensaries', run: importDispensaries },
    { table: 'products', run: importProducts },
    { table: 'dutchie_products', run: importDutchieProducts },
    { table: 'dutchie_product_snapshots', run: importDutchieSnapshots },
  ];

  // A mistyped table name would otherwise silently import nothing.
  const unknownTables = config.tables.filter(
    (name) => !importers.some((importer) => importer.table === name)
  );
  if (unknownTables.length > 0) {
    console.warn(`[ETL] Ignoring unknown table name(s): ${unknownTables.join(', ')}`);
  }
  console.log('');

  // Create connection pools
  const legacyPool = createLegacyPool();
  const cannaiqPool = createCannaiqPool();

  try {
    // Test connections
    console.log('[ETL] Testing database connections...');
    await legacyPool.query('SELECT 1');
    console.log('[ETL] Legacy database connected');
    await cannaiqPool.query('SELECT 1');
    console.log('[ETL] CannaiQ database connected');
    console.log('');

    // Create staging tables
    await createStagingTables(cannaiqPool, config.dryRun);
    console.log('');

    // Run imports
    const allStats: ETLStats[] = [];
    for (const importer of importers) {
      if (!config.tables.includes(importer.table)) {
        continue;
      }
      const stats = await importer.run(legacyPool, cannaiqPool, config.dryRun);
      allStats.push(stats);
      console.log('');
    }

    // Print summary
    console.log('='.repeat(60));
    console.log('IMPORT SUMMARY');
    console.log('='.repeat(60));
    console.log('');
    console.log('| Table | Read | Inserted | Skipped | Errors | Duration |');
    console.log('|----------------------------|----------|----------|----------|----------|----------|');
    for (const s of allStats) {
      console.log(`| ${s.table.padEnd(26)} | ${String(s.read).padStart(8)} | ${String(s.inserted).padStart(8)} | ${String(s.skipped).padStart(8)} | ${String(s.errors).padStart(8)} | ${(s.durationMs / 1000).toFixed(1).padStart(7)}s |`);
    }
    console.log('');
    const totalInserted = allStats.reduce((sum, s) => sum + s.inserted, 0);
    const totalErrors = allStats.reduce((sum, s) => sum + s.errors, 0);
    console.log(`Total inserted: ${totalInserted}`);
    console.log(`Total errors: ${totalErrors}`);
    if (config.dryRun) {
      console.log('');
      console.log('DRY RUN COMPLETE - No data was written');
      console.log('Run without --dry-run to perform actual import');
    }
  } catch (error: unknown) {
    console.error('[ETL] Fatal error:', error instanceof Error ? error.message : error);
    // Record the failure instead of calling process.exit() here: exit() would
    // terminate immediately and the finally block below would never run.
    process.exitCode = 1;
    return;
  } finally {
    await legacyPool.end();
    await cannaiqPool.end();
  }

  console.log('');
  console.log('ETL complete');
}
main().catch((err) => {
  console.error('Unhandled error:', err);
  process.exit(1);
});

View File

@@ -1,4 +1,4 @@
import { pool } from '../db/migrate';
import { pool } from '../db/pool';
import { getActiveProxy, putProxyInTimeout, isBotDetectionError } from '../services/proxy';
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';

View File

@@ -13,7 +13,7 @@
* npx tsx src/scripts/queue-dispensaries.ts --process # Process queued jobs
*/
import { pool } from '../db/migrate';
import { pool } from '../db/pool';
import { logger } from '../services/logger';
import {
runDetectMenuProviderJob,

View File

@@ -17,7 +17,7 @@
* npx tsx src/scripts/queue-intelligence.ts --dry-run
*/
import { pool } from '../db/migrate';
import { pool } from '../db/pool';
import { logger } from '../services/logger';
import {
detectMultiCategoryProviders,

View File

@@ -0,0 +1,173 @@
#!/usr/bin/env npx tsx
/**
* Dutchie Platform ID Resolver
*
* Standalone script to resolve a Dutchie dispensary slug to its platform ID.
*
* USAGE:
* npx tsx src/scripts/resolve-dutchie-id.ts <slug>
* npx tsx src/scripts/resolve-dutchie-id.ts hydroman-dispensary
* npx tsx src/scripts/resolve-dutchie-id.ts AZ-Deeply-Rooted
*
* RESOLUTION STRATEGY:
* 1. Navigate to https://dutchie.com/embedded-menu/{slug} via Puppeteer
* 2. Extract window.reactEnv.dispensaryId (preferred - fastest)
* 3. If reactEnv fails, call GraphQL GetAddressBasedDispensaryData as fallback
*
* OUTPUT:
* - dispensaryId: The MongoDB ObjectId (e.g., "6405ef617056e8014d79101b")
* - source: "reactEnv" or "graphql"
* - httpStatus: HTTP status from embedded menu page
* - error: Error message if resolution failed
*/
import { resolveDispensaryIdWithDetails, ResolveDispensaryResult } from '../dutchie-az/services/graphql-client';
/**
 * CLI entry point: resolve the slug given as the first argument to a Dutchie
 * dispensary ID and print the outcome.
 *
 * With no arguments, or with --help / -h, prints usage and exits with code 0.
 * On success prints the ID, a sample GraphQL request body and a JSON summary.
 * When no ID is resolved, or the resolver throws, prints diagnostics and exits
 * with code 1.
 */
async function main() {
  const argv = process.argv.slice(2);
  const wantsHelp = argv.length === 0 || argv.includes('--help') || argv.includes('-h');
  if (wantsHelp) {
    console.log(`
Dutchie Platform ID Resolver
Usage:
npx tsx src/scripts/resolve-dutchie-id.ts <slug>
Examples:
npx tsx src/scripts/resolve-dutchie-id.ts hydroman-dispensary
npx tsx src/scripts/resolve-dutchie-id.ts AZ-Deeply-Rooted
npx tsx src/scripts/resolve-dutchie-id.ts mint-cannabis
Resolution Strategy:
1. Puppeteer navigates to https://dutchie.com/embedded-menu/{slug}
2. Extracts window.reactEnv.dispensaryId (preferred)
3. Falls back to GraphQL GetAddressBasedDispensaryData if needed
Output Fields:
- dispensaryId: MongoDB ObjectId (e.g., "6405ef617056e8014d79101b")
- source: "reactEnv" (from page) or "graphql" (from API)
- httpStatus: HTTP status code from page load
- error: Error message if resolution failed
`);
    process.exit(0);
  }

  const slug = argv[0];
  const rule = '='.repeat(60);
  // Print each given line with its own console.log call.
  const say = (...lines: string[]): void => {
    for (const line of lines) {
      console.log(line);
    }
  };

  say(
    rule,
    'DUTCHIE PLATFORM ID RESOLVER',
    rule,
    `Slug: ${slug}`,
    `Embedded Menu URL: https://dutchie.com/embedded-menu/${slug}`,
    '',
    'Resolving...',
    ''
  );

  const startedAt = Date.now();
  try {
    const result: ResolveDispensaryResult = await resolveDispensaryIdWithDetails(slug);
    const duration = Date.now() - startedAt;
    say(rule, 'RESOLUTION RESULT', rule);

    if (!result.dispensaryId) {
      say(
        `✗ FAILED`,
        '',
        ` Error: ${result.error || 'Unknown error'}`,
        ` HTTP Status: ${result.httpStatus || 'N/A'}`,
        ` Duration: ${duration}ms`,
        ''
      );
      const blocked = result.httpStatus === 403 || result.httpStatus === 404;
      if (blocked) {
        say(
          'NOTE: This store may be removed or not accessible on Dutchie.',
          ' Mark dispensary as not_crawlable in the database.'
        );
      }
      say('', 'JSON OUTPUT:');
      const failureSummary = {
        success: false,
        slug,
        error: result.error,
        httpStatus: result.httpStatus,
        durationMs: duration,
      };
      console.log(JSON.stringify(failureSummary, null, 2));
      process.exit(1);
    } else {
      say(
        `✓ SUCCESS`,
        '',
        ` Dispensary ID: ${result.dispensaryId}`,
        ` Source: ${result.source}`,
        ` HTTP Status: ${result.httpStatus || 'N/A'}`,
        ` Duration: ${duration}ms`,
        ''
      );

      // Show how to use this ID
      say(
        rule,
        'USAGE',
        rule,
        '',
        'Use this ID in GraphQL FilteredProducts query:',
        '',
        ' POST https://dutchie.com/api-3/graphql',
        '',
        ' Body:'
      );
      console.log(` {
"operationName": "FilteredProducts",
"variables": {
"productsFilter": {
"dispensaryId": "${result.dispensaryId}",
"pricingType": "rec",
"Status": "Active"
},
"page": 0,
"perPage": 100
},
"extensions": {
"persistedQuery": {
"version": 1,
"sha256Hash": "ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0"
}
}
}`);
      console.log('');

      // Output for piping/scripting
      say(rule, 'JSON OUTPUT', rule);
      const successSummary = {
        success: true,
        slug,
        dispensaryId: result.dispensaryId,
        source: result.source,
        httpStatus: result.httpStatus,
        durationMs: duration,
      };
      console.log(JSON.stringify(successSummary, null, 2));
    }
  } catch (error: any) {
    const duration = Date.now() - startedAt;
    const complain = (...lines: string[]): void => {
      for (const line of lines) {
        console.error(line);
      }
    };
    complain(rule, 'ERROR', rule, `Message: ${error.message}`, `Duration: ${duration}ms`, '');
    const dnsFailure = error.message.includes('net::ERR_NAME_NOT_RESOLVED');
    if (dnsFailure) {
      complain(
        'NOTE: DNS resolution failed. This typically happens when running',
        ' locally due to network restrictions. Try running from the',
        ' Kubernetes pod or a cloud environment.'
      );
    }
    process.exit(1);
  }
}
main();

View File

@@ -0,0 +1,105 @@
#!/usr/bin/env npx tsx
/**
* Run Backfill CLI
*
* Import historical payloads from existing data sources.
*
* Usage:
* npx tsx src/scripts/run-backfill.ts [options]
*
* Options:
* --source SOURCE Source to backfill from:
* - dutchie_products (default)
* - snapshots
* - cache_files
* - all
* --dry-run Print changes without modifying DB
* --limit N Max payloads to create (default: unlimited)
* --dispensary ID Only backfill specific dispensary
* --cache-path PATH Path to cache files (default: ./cache/payloads)
*/
import { Pool } from 'pg';
import { runBackfill, BackfillOptions } from '../hydration';
/**
 * CLI entry point for the backfill runner.
 *
 * Reads --source, --dry-run, --limit, --dispensary and --cache-path from the
 * command line, runs the backfill against the database named by DATABASE_URL
 * and prints per-source results.
 *
 * A non-integer --limit or --dispensary is rejected with an error message
 * before any database work starts. On any failure the process exit code is
 * set to 1; the pool is always closed once it has been created.
 */
async function main() {
  const args = process.argv.slice(2);

  // Value following `flag`, or undefined when the flag or its value is absent.
  const flagValue = (flag: string): string | undefined => {
    const idx = args.indexOf(flag);
    return idx !== -1 && args[idx + 1] ? args[idx + 1] : undefined;
  };

  // Integer value of `flag`; throws when a value is present but is not a number.
  const intFlag = (flag: string): number | undefined => {
    const raw = flagValue(flag);
    if (raw === undefined) {
      return undefined;
    }
    const parsed = parseInt(raw, 10);
    if (Number.isNaN(parsed)) {
      throw new Error(`Invalid value for ${flag}: "${raw}" (expected an integer)`);
    }
    return parsed;
  };

  const dryRun = args.includes('--dry-run');
  const source = (flagValue('--source') ?? 'dutchie_products') as BackfillOptions['source'];
  const cachePath = flagValue('--cache-path');

  let limit: number | undefined;
  let dispensaryId: number | undefined;
  try {
    limit = intFlag('--limit');
    dispensaryId = intFlag('--dispensary');
  } catch (error: unknown) {
    console.error('Backfill error:', error instanceof Error ? error.message : error);
    process.exitCode = 1;
    return;
  }

  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });

  try {
    console.log('='.repeat(60));
    console.log('BACKFILL RUNNER');
    console.log('='.repeat(60));
    console.log(`Source: ${source}`);
    console.log(`Dry run: ${dryRun}`);
    if (limit !== undefined) console.log(`Limit: ${limit}`);
    if (dispensaryId !== undefined) console.log(`Dispensary: ${dispensaryId}`);
    if (cachePath) console.log(`Cache path: ${cachePath}`);
    console.log('');

    const results = await runBackfill(pool, {
      dryRun,
      source,
      limit,
      dispensaryId,
      cachePath,
    });

    console.log('\nBackfill Results:');
    console.log('='.repeat(40));
    for (const result of results) {
      console.log(`\n${result.source}:`);
      console.log(` Payloads created: ${result.payloadsCreated}`);
      console.log(` Skipped: ${result.skipped}`);
      console.log(` Errors: ${result.errors.length}`);
      console.log(` Duration: ${result.durationMs}ms`);
      if (result.errors.length > 0) {
        console.log(' First 5 errors:');
        for (const err of result.errors.slice(0, 5)) {
          console.log(` - ${err}`);
        }
      }
    }

    const totalCreated = results.reduce((sum, r) => sum + r.payloadsCreated, 0);
    console.log(`\nTotal payloads created: ${totalCreated}`);
  } catch (error: unknown) {
    console.error('Backfill error:', error instanceof Error ? error.message : error);
    // Record the failure instead of calling process.exit() here: exit() would
    // terminate immediately and the pool below would never be closed.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
main().catch((err) => {
  // Reached only if cleanup itself fails; make sure the failure is visible.
  console.error('Backfill error:', err);
  process.exit(1);
});

View File

@@ -0,0 +1,309 @@
#!/usr/bin/env npx tsx
/**
* Dutchie Discovery CLI
*
* Command-line interface for running the Dutchie store discovery pipeline.
*
* Usage:
* npx tsx src/scripts/run-discovery.ts <command> [options]
*
* Commands:
* discover:state <state> - Discover all stores in a state (e.g., AZ)
* discover:city <city> - Discover stores in a single city
* discover:full - Run full discovery pipeline
* seed:cities <state> - Seed known cities for a state
* stats - Show discovery statistics
* list - List discovered locations
*
* Examples:
* npx tsx src/scripts/run-discovery.ts discover:state AZ
* npx tsx src/scripts/run-discovery.ts discover:city phoenix --state AZ
* npx tsx src/scripts/run-discovery.ts seed:cities AZ
* npx tsx src/scripts/run-discovery.ts stats
* npx tsx src/scripts/run-discovery.ts list --status discovered --state AZ
*/
import { Pool } from 'pg';
import {
runFullDiscovery,
discoverCity,
discoverState,
getDiscoveryStats,
seedKnownCities,
ARIZONA_CITIES,
} from '../discovery';
/**
 * Parse `process.argv` into a command, positional arguments and flags.
 *
 * - The first argument is the command. When there is no first argument, or it
 *   is itself a `--flag` (e.g. `--help`), the command is 'help' and that
 *   argument is parsed as a flag rather than being mistaken for a command.
 * - `--key=value` stores everything after the FIRST '=' as the value, so
 *   values may themselves contain '='.
 * - `--key value` consumes the next token as the value unless that token is
 *   empty or starts with `--`.
 * - A bare `--key` is stored as `true`.
 * - Every other token is collected, in order, into `positional`.
 *
 * @returns `{ command, positional, flags }`
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const first = args[0];
  const hasCommand = Boolean(first) && !first.startsWith('--');
  const command = hasCommand ? first : 'help';
  const positional: string[] = [];
  const flags: Record<string, string | boolean> = {};

  // Skip the command token only when one was actually supplied.
  for (let i = hasCommand ? 1 : 0; i < args.length; i++) {
    const arg = args[i];
    if (!arg.startsWith('--')) {
      positional.push(arg);
      continue;
    }

    const body = arg.slice(2);
    const eq = body.indexOf('=');
    if (eq !== -1) {
      // Split on the first '=' only; the rest belongs to the value.
      flags[body.slice(0, eq)] = body.slice(eq + 1);
      continue;
    }

    const next = args[i + 1];
    if (next && !next.startsWith('--')) {
      flags[body] = next;
      i++; // the value token has been consumed
    } else {
      flags[body] = true;
    }
  }

  return { command, positional, flags };
}
/**
 * Build a pg connection pool from the DATABASE_URL environment variable.
 *
 * When the variable is unset or empty, an error is printed and the process
 * exits with status 1.
 */
function createPool(): Pool {
  const databaseUrl = process.env.DATABASE_URL;
  if (databaseUrl) {
    return new Pool({ connectionString: databaseUrl });
  }

  console.error('ERROR: DATABASE_URL environment variable is required');
  process.exit(1);
}
/**
 * Print the CLI usage text (commands, options and examples) to stdout with a
 * single console.log call. The text starts and ends with a newline.
 */
function printHelp() {
  const helpLines = [
    '',
    'Dutchie Discovery CLI',
    'Usage:',
    'npx tsx src/scripts/run-discovery.ts <command> [options]',
    'Commands:',
    'discover:state <state> Discover all stores in a state (e.g., AZ)',
    'discover:city <city> Discover stores in a single city',
    'discover:full Run full discovery pipeline',
    'seed:cities <state> Seed known cities for a state',
    'stats Show discovery statistics',
    'list List discovered locations',
    'Options:',
    '--state <code> State code (e.g., AZ, CA, ON)',
    '--country <code> Country code (default: US)',
    '--status <status> Filter by status (discovered, verified, rejected, merged)',
    '--limit <n> Limit results (default: varies by command)',
    "--dry-run Don't make any changes, just show what would happen",
    '--verbose Show detailed output',
    'Examples:',
    'npx tsx src/scripts/run-discovery.ts discover:state AZ',
    'npx tsx src/scripts/run-discovery.ts discover:city phoenix --state AZ',
    'npx tsx src/scripts/run-discovery.ts seed:cities AZ',
    'npx tsx src/scripts/run-discovery.ts stats',
    'npx tsx src/scripts/run-discovery.ts list --status discovered --state AZ --limit 20',
    '',
  ];
  console.log(helpLines.join('\n'));
}
/**
 * CLI entry point: parses the command line, opens a pool and dispatches to the
 * requested discovery command (`discover:state`, `discover:city`,
 * `discover:full`, `seed:cities`, `stats`, `list`).
 *
 * `help` (or `--help`) prints usage and exits 0 before any pool is created.
 * Usage errors and unknown commands exit with status 1. Any error thrown while
 * running a command is printed (with its stack when `--verbose` is set), the
 * pool is closed, and the process exits with status 1.
 *
 * Value-taking options (`--state`, `--country`, `--status`, `--limit`,
 * `--stale-days`) are validated: passing one without a value, or a
 * non-integer where a number is expected, is reported as an error instead of
 * leaking `true` or NaN into the discovery calls and the SQL query.
 */
async function main() {
  const { command, positional, flags } = parseArgs();

  if (command === 'help' || flags.help) {
    printHelp();
    process.exit(0);
  }

  // String value of a flag, or undefined when it was not given (or is empty).
  // A flag given without a value is parsed as `true`; reject that here rather
  // than pretending it is a string.
  const stringFlag = (name: string): string | undefined => {
    const value = flags[name];
    if (value === true) {
      throw new Error(`Option --${name} requires a value`);
    }
    return typeof value === 'string' && value !== '' ? value : undefined;
  };

  // Integer value of a flag, or `fallback` when it was not given.
  const intFlag = (name: string, fallback: number): number => {
    const raw = stringFlag(name);
    if (raw === undefined) return fallback;
    const parsed = parseInt(raw, 10);
    if (Number.isNaN(parsed)) {
      throw new Error(`Option --${name} expects an integer, got "${raw}"`);
    }
    return parsed;
  };

  const pool = createPool();
  let failed = false;

  try {
    switch (command) {
      case 'discover:state': {
        const stateCode = positional[0] || stringFlag('state');
        if (!stateCode) {
          console.error('ERROR: State code is required');
          console.error('Usage: discover:state <state>');
          process.exit(1);
        }
        console.log(`\nDiscovering stores in ${stateCode}...\n`);
        const result = await discoverState(pool, stateCode.toUpperCase(), {
          dryRun: Boolean(flags['dry-run']),
          verbose: Boolean(flags.verbose),
          cityLimit: intFlag('limit', 100),
        });
        console.log('\n=== DISCOVERY RESULTS ===');
        console.log(`Cities crawled: ${result.locations.length}`);
        console.log(`Locations found: ${result.totalLocationsFound}`);
        console.log(`Locations upserted: ${result.totalLocationsUpserted}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        break;
      }

      case 'discover:city': {
        const citySlug = positional[0];
        if (!citySlug) {
          console.error('ERROR: City slug is required');
          console.error('Usage: discover:city <city-slug> [--state AZ]');
          process.exit(1);
        }
        console.log(`\nDiscovering stores in ${citySlug}...\n`);
        const result = await discoverCity(pool, citySlug, {
          stateCode: stringFlag('state'),
          countryCode: stringFlag('country') || 'US',
          dryRun: Boolean(flags['dry-run']),
          verbose: Boolean(flags.verbose),
        });
        if (!result) {
          console.error(`City not found: ${citySlug}`);
          process.exit(1);
        }
        console.log('\n=== DISCOVERY RESULTS ===');
        console.log(`City: ${result.citySlug}`);
        console.log(`Locations found: ${result.locationsFound}`);
        console.log(`Locations upserted: ${result.locationsUpserted}`);
        console.log(`New: ${result.locationsNew}, Updated: ${result.locationsUpdated}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        if (result.errors.length > 0) {
          console.log(`Errors: ${result.errors.length}`);
          result.errors.forEach((e) => console.log(` - ${e}`));
        }
        break;
      }

      case 'discover:full': {
        console.log('\nRunning full discovery pipeline...\n');
        const result = await runFullDiscovery(pool, {
          stateCode: stringFlag('state'),
          countryCode: stringFlag('country') || 'US',
          cityLimit: intFlag('limit', 50),
          skipCityDiscovery: Boolean(flags['skip-cities']),
          // Only stale cities are crawled unless --all is passed.
          onlyStale: !flags.all,
          staleDays: intFlag('stale-days', 7),
          dryRun: Boolean(flags['dry-run']),
          verbose: Boolean(flags.verbose),
        });
        console.log('\n=== FULL DISCOVERY RESULTS ===');
        console.log(`Cities discovered: ${result.cities.citiesFound}`);
        console.log(`Cities upserted: ${result.cities.citiesUpserted}`);
        console.log(`Cities crawled: ${result.locations.length}`);
        console.log(`Total locations found: ${result.totalLocationsFound}`);
        console.log(`Total locations upserted: ${result.totalLocationsUpserted}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
        break;
      }

      case 'seed:cities': {
        const stateCode = positional[0] || stringFlag('state');
        if (!stateCode) {
          console.error('ERROR: State code is required');
          console.error('Usage: seed:cities <state>');
          process.exit(1);
        }
        // Only Arizona has a predefined city list.
        if (stateCode.toUpperCase() !== 'AZ') {
          console.error(`No predefined cities for state: ${stateCode}`);
          console.error('Add cities to city-discovery.ts ARIZONA_CITIES array (or add new state arrays)');
          process.exit(1);
        }
        const cities = ARIZONA_CITIES;
        console.log(`\nSeeding ${cities.length} cities for ${stateCode}...\n`);
        const result = await seedKnownCities(pool, cities);
        console.log(`Created: ${result.created} new cities`);
        console.log(`Updated: ${result.updated} existing cities`);
        break;
      }

      case 'stats': {
        console.log('\nFetching discovery statistics...\n');
        const stats = await getDiscoveryStats(pool);
        console.log('=== CITIES ===');
        console.log(`Total: ${stats.cities.total}`);
        console.log(`Crawled (24h): ${stats.cities.crawledLast24h}`);
        console.log(`Never crawled: ${stats.cities.neverCrawled}`);
        console.log('');
        console.log('=== LOCATIONS ===');
        console.log(`Total active: ${stats.locations.total}`);
        console.log(`Discovered: ${stats.locations.discovered}`);
        console.log(`Verified: ${stats.locations.verified}`);
        console.log(`Merged: ${stats.locations.merged}`);
        console.log(`Rejected: ${stats.locations.rejected}`);
        console.log('');
        console.log('=== BY STATE ===');
        stats.locations.byState.forEach((s) => {
          console.log(` ${s.stateCode}: ${s.count}`);
        });
        break;
      }

      case 'list': {
        const status = stringFlag('status');
        const stateCode = stringFlag('state');
        const limit = intFlag('limit', 50);

        // Build the WHERE clause with positional placeholders; user input only
        // ever travels through `params`, never through the SQL text.
        let whereClause = 'WHERE active = TRUE';
        const params: any[] = [];
        let paramIndex = 1;
        if (status) {
          whereClause += ` AND status = $${paramIndex}`;
          params.push(status);
          paramIndex++;
        }
        if (stateCode) {
          whereClause += ` AND state_code = $${paramIndex}`;
          params.push(stateCode.toUpperCase());
          paramIndex++;
        }
        // The limit is always the last placeholder.
        params.push(limit);

        const { rows } = await pool.query(
          `
SELECT id, platform, name, city, state_code, status, platform_menu_url, first_seen_at
FROM dutchie_discovery_locations
${whereClause}
ORDER BY first_seen_at DESC
LIMIT $${paramIndex}
`,
          params
        );

        console.log(`\nFound ${rows.length} locations:\n`);
        console.log('ID\tStatus\t\tState\tCity\t\tName');
        console.log('-'.repeat(80));
        rows.forEach((row: any) => {
          // Treat missing text columns as empty so one sparse row cannot
          // abort the listing.
          const statusDisplay = (row.status || '').padEnd(12);
          const cityDisplay = (row.city || '').substring(0, 12).padEnd(12);
          const nameDisplay = (row.name || '').substring(0, 30);
          console.log(
            `${row.id}\t${statusDisplay}\t${row.state_code || 'N/A'}\t${cityDisplay}\t${nameDisplay}`
          );
        });
        break;
      }

      default:
        console.error(`Unknown command: ${command}`);
        printHelp();
        process.exit(1);
    }
  } catch (error: unknown) {
    const err = error instanceof Error ? error : new Error(String(error));
    console.error('ERROR:', err.message);
    if (flags.verbose) {
      console.error(err.stack);
    }
    // Defer the exit: calling process.exit() here would terminate the process
    // before the finally block below gets to close the pool.
    failed = true;
  } finally {
    await pool.end();
  }

  if (failed) {
    process.exit(1);
  }
}
main();

View File

@@ -1,5 +1,8 @@
/**
* Run Dutchie GraphQL Scrape
* LEGACY SCRIPT - Run Dutchie GraphQL Scrape
*
* DEPRECATED: This script creates its own database pool.
* Future implementations should use the CannaiQ API endpoints instead.
*
* This script demonstrates the full pipeline:
* 1. Puppeteer navigates to Dutchie menu
@@ -7,12 +10,21 @@
* 3. Products are normalized to our schema
* 4. Products are upserted to database
* 5. Derived views (brands, categories, specials) are automatically updated
*
* DO NOT:
* - Add this to package.json scripts
* - Run this in automated jobs
* - Use DATABASE_URL directly
*/
import { Pool } from 'pg';
import { scrapeDutchieMenu } from '../scrapers/dutchie-graphql';
const DATABASE_URL = process.env.DATABASE_URL || 'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';
console.warn('\n⚠ LEGACY SCRIPT: This script should be replaced with CannaiQ API calls.\n');
// Single database connection (cannaiq in cannaiq-postgres container)
const DATABASE_URL = process.env.CANNAIQ_DB_URL ||
`postgresql://${process.env.CANNAIQ_DB_USER || 'dutchie'}:${process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass'}@${process.env.CANNAIQ_DB_HOST || 'localhost'}:${process.env.CANNAIQ_DB_PORT || '54320'}/${process.env.CANNAIQ_DB_NAME || 'cannaiq'}`;
async function main() {
const pool = new Pool({ connectionString: DATABASE_URL });

View File

@@ -0,0 +1,510 @@
#!/usr/bin/env npx tsx
/**
* Unified Hydration CLI
*
* Central entrypoint for all hydration operations:
*
* MODES:
* payload - Process raw_payloads → canonical tables (existing behavior)
* backfill - Migrate dutchie_* → canonical tables (legacy backfill)
* sync - Sync recent crawls to canonical tables
* status - Show hydration progress
*
* Usage:
* npx tsx src/scripts/run-hydration.ts --mode=<mode> [options]
*
* Examples:
* # Payload-based hydration (default)
* npx tsx src/scripts/run-hydration.ts --mode=payload
*
* # Full legacy backfill
* npx tsx src/scripts/run-hydration.ts --mode=backfill
*
* # Backfill single dispensary
* npx tsx src/scripts/run-hydration.ts --mode=backfill --store=123
*
* # Sync recent crawls
* npx tsx src/scripts/run-hydration.ts --mode=sync --since="2 hours"
*
* # Check status
* npx tsx src/scripts/run-hydration.ts --mode=status
*/
import { Pool } from 'pg';
import dotenv from 'dotenv';
import {
HydrationWorker,
runHydrationBatch,
processPayloadById,
reprocessFailedPayloads,
getPayloadStats,
} from '../hydration';
import { runLegacyBackfill } from '../hydration/legacy-backfill';
import { syncRecentCrawls } from '../hydration/incremental-sync';
dotenv.config();
// ============================================================
// ARGUMENT PARSING
// ============================================================
/** Options accepted by the hydration CLI, as produced by parseArgs(). */
interface CliArgs {
  /** Which operation to run; 'payload' when --mode is not given. */
  mode: 'payload' | 'backfill' | 'sync' | 'status';
  /** Value of --store, when given. */
  store?: number;
  /** Value of --since, when given (passed on verbatim). */
  since?: string;
  /** True when --dry-run is present. */
  dryRun: boolean;
  /** True when --verbose or -v is present. */
  verbose: boolean;
  /** Value of --limit; 50 when not given. */
  limit: number;
  /** True when --loop is present. */
  loop: boolean;
  /** True when --reprocess is present. */
  reprocess: boolean;
  /** Value of --payload, when given. */
  payloadId?: string;
  /** Value of --start-from, when given. */
  startFrom?: number;
}

/**
 * Parse `process.argv` into a CliArgs object.
 *
 * Value-taking options (--mode, --store, --since, --limit, --payload,
 * --start-from) are accepted both as `--name=value` and as `--name value`.
 * For the `=` form everything after the first '=' is the value, so values may
 * themselves contain '='.
 *
 * @returns the parsed options; optional fields are only present when the
 *          corresponding option was supplied.
 * @throws Error when --mode is not one of payload/backfill/sync/status, or
 *         when --store, --limit or --start-from is not an integer. Failing
 *         loudly matters here: a mistyped mode must not silently run payload
 *         hydration, and a mistyped store id must not widen the run.
 */
function parseArgs(): CliArgs {
  const args = process.argv.slice(2);
  const validModes: ReadonlyArray<CliArgs['mode']> = ['payload', 'backfill', 'sync', 'status'];

  // Value of `--name=value` (first match wins), else the token following a
  // bare `--name` unless that token is empty or looks like another option.
  const optionValue = (name: string): string | undefined => {
    const prefix = `--${name}=`;
    const inline = args.find(a => a.startsWith(prefix));
    if (inline !== undefined) {
      return inline.slice(prefix.length);
    }
    const idx = args.indexOf(`--${name}`);
    const next = idx !== -1 ? args[idx + 1] : undefined;
    return next && !next.startsWith('--') ? next : undefined;
  };

  // Integer value of an option, or undefined when it was not supplied.
  const intOption = (name: string): number | undefined => {
    const raw = optionValue(name);
    if (raw === undefined) return undefined;
    const parsed = parseInt(raw, 10);
    if (Number.isNaN(parsed)) {
      throw new Error(`Invalid value for --${name}: "${raw}" (expected an integer)`);
    }
    return parsed;
  };

  // Defaults
  const result: CliArgs = {
    mode: 'payload',
    dryRun: args.includes('--dry-run'),
    verbose: args.includes('--verbose') || args.includes('-v'),
    limit: 50,
    loop: args.includes('--loop'),
    reprocess: args.includes('--reprocess'),
  };

  const mode = optionValue('mode');
  if (mode !== undefined) {
    const matched = validModes.find(m => m === mode);
    if (matched === undefined) {
      throw new Error(`Unknown mode: "${mode}" (expected one of: ${validModes.join(', ')})`);
    }
    result.mode = matched;
  }

  // Optional fields are assigned only when supplied so that absent options do
  // not show up as explicit `undefined` properties.
  const store = intOption('store');
  if (store !== undefined) result.store = store;

  const since = optionValue('since');
  if (since !== undefined) result.since = since;

  const limit = intOption('limit');
  if (limit !== undefined) result.limit = limit;

  const payloadId = optionValue('payload');
  if (payloadId !== undefined) result.payloadId = payloadId;

  const startFrom = intOption('start-from');
  if (startFrom !== undefined) result.startFrom = startFrom;

  return result;
}
// ============================================================
// DATABASE CONNECTION
// ============================================================
/**
 * Resolve the PostgreSQL connection string from the environment.
 *
 * Precedence:
 *   1. CANNAIQ_DB_URL, used verbatim.
 *   2. CANNAIQ_DB_HOST / _PORT / _NAME / _USER / _PASS, when all five are set.
 *      User and password are percent-encoded so that characters such as
 *      '@', ':', '/' or '#' in a credential cannot corrupt the URI.
 *   3. DATABASE_URL, used verbatim (fallback for local development).
 *
 * @returns a `postgresql://` connection string
 * @throws Error when none of the above is configured
 */
function getConnectionString(): string {
  const env = process.env;

  if (env.CANNAIQ_DB_URL) {
    return env.CANNAIQ_DB_URL;
  }

  const host = env.CANNAIQ_DB_HOST;
  const port = env.CANNAIQ_DB_PORT;
  const name = env.CANNAIQ_DB_NAME;
  const user = env.CANNAIQ_DB_USER;
  const pass = env.CANNAIQ_DB_PASS;
  if (host && port && name && user && pass) {
    // NOTE(review): assumes the individual variables hold raw (not already
    // percent-encoded) credentials - confirm against deployment config.
    return `postgresql://${encodeURIComponent(user)}:${encodeURIComponent(pass)}@${host}:${port}/${name}`;
  }

  // Fallback to DATABASE_URL for local development
  if (env.DATABASE_URL) {
    return env.DATABASE_URL;
  }

  throw new Error('Missing database connection environment variables');
}
// ============================================================
// MODE: PAYLOAD (existing behavior)
// ============================================================
/**
 * Payload mode: hydrate canonical tables from stored raw payloads.
 *
 * Prints a banner and the current payload stats (best effort), then performs
 * exactly one of the following, checked in this order:
 *   - `args.payloadId` set -> process that single payload
 *   - `args.reprocess`     -> reprocess failed payloads
 *   - `args.loop`          -> run a HydrationWorker loop until SIGINT
 *   - otherwise            -> run one hydration batch and print its summary
 */
async function runPayloadMode(pool: Pool, args: CliArgs): Promise<void> {
  const { dryRun, limit: batchSize, payloadId } = args;
  const rule = '='.repeat(60);

  console.log(rule);
  console.log('HYDRATION - PAYLOAD MODE');
  console.log(rule);
  console.log(`Dry run: ${dryRun}`);
  console.log(`Batch size: ${batchSize}`);
  console.log('');

  // Stats are informational only; a failure here must not stop the run.
  try {
    const current = await getPayloadStats(pool);
    console.log('Current payload stats:');
    console.log(` Total: ${current.total}`);
    console.log(` Processed: ${current.processed}`);
    console.log(` Unprocessed: ${current.unprocessed}`);
    console.log(` Failed: ${current.failed}`);
    console.log('');
  } catch {
    console.log('Note: raw_payloads table not found or empty');
    console.log('');
  }

  // Single payload requested by ID.
  if (payloadId) {
    console.log(`Processing payload: ${payloadId}`);
    const outcome = await processPayloadById(pool, payloadId, { dryRun });
    console.log('Result:', JSON.stringify(outcome, null, 2));
    return;
  }

  // Retry payloads that failed earlier.
  if (args.reprocess) {
    console.log('Reprocessing failed payloads...');
    const outcome = await reprocessFailedPayloads(pool, { dryRun, batchSize });
    console.log('Result:', JSON.stringify(outcome, null, 2));
    return;
  }

  // Continuous loop; Ctrl-C asks the worker to stop.
  if (args.loop) {
    const worker = new HydrationWorker(pool, { dryRun, batchSize });
    process.on('SIGINT', () => {
      console.log('\nStopping hydration loop...');
      worker.stop();
    });
    await worker.runLoop(30000);
    return;
  }

  // Default: one batch, then a summary.
  const batch = await runHydrationBatch(pool, { dryRun, batchSize });
  console.log('Batch result:');
  console.log(` Payloads processed: ${batch.payloadsProcessed}`);
  console.log(` Payloads failed: ${batch.payloadsFailed}`);
  console.log(` Products upserted: ${batch.totalProductsUpserted}`);
  console.log(` Snapshots created: ${batch.totalSnapshotsCreated}`);
  console.log(` Brands created: ${batch.totalBrandsCreated}`);
  console.log(` Duration: ${batch.durationMs}ms`);
  if (batch.errors.length > 0) {
    console.log('\nErrors:');
    // Only the first ten errors are shown.
    for (const failure of batch.errors.slice(0, 10)) {
      console.log(` ${failure.payloadId}: ${failure.error}`);
    }
  }
}
// ============================================================
// MODE: BACKFILL (legacy dutchie_* → canonical)
// ============================================================
/**
 * Backfill mode: prints a banner describing the run (dry/live, optional store
 * and starting product ID) and then delegates to runLegacyBackfill.
 */
async function runBackfillMode(pool: Pool, args: CliArgs): Promise<void> {
  const { dryRun, verbose, store, startFrom } = args;
  const rule = '='.repeat(60);

  const banner = [rule, 'HYDRATION - BACKFILL MODE', rule, `Mode: ${dryRun ? 'DRY RUN' : 'LIVE'}`];
  if (store) {
    banner.push(`Store: ${store}`);
  }
  if (startFrom) {
    banner.push(`Start from product ID: ${startFrom}`);
  }
  banner.push('');
  for (const line of banner) {
    console.log(line);
  }

  await runLegacyBackfill(pool, {
    dryRun,
    verbose,
    dispensaryId: store,
    startFromProductId: startFrom,
  });
}
// ============================================================
// MODE: SYNC (recent crawls → canonical)
// ============================================================
/**
 * Sync mode: calls syncRecentCrawls for the requested time window and prints
 * how many crawls were synced plus up to ten error messages.
 *
 * The window defaults to '1 hour' when `args.since` is missing or empty.
 */
async function runSyncMode(pool: Pool, args: CliArgs): Promise<void> {
  const window = args.since || '1 hour';
  const rule = '='.repeat(60);

  console.log(rule);
  console.log('HYDRATION - SYNC MODE');
  console.log(rule);
  console.log(`Mode: ${args.dryRun ? 'DRY RUN' : 'LIVE'}`);
  console.log(`Since: ${window}`);
  console.log(`Limit: ${args.limit}`);
  if (args.store) {
    console.log(`Store: ${args.store}`);
  }
  console.log('');

  const outcome = await syncRecentCrawls(pool, {
    dryRun: args.dryRun,
    verbose: args.verbose,
    since: window,
    dispensaryId: args.store,
    limit: args.limit,
  });

  const errorCount = outcome.errors.length;
  console.log('');
  console.log('=== Sync Results ===');
  console.log(`Crawls synced: ${outcome.synced}`);
  console.log(`Errors: ${errorCount}`);

  if (errorCount === 0) {
    return;
  }

  console.log('');
  console.log('Errors:');
  // Show at most ten errors, then summarise the remainder.
  for (const message of outcome.errors.slice(0, 10)) {
    console.log(` - ${message}`);
  }
  if (errorCount > 10) {
    console.log(` ... and ${errorCount - 10} more`);
  }
}
// ============================================================
// MODE: STATUS
// ============================================================
/**
 * Status mode: prints a read-only report made of
 *   1. the rows of the v_hydration_status view, when that view exists,
 *   2. row counts of the canonical tables,
 *   3. row counts of the legacy tables,
 *   4. crawl_runs from the last 24 hours grouped by status,
 *   5. raw payload statistics.
 *
 * Each section after the first is best effort: a failing query is reported as
 * a "not found" line and the report continues.
 */
async function runStatusMode(pool: Pool): Promise<void> {
  const wideRule = '='.repeat(60);
  console.log(wideRule);
  console.log('HYDRATION STATUS');
  console.log(wideRule);
  console.log('');

  // Prints a heading followed by one "<table>: <count>" line per table.
  const printTableCounts = async (heading: string, tableNames: string[]): Promise<void> => {
    console.log(heading);
    console.log('-'.repeat(40));
    for (const tableName of tableNames) {
      try {
        const { rows } = await pool.query(`SELECT COUNT(*) as cnt FROM ${tableName}`);
        console.log(`${tableName}: ${rows[0].cnt}`);
      } catch {
        console.log(`${tableName}: (table not found)`);
      }
    }
  };

  // Check if v_hydration_status view exists
  const viewCheck = await pool.query(`
SELECT EXISTS (
SELECT 1 FROM pg_views WHERE viewname = 'v_hydration_status'
) as exists
`);

  if (!viewCheck.rows[0].exists) {
    console.log('Note: v_hydration_status view not found. Run migration 052 first.');
  } else {
    const progressRows = (await pool.query('SELECT * FROM v_hydration_status')).rows;
    const tableRule = '-'.repeat(70);
    console.log('Hydration Progress:');
    console.log(tableRule);
    console.log(
      ['Table'.padEnd(30), 'Source'.padEnd(12), 'Hydrated'.padEnd(12), 'Progress'].join('')
    );
    console.log(tableRule);
    for (const entry of progressRows) {
      const pct = entry.hydration_pct ? `${entry.hydration_pct}%` : 'N/A';
      console.log(
        [
          entry.source_table.padEnd(30),
          String(entry.source_count).padEnd(12),
          String(entry.hydrated_count).padEnd(12),
          pct,
        ].join('')
      );
    }
    console.log(tableRule);
  }

  // Get counts from canonical tables
  await printTableCounts('\nCanonical Table Counts:', [
    'store_products',
    'store_product_snapshots',
    'crawl_runs',
  ]);

  // Get legacy table counts
  await printTableCounts('\nLegacy Table Counts:', [
    'dutchie_products',
    'dutchie_product_snapshots',
    'dispensary_crawl_jobs',
  ]);

  // Show recent sync activity
  console.log('\nRecent Crawl Runs (last 24h):');
  console.log('-'.repeat(40));
  try {
    const recent = await pool.query(`
SELECT status, COUNT(*) as count
FROM crawl_runs
WHERE started_at > NOW() - INTERVAL '24 hours'
GROUP BY status
ORDER BY count DESC
`);
    if (recent.rows.length === 0) {
      console.log('No crawl runs in last 24 hours');
    } else {
      for (const bucket of recent.rows) {
        console.log(`${bucket.status}: ${bucket.count}`);
      }
    }
  } catch {
    console.log('(crawl_runs table not found)');
  }

  // Payload stats
  console.log('\nPayload Hydration:');
  console.log('-'.repeat(40));
  try {
    const payloads = await getPayloadStats(pool);
    console.log(`Total payloads: ${payloads.total}`);
    console.log(`Processed: ${payloads.processed}`);
    console.log(`Unprocessed: ${payloads.unprocessed}`);
    console.log(`Failed: ${payloads.failed}`);
  } catch {
    console.log('(raw_payloads table not found)');
  }
}
// ============================================================
// HELP
// ============================================================
/**
 * Print the hydration CLI usage text (modes, options and examples) to stdout
 * with a single console.log call. The text starts and ends with a newline.
 */
function showHelp(): void {
  const helpLines = [
    '',
    'Unified Hydration CLI',
    'Usage:',
    'npx tsx src/scripts/run-hydration.ts --mode=<mode> [options]',
    'Modes:',
    'payload Process raw_payloads → canonical tables (default)',
    'backfill Migrate dutchie_* → canonical tables',
    'sync Sync recent crawls to canonical tables',
    'status Show hydration progress',
    'Common Options:',
    '--dry-run Print changes without modifying database',
    '--verbose, -v Show detailed progress',
    '--store=<id> Limit to a single dispensary',
    '--limit=<n> Batch size (default: 50)',
    'Payload Mode Options:',
    '--loop Run continuous hydration loop',
    '--reprocess Reprocess failed payloads',
    '--payload=<id> Process a specific payload by ID',
    'Backfill Mode Options:',
    '--start-from=<id> Resume from a specific product ID',
    'Sync Mode Options:',
    '--since=<interval> Time window (default: "1 hour")',
    'Examples: "30 minutes", "2 hours", "1 day"',
    'Examples:',
    '# Full legacy backfill (dutchie_* → canonical)',
    'npx tsx src/scripts/run-hydration.ts --mode=backfill',
    '# Backfill single dispensary (dry run)',
    'npx tsx src/scripts/run-hydration.ts --mode=backfill --store=123 --dry-run',
    '# Sync recent crawls from last 4 hours',
    'npx tsx src/scripts/run-hydration.ts --mode=sync --since="4 hours"',
    '# Sync single dispensary',
    'npx tsx src/scripts/run-hydration.ts --mode=sync --store=123',
    '# Run payload hydration loop',
    'npx tsx src/scripts/run-hydration.ts --mode=payload --loop',
    '# Check hydration status',
    'npx tsx src/scripts/run-hydration.ts --mode=status',
    '',
  ];
  console.log(helpLines.join('\n'));
}
// ============================================================
// MAIN
// ============================================================
/**
 * CLI entry point for the hydration script.
 *
 * `--help` / `-h` prints usage and exits 0. Otherwise the arguments are
 * parsed, a pool (max 5 connections) is opened, connectivity is verified with
 * `SELECT 1`, and the selected mode is run.
 *
 * Argument parsing and connection-string resolution happen inside the
 * try block so that a configuration problem (for example missing database
 * environment variables) is reported through the same `Error: ...` path as a
 * runtime failure instead of surfacing as an unhandled promise rejection.
 * On any failure the pool, if it was created, is closed before the process
 * exits with status 1.
 */
async function main(): Promise<void> {
  const rawArgs = process.argv.slice(2);
  if (rawArgs.includes('--help') || rawArgs.includes('-h')) {
    showHelp();
    process.exit(0);
  }

  let pool: Pool | undefined;
  let failed = false;

  try {
    const args = parseArgs();
    pool = new Pool({
      connectionString: getConnectionString(),
      max: 5,
    });

    // Verify connection
    await pool.query('SELECT 1');
    console.log('Database connection: OK\n');

    switch (args.mode) {
      case 'payload':
        await runPayloadMode(pool, args);
        break;
      case 'backfill':
        await runBackfillMode(pool, args);
        break;
      case 'sync':
        await runSyncMode(pool, args);
        break;
      case 'status':
        await runStatusMode(pool);
        break;
      default:
        // Defensive: unreachable while parseArgs only yields the four modes.
        console.error(`Unknown mode: ${args.mode}`);
        showHelp();
        failed = true;
    }
  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : String(error));
    // Defer the exit: calling process.exit() here would terminate the process
    // before the finally block below gets to close the pool.
    failed = true;
  } finally {
    // The pool is undefined when parsing or configuration failed early.
    await pool?.end();
  }

  if (failed) {
    process.exit(1);
  }
}
main();

View File

@@ -0,0 +1,225 @@
/**
* Sandbox Crawl Script for Dispensary 101 (Trulieve Scottsdale)
*
* Runs a full crawl and captures trace data for observability.
* NO automatic promotion or status changes.
*/
import { Pool } from 'pg';
import { crawlDispensaryProducts } from '../dutchie-az/services/product-crawler';
import { Dispensary } from '../dutchie-az/types';
// Module-level pool, created at import time from DATABASE_URL.
const pool = new Pool({ connectionString: process.env.DATABASE_URL });
/**
 * Runs one sandbox crawl for dispensary 101 and records the outcome.
 *
 * Steps, in order:
 *   1. Load dispensary 101 and map the row to the Dispensary shape.
 *   2. Load its crawler profile, if any, for display and for the trace row.
 *   3. Call crawlDispensaryProducts with pricing type 'rec', both modes
 *      enabled and image download disabled.
 *   4. Print the crawl result; when products were upserted, also print ten
 *      recently updated products and per-field coverage percentages.
 *   5. Insert one row into crawl_orchestration_traces. state_at_start and
 *      state_at_end are written with the same value, i.e. this script does
 *      not change the profile status.
 *
 * If the crawl (or anything else inside the try block) throws, an error trace
 * row is inserted instead. The pool is closed at the end of the function and
 * on the "dispensary not found" early return.
 *
 * NOTE(review): if the error-trace INSERT itself throws, pool.end() is skipped
 * and the rejection is handled by the .catch() on the main() call below.
 */
async function main() {
console.log('=== SANDBOX CRAWL: Dispensary 101 (Trulieve Scottsdale) ===\n');
// Captured up front; used as started_at and for the duration of error traces.
const startTime = Date.now();
// Load dispensary from database (only columns that exist in local schema)
const dispResult = await pool.query(`
SELECT id, name, city, state, menu_type, platform_dispensary_id, menu_url
FROM dispensaries
WHERE id = 101
`);
if (!dispResult.rows[0]) {
console.log('ERROR: Dispensary 101 not found');
await pool.end();
return;
}
const row = dispResult.rows[0];
// Map to Dispensary interface (snake_case -> camelCase)
const dispensary: Dispensary = {
id: row.id,
platform: 'dutchie',
name: row.name,
// Slug is derived from the name: lower-cased, whitespace runs become '-'.
slug: row.name.toLowerCase().replace(/\s+/g, '-'),
city: row.city,
state: row.state,
platformDispensaryId: row.platform_dispensary_id,
menuType: row.menu_type,
menuUrl: row.menu_url,
// Timestamps are not selected above; they are filled with the current time.
createdAt: new Date(),
updatedAt: new Date(),
};
console.log('=== DISPENSARY INFO ===');
console.log(`Name: ${dispensary.name}`);
console.log(`Location: ${dispensary.city}, ${dispensary.state}`);
console.log(`Menu Type: ${dispensary.menuType}`);
console.log(`Platform ID: ${dispensary.platformDispensaryId}`);
console.log(`Menu URL: ${dispensary.menuUrl}`);
console.log('');
// Get profile info
const profileResult = await pool.query(`
SELECT id, profile_key, status, config FROM dispensary_crawler_profiles
WHERE dispensary_id = 101
`);
// Only the first returned profile row is used; undefined when none exists.
const profile = profileResult.rows[0];
if (profile) {
console.log('=== PROFILE ===');
console.log(`Profile Key: ${profile.profile_key}`);
console.log(`Profile Status: ${profile.status}`);
console.log(`Config: ${JSON.stringify(profile.config, null, 2)}`);
console.log('');
} else {
console.log('=== PROFILE ===');
console.log('No profile found - will use defaults');
console.log('');
}
// Run the crawl
console.log('=== STARTING CRAWL ===');
console.log('Options: useBothModes=true, downloadImages=false (sandbox)');
console.log('');
try {
const result = await crawlDispensaryProducts(dispensary, 'rec', {
useBothModes: true,
downloadImages: false, // Skip images in sandbox mode for speed
});
console.log('');
console.log('=== CRAWL RESULT ===');
console.log(`Success: ${result.success}`);
console.log(`Products Found: ${result.productsFound}`);
console.log(`Products Fetched: ${result.productsFetched}`);
console.log(`Products Upserted: ${result.productsUpserted}`);
console.log(`Snapshots Created: ${result.snapshotsCreated}`);
if (result.errorMessage) {
console.log(`Error: ${result.errorMessage}`);
}
console.log(`Duration: ${result.durationMs}ms`);
console.log('');
// Show sample products from database
if (result.productsUpserted > 0) {
const sampleProducts = await pool.query(`
SELECT
id, name, brand_name, type, subcategory, strain_type,
price_rec, price_rec_original, stock_status, external_product_id
FROM dutchie_products
WHERE dispensary_id = 101
ORDER BY updated_at DESC
LIMIT 10
`);
console.log('=== SAMPLE PRODUCTS (10) ===');
sampleProducts.rows.forEach((p: any, i: number) => {
console.log(`${i + 1}. ${p.name}`);
console.log(` Brand: ${p.brand_name || 'N/A'}`);
console.log(` Type: ${p.type} / ${p.subcategory || 'N/A'}`);
console.log(` Strain: ${p.strain_type || 'N/A'}`);
console.log(` Price: $${p.price_rec || 'N/A'} (orig: $${p.price_rec_original || 'N/A'})`);
console.log(` Stock: ${p.stock_status}`);
console.log(` External ID: ${p.external_product_id}`);
console.log('');
});
// Show field coverage stats
const fieldStats = await pool.query(`
SELECT
COUNT(*) as total,
COUNT(brand_name) as with_brand,
COUNT(type) as with_type,
COUNT(strain_type) as with_strain,
COUNT(price_rec) as with_price,
COUNT(image_url) as with_image,
COUNT(description) as with_description,
COUNT(thc_content) as with_thc,
COUNT(cbd_content) as with_cbd
FROM dutchie_products
WHERE dispensary_id = 101
`);
const stats = fieldStats.rows[0];
// NOTE(review): each percentage divides by stats.total; a zero total is only
// ruled out indirectly by the productsUpserted > 0 guard above - confirm.
console.log('=== FIELD COVERAGE ===');
console.log(`Total products: ${stats.total}`);
console.log(`With brand: ${stats.with_brand} (${Math.round(stats.with_brand / stats.total * 100)}%)`);
console.log(`With type: ${stats.with_type} (${Math.round(stats.with_type / stats.total * 100)}%)`);
console.log(`With strain_type: ${stats.with_strain} (${Math.round(stats.with_strain / stats.total * 100)}%)`);
console.log(`With price_rec: ${stats.with_price} (${Math.round(stats.with_price / stats.total * 100)}%)`);
console.log(`With image_url: ${stats.with_image} (${Math.round(stats.with_image / stats.total * 100)}%)`);
console.log(`With description: ${stats.with_description} (${Math.round(stats.with_description / stats.total * 100)}%)`);
console.log(`With THC: ${stats.with_thc} (${Math.round(stats.with_thc / stats.total * 100)}%)`);
console.log(`With CBD: ${stats.with_cbd} (${Math.round(stats.with_cbd / stats.total * 100)}%)`);
console.log('');
}
// Insert trace record for observability
const traceData = {
crawlResult: result,
dispensaryInfo: {
id: dispensary.id,
name: dispensary.name,
platformDispensaryId: dispensary.platformDispensaryId,
menuUrl: dispensary.menuUrl,
},
profile: profile || null,
timestamp: new Date().toISOString(),
};
// The parameter array below is positional: entry N fills placeholder $N, in
// the column order listed in the INSERT. completed_at is set by NOW().
await pool.query(`
INSERT INTO crawl_orchestration_traces
(dispensary_id, profile_id, profile_key, crawler_module, mode,
state_at_start, state_at_end, trace, success, products_found,
duration_ms, started_at, completed_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW())
`, [
101,
profile?.id || null,
profile?.profile_key || null,
'product-crawler',
'sandbox',
profile?.status || 'no_profile',
profile?.status || 'no_profile', // No status change in sandbox
JSON.stringify(traceData),
result.success,
result.productsFound,
result.durationMs,
new Date(startTime),
]);
console.log('=== TRACE RECORDED ===');
console.log('Trace saved to crawl_orchestration_traces table');
} catch (error: any) {
console.error('=== CRAWL ERROR ===');
console.error('Error:', error.message);
console.error('Stack:', error.stack);
// Record error trace
// Same column layout as the success trace, except that $10 is error_message
// instead of products_found, and duration is measured from startTime.
await pool.query(`
INSERT INTO crawl_orchestration_traces
(dispensary_id, profile_id, profile_key, crawler_module, mode,
state_at_start, state_at_end, trace, success, error_message,
duration_ms, started_at, completed_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW())
`, [
101,
profile?.id || null,
profile?.profile_key || null,
'product-crawler',
'sandbox',
profile?.status || 'no_profile',
profile?.status || 'no_profile',
JSON.stringify({ error: error.message, stack: error.stack }),
false,
error.message,
Date.now() - startTime,
new Date(startTime),
]);
}
await pool.end();
console.log('=== SANDBOX CRAWL COMPLETE ===');
}
// Anything that escapes main() is logged and turned into exit status 1.
main().catch(e => {
console.error('Fatal error:', e.message);
process.exit(1);
});

View File

@@ -0,0 +1,181 @@
/**
* LEGACY SCRIPT - Sandbox Crawl Test
*
* DEPRECATED: This script uses direct database connections.
* Future implementations should use the CannaiQ API endpoints instead.
*
* This script runs sandbox crawl for a dispensary and captures the full trace.
* It is kept for historical reference and manual testing only.
*
* DO NOT:
* - Add this to package.json scripts
* - Run this in automated jobs
* - Use DATABASE_URL directly
*
* Usage (manual only):
* STORAGE_DRIVER=local npx tsx src/scripts/sandbox-test.ts <dispensary_id>
*
* LOCAL MODE REQUIREMENTS:
* - STORAGE_DRIVER=local
* - STORAGE_BASE_PATH=./storage
* - Local cannaiq-postgres on port 54320
* - NO MinIO, NO Kubernetes
*/
import { query, getClient, closePool } from '../dutchie-az/db/connection';
import { runDispensaryOrchestrator } from '../services/dispensary-orchestrator';
/**
 * Print the storage-related environment and abort unless the script is
 * running in local mode.
 *
 * Local mode means STORAGE_DRIVER is 'local' (the default when unset) and
 * MINIO_ENDPOINT is not set. Each violation prints an error and exits with
 * status 1; the driver check runs before the MinIO check.
 */
function verifyLocalMode(): void {
  const { STORAGE_DRIVER, MINIO_ENDPOINT, STORAGE_BASE_PATH } = process.env;
  const driver = STORAGE_DRIVER || 'local';

  const report = [
    '=== LOCAL MODE VERIFICATION ===',
    `STORAGE_DRIVER: ${driver}`,
    `MINIO_ENDPOINT: ${MINIO_ENDPOINT || 'NOT SET (good)'}`,
    `STORAGE_BASE_PATH: ${STORAGE_BASE_PATH || './storage'}`,
    'DB Connection: Using canonical CannaiQ pool',
  ];
  for (const line of report) {
    console.log(line);
  }

  // Checked in order; the first violation terminates the process.
  const violations: Array<[boolean, string]> = [
    [driver !== 'local', 'ERROR: STORAGE_DRIVER must be "local"'],
    [Boolean(MINIO_ENDPOINT), 'ERROR: MINIO_ENDPOINT should NOT be set in local mode'],
  ];
  for (const [violated, message] of violations) {
    if (violated) {
      console.error(message);
      process.exit(1);
    }
  }

  console.log('✅ Local mode verified\n');
}
/**
 * Looks up one dispensary together with its crawler profile columns.
 *
 * The profile is LEFT JOINed, so the profile columns are NULL when the
 * dispensary has no profile row.
 *
 * @param dispensaryId - Value matched against `dispensaries.id`.
 * @returns The first row of the result set, or `undefined` when nothing matched.
 */
async function getDispensaryInfo(dispensaryId: number) {
  const sql = `
    SELECT d.id, d.name, d.city, d.menu_type, d.platform_dispensary_id, d.menu_url,
    p.profile_key, p.status as profile_status, p.config
    FROM dispensaries d
    LEFT JOIN dispensary_crawler_profiles p ON p.dispensary_id = d.id
    WHERE d.id = $1
  `;
  const { rows } = await query(sql, [dispensaryId]);
  return rows[0];
}
/**
 * Fetches the newest orchestration trace recorded for a dispensary.
 *
 * @param dispensaryId - Value matched against `crawl_orchestration_traces.dispensary_id`.
 * @returns The row with the latest `created_at`, or `undefined` when there is none.
 */
async function getLatestTrace(dispensaryId: number) {
  const sql = `
    SELECT *
    FROM crawl_orchestration_traces
    WHERE dispensary_id = $1
    ORDER BY created_at DESC
    LIMIT 1
  `;
  const { rows } = await query(sql, [dispensaryId]);
  return rows[0];
}
/**
 * Entry point for the sandbox crawl test.
 *
 * Reads the dispensary ID from the first CLI argument, verifies local mode,
 * prints the dispensary/profile info, runs the orchestrator for that
 * dispensary, and prints the crawl result followed by the most recent row of
 * `crawl_orchestration_traces` for it.
 *
 * Exits with code 1 on a missing/invalid ID, an unknown dispensary, or any
 * thrown error. Failures that happen inside the `try` only set a flag:
 * `process.exit()` terminates the process immediately and would skip the
 * `finally` block, so the exit is deferred until `closePool()` has finished.
 */
async function main() {
  console.warn('\n⚠ LEGACY SCRIPT: This script should be replaced with CannaiQ API calls.\n');

  const dispensaryId = parseInt(process.argv[2], 10);
  // parseInt yields NaN for a missing or non-numeric argument; 0 is rejected as well.
  if (!dispensaryId || isNaN(dispensaryId)) {
    console.error('Usage: npx tsx src/scripts/sandbox-test.ts <dispensary_id>');
    console.error('Example: npx tsx src/scripts/sandbox-test.ts 101');
    process.exit(1);
  }

  // Verify local mode first
  verifyLocalMode();

  // Set instead of exiting in place so that the pool is always closed first.
  let failed = false;

  try {
    // Get dispensary info
    console.log(`=== DISPENSARY INFO (ID: ${dispensaryId}) ===`);
    const dispensary = await getDispensaryInfo(dispensaryId);
    if (!dispensary) {
      console.error(`Dispensary ${dispensaryId} not found`);
      failed = true;
      return; // `finally` still runs and performs the exit
    }

    console.log(`Name: ${dispensary.name}`);
    console.log(`City: ${dispensary.city}`);
    console.log(`Menu Type: ${dispensary.menu_type}`);
    console.log(`Platform Dispensary ID: ${dispensary.platform_dispensary_id || 'NULL'}`);
    console.log(`Menu URL: ${dispensary.menu_url || 'NULL'}`);
    console.log(`Profile Key: ${dispensary.profile_key || 'NONE'}`);
    console.log(`Profile Status: ${dispensary.profile_status || 'N/A'}`);
    console.log(`Profile Config: ${JSON.stringify(dispensary.config, null, 2)}`);
    console.log('');

    // Run sandbox crawl
    console.log('=== RUNNING SANDBOX CRAWL ===');
    console.log(`Starting sandbox crawl for ${dispensary.name}...`);
    const startTime = Date.now();
    const result = await runDispensaryOrchestrator(dispensaryId);
    const duration = Date.now() - startTime;

    console.log('\n=== CRAWL RESULT ===');
    console.log(`Status: ${result.status}`);
    console.log(`Summary: ${result.summary}`);
    console.log(`Run ID: ${result.runId}`);
    console.log(`Duration: ${duration}ms`);
    console.log(`Detection Ran: ${result.detectionRan}`);
    console.log(`Crawl Ran: ${result.crawlRan}`);
    console.log(`Crawl Type: ${result.crawlType || 'N/A'}`);
    console.log(`Products Found: ${result.productsFound || 0}`);
    console.log(`Products New: ${result.productsNew || 0}`);
    console.log(`Products Updated: ${result.productsUpdated || 0}`);
    if (result.error) {
      console.log(`Error: ${result.error}`);
    }

    // Get the trace
    console.log('\n=== ORCHESTRATOR TRACE ===');
    const trace = await getLatestTrace(dispensaryId);
    if (trace) {
      console.log(`Trace ID: ${trace.id}`);
      console.log(`Profile Key: ${trace.profile_key || 'N/A'}`);
      console.log(`Mode: ${trace.mode}`);
      console.log(`Status: ${trace.status}`);
      console.log(`Started At: ${trace.started_at}`);
      console.log(`Completed At: ${trace.completed_at || 'In Progress'}`);

      if (trace.steps && Array.isArray(trace.steps)) {
        console.log(`\nSteps (${trace.steps.length} total):`);
        trace.steps.forEach((step: any, i: number) => {
          const status = step.status === 'completed' ? '✅' : step.status === 'failed' ? '❌' : '⏳';
          console.log(` ${i + 1}. ${status} ${step.action}: ${step.description}`);
          if (step.output && Object.keys(step.output).length > 0) {
            console.log(` Output: ${JSON.stringify(step.output)}`);
          }
          if (step.error) {
            console.log(` Error: ${step.error}`);
          }
        });
      }

      if (trace.result) {
        console.log(`\nResult: ${JSON.stringify(trace.result, null, 2)}`);
      }
      if (trace.error_message) {
        console.log(`\nError Message: ${trace.error_message}`);
      }
    } else {
      console.log('No trace found for this dispensary');
    }
  } catch (error: unknown) {
    // Anything can be thrown; normalise before reading message/stack.
    const err = error instanceof Error ? error : new Error(String(error));
    console.error('Error running sandbox test:', err.message);
    console.error(err.stack);
    failed = true;
  } finally {
    await closePool();
    if (failed) {
      process.exit(1);
    }
  }
}
main();

View File

@@ -0,0 +1,88 @@
/**
* Sandbox Validation Script for Dispensary 101 (Trulieve Scottsdale)
*
* This script runs a sandbox crawl and captures the trace for observability.
* NO automatic promotion or state changes.
*/
import { Pool } from 'pg';
const pool = new Pool({ connectionString: process.env.DATABASE_URL });
/**
 * Read-only report for dispensary 101.
 *
 * Prints the dispensary row with its crawler profile, the current
 * `dutchie_products` count, and the three most recent rows of
 * `crawl_orchestration_traces` (with up to five trace steps each), then closes
 * the pool. Only SELECT statements are issued.
 */
async function main() {
  console.log('=== SANDBOX VALIDATION: Dispensary 101 (Trulieve Scottsdale) ===');
  console.log('');

  // Dispensary row joined with its crawler profile, if one exists
  const { rows: dispensaryRows } = await pool.query(`
    SELECT d.id, d.name, d.city, d.state, d.menu_type, d.platform_dispensary_id, d.menu_url,
    dcp.id as profile_id, dcp.profile_key, dcp.status as profile_status, dcp.config
    FROM dispensaries d
    LEFT JOIN dispensary_crawler_profiles dcp ON dcp.dispensary_id = d.id
    WHERE d.id = 101
  `);
  const dispensary = dispensaryRows[0];
  if (!dispensary) {
    console.log('ERROR: Dispensary 101 not found');
    await pool.end();
    return;
  }

  console.log('=== DISPENSARY INFO ===');
  console.log('Name:', dispensary.name);
  console.log('Location:', `${dispensary.city}, ${dispensary.state}`);
  console.log('Menu Type:', dispensary.menu_type);
  console.log('Platform ID:', dispensary.platform_dispensary_id);
  console.log('Menu URL:', dispensary.menu_url);
  console.log('');

  console.log('=== PROFILE ===');
  console.log('Profile ID:', dispensary.profile_id);
  console.log('Profile Key:', dispensary.profile_key);
  console.log('Profile Status:', dispensary.profile_status);
  console.log('Config:', JSON.stringify(dispensary.config, null, 2));
  console.log('');

  // How many products are currently stored for this dispensary
  const productCount = await pool.query('SELECT COUNT(*) FROM dutchie_products WHERE dispensary_id = 101');
  console.log('Current product count:', productCount.rows[0].count);
  console.log('');

  // Latest traces (local DB uses state_at_start/state_at_end column names)
  const { rows: recentTraces } = await pool.query(`
    SELECT id, run_id, state_at_start, state_at_end,
    products_found, success, error_message, created_at, trace
    FROM crawl_orchestration_traces
    WHERE dispensary_id = 101
    ORDER BY created_at DESC
    LIMIT 3
  `);

  console.log('=== RECENT TRACES ===');
  if (recentTraces.length > 0) {
    for (const [index, t] of recentTraces.entries()) {
      console.log(`${index + 1}. [id:${t.id}] ${t.state_at_start} -> ${t.state_at_end}`);
      console.log(` Products: ${t.products_found} | Success: ${t.success}`);
      if (t.error_message) {
        console.log(` Error: ${t.error_message}`);
      }
      if (Array.isArray(t.trace)) {
        console.log(' Trace steps:');
        // Only the first five steps are listed; the remainder is summarised.
        const shown = t.trace.slice(0, 5);
        for (let j = 0; j < shown.length; j++) {
          const s = shown[j];
          const tag = s.status || s.type;
          const label = s.step_name || s.message || JSON.stringify(s).slice(0, 60);
          console.log(` ${j + 1}. [${tag}] ${label}`);
        }
        if (t.trace.length > 5) {
          console.log(` ... and ${t.trace.length - 5} more steps`);
        }
      }
      console.log('');
    }
  } else {
    console.log('No traces found');
  }

  await pool.end();
  console.log('=== DATABASE CHECK COMPLETE ===');
}
main().catch(e => {
console.error('Error:', e.message);
process.exit(1);
});

View File

@@ -1,6 +1,16 @@
/**
* Scrape ALL active products via direct GraphQL pagination
* This is more reliable than category navigation
* LEGACY SCRIPT - Scrape All Active Products
*
* DEPRECATED: This script creates its own database pool.
* Future implementations should use the CannaiQ API endpoints instead.
*
* Scrapes ALL active products via direct GraphQL pagination.
* This is more reliable than category navigation.
*
* DO NOT:
* - Add this to package.json scripts
* - Run this in automated jobs
* - Use DATABASE_URL directly
*/
import puppeteer from 'puppeteer-extra';
@@ -10,8 +20,11 @@ import { normalizeDutchieProduct, DutchieProduct } from '../scrapers/dutchie-gra
puppeteer.use(StealthPlugin());
const DATABASE_URL =
process.env.DATABASE_URL || 'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';
console.warn('\n⚠ LEGACY SCRIPT: This script should be replaced with CannaiQ API calls.\n');
// Single database connection (cannaiq in cannaiq-postgres container)
const DATABASE_URL = process.env.CANNAIQ_DB_URL ||
`postgresql://${process.env.CANNAIQ_DB_USER || 'dutchie'}:${process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass'}@${process.env.CANNAIQ_DB_HOST || 'localhost'}:${process.env.CANNAIQ_DB_PORT || '54320'}/${process.env.CANNAIQ_DB_NAME || 'cannaiq'}`;
const GRAPHQL_HASH = 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0';
async function scrapeAllProducts(menuUrl: string, storeId: number) {

View File

@@ -0,0 +1,42 @@
import pg from 'pg';
const pool = new pg.Pool({ connectionString: process.env.DATABASE_URL });
/**
 * Prints three dispensary listings as JSON lines, then closes the pool:
 *  1. rows whose name contains "flower" or "az ", or whose address contains "union hills";
 *  2. rows whose name contains "nirvana";
 *  3. every row with state = 'AZ' (id, name, address and city only).
 */
async function main() {
  // Broad search for flower power
  const { rows: broadMatches } = await pool.query(`
    SELECT id, name, address, city, state, zip, menu_url, menu_type, platform_dispensary_id, website
    FROM dispensaries
    WHERE LOWER(name) LIKE $1 OR LOWER(name) LIKE $2 OR LOWER(address) LIKE $3
    ORDER BY name
  `, ['%flower%', '%az %', '%union hills%']);
  console.log('=== SEARCHING FOR FLOWER/AZ/UNION HILLS ===');
  for (const row of broadMatches) {
    console.log(JSON.stringify(row));
  }

  // Any Nirvana dispensaries that already exist
  const { rows: nirvanaMatches } = await pool.query(`
    SELECT id, name, address, city, state, zip, menu_url, menu_type, platform_dispensary_id, website
    FROM dispensaries
    WHERE LOWER(name) LIKE $1
    ORDER BY name
  `, ['%nirvana%']);
  console.log('');
  console.log('=== EXISTING NIRVANA DISPENSARIES ===');
  for (const row of nirvanaMatches) {
    console.log(JSON.stringify(row));
  }

  // Full AZ list for comparison
  const { rows: arizonaRows } = await pool.query(`
    SELECT id, name, address, city, state, zip
    FROM dispensaries
    WHERE state = 'AZ'
    ORDER BY name
  `);
  console.log('');
  console.log(`=== ALL AZ DISPENSARIES (${arizonaRows.length} total) ===`);
  for (const { id, name, address, city } of arizonaRows) {
    console.log(JSON.stringify({ id, name, address, city }));
  }

  await pool.end();
}
main().catch(e => { console.error(e.message); process.exit(1); });

View File

@@ -0,0 +1,307 @@
#!/usr/bin/env npx tsx
/**
* Seed Dutchie Discovery Cities - Bulk
*
* Seeds dutchie_discovery_cities with a static list of major US metros.
* Uses UPSERT to avoid duplicates on re-runs.
*
* Usage:
* npm run seed:dt:cities:bulk
* DATABASE_URL="..." npx tsx src/scripts/seed-dt-cities-bulk.ts
*/
import { Pool } from 'pg';
// Connection string, resolved in order: DATABASE_URL, then CANNAIQ_DB_URL,
// then the hard-coded localhost:54320 default below.
// NOTE(review): the fallback embeds a username/password — presumably local-dev only; confirm.
const DB_URL = process.env.DATABASE_URL || process.env.CANNAIQ_DB_URL ||
'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';
// ============================================================================
// Static list of major US metros
// Format: { city_slug, city_name, state_code, country_code }
// ============================================================================
/** One row of the static seed list; the four fields are passed as the INSERT parameters in main(). */
interface CityEntry {
// Slug stored in city_slug, e.g. 'az-phoenix'
city_slug: string;
// Display name, e.g. 'Phoenix'
city_name: string;
// State code, e.g. 'AZ'
state_code: string;
// Country code; every entry in CITIES uses 'US'
country_code: string;
}
/**
 * Static seed data, grouped by state. main() upserts each entry into
 * dutchie_discovery_cities. Slugs in this list are written as
 * "<lowercase state code>-<lowercase city name with hyphens>".
 */
const CITIES: CityEntry[] = [
// Arizona (priority state)
{ city_slug: 'az-phoenix', city_name: 'Phoenix', state_code: 'AZ', country_code: 'US' },
{ city_slug: 'az-tucson', city_name: 'Tucson', state_code: 'AZ', country_code: 'US' },
{ city_slug: 'az-mesa', city_name: 'Mesa', state_code: 'AZ', country_code: 'US' },
{ city_slug: 'az-scottsdale', city_name: 'Scottsdale', state_code: 'AZ', country_code: 'US' },
{ city_slug: 'az-tempe', city_name: 'Tempe', state_code: 'AZ', country_code: 'US' },
{ city_slug: 'az-chandler', city_name: 'Chandler', state_code: 'AZ', country_code: 'US' },
{ city_slug: 'az-glendale', city_name: 'Glendale', state_code: 'AZ', country_code: 'US' },
{ city_slug: 'az-peoria', city_name: 'Peoria', state_code: 'AZ', country_code: 'US' },
{ city_slug: 'az-flagstaff', city_name: 'Flagstaff', state_code: 'AZ', country_code: 'US' },
{ city_slug: 'az-sedona', city_name: 'Sedona', state_code: 'AZ', country_code: 'US' },
// California
{ city_slug: 'ca-los-angeles', city_name: 'Los Angeles', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-san-francisco', city_name: 'San Francisco', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-san-diego', city_name: 'San Diego', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-san-jose', city_name: 'San Jose', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-oakland', city_name: 'Oakland', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-sacramento', city_name: 'Sacramento', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-fresno', city_name: 'Fresno', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-long-beach', city_name: 'Long Beach', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-bakersfield', city_name: 'Bakersfield', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-anaheim', city_name: 'Anaheim', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-santa-ana', city_name: 'Santa Ana', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-riverside', city_name: 'Riverside', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-stockton', city_name: 'Stockton', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-irvine', city_name: 'Irvine', state_code: 'CA', country_code: 'US' },
{ city_slug: 'ca-santa-barbara', city_name: 'Santa Barbara', state_code: 'CA', country_code: 'US' },
// Colorado
{ city_slug: 'co-denver', city_name: 'Denver', state_code: 'CO', country_code: 'US' },
{ city_slug: 'co-colorado-springs', city_name: 'Colorado Springs', state_code: 'CO', country_code: 'US' },
{ city_slug: 'co-aurora', city_name: 'Aurora', state_code: 'CO', country_code: 'US' },
{ city_slug: 'co-boulder', city_name: 'Boulder', state_code: 'CO', country_code: 'US' },
{ city_slug: 'co-fort-collins', city_name: 'Fort Collins', state_code: 'CO', country_code: 'US' },
{ city_slug: 'co-pueblo', city_name: 'Pueblo', state_code: 'CO', country_code: 'US' },
// Florida
{ city_slug: 'fl-miami', city_name: 'Miami', state_code: 'FL', country_code: 'US' },
{ city_slug: 'fl-orlando', city_name: 'Orlando', state_code: 'FL', country_code: 'US' },
{ city_slug: 'fl-tampa', city_name: 'Tampa', state_code: 'FL', country_code: 'US' },
{ city_slug: 'fl-jacksonville', city_name: 'Jacksonville', state_code: 'FL', country_code: 'US' },
{ city_slug: 'fl-fort-lauderdale', city_name: 'Fort Lauderdale', state_code: 'FL', country_code: 'US' },
{ city_slug: 'fl-west-palm-beach', city_name: 'West Palm Beach', state_code: 'FL', country_code: 'US' },
{ city_slug: 'fl-st-petersburg', city_name: 'St. Petersburg', state_code: 'FL', country_code: 'US' },
// Illinois
{ city_slug: 'il-chicago', city_name: 'Chicago', state_code: 'IL', country_code: 'US' },
{ city_slug: 'il-springfield', city_name: 'Springfield', state_code: 'IL', country_code: 'US' },
{ city_slug: 'il-peoria', city_name: 'Peoria', state_code: 'IL', country_code: 'US' },
{ city_slug: 'il-rockford', city_name: 'Rockford', state_code: 'IL', country_code: 'US' },
// Massachusetts
{ city_slug: 'ma-boston', city_name: 'Boston', state_code: 'MA', country_code: 'US' },
{ city_slug: 'ma-worcester', city_name: 'Worcester', state_code: 'MA', country_code: 'US' },
{ city_slug: 'ma-springfield', city_name: 'Springfield', state_code: 'MA', country_code: 'US' },
{ city_slug: 'ma-cambridge', city_name: 'Cambridge', state_code: 'MA', country_code: 'US' },
// Michigan
{ city_slug: 'mi-detroit', city_name: 'Detroit', state_code: 'MI', country_code: 'US' },
{ city_slug: 'mi-grand-rapids', city_name: 'Grand Rapids', state_code: 'MI', country_code: 'US' },
{ city_slug: 'mi-ann-arbor', city_name: 'Ann Arbor', state_code: 'MI', country_code: 'US' },
{ city_slug: 'mi-lansing', city_name: 'Lansing', state_code: 'MI', country_code: 'US' },
{ city_slug: 'mi-flint', city_name: 'Flint', state_code: 'MI', country_code: 'US' },
// Nevada
{ city_slug: 'nv-las-vegas', city_name: 'Las Vegas', state_code: 'NV', country_code: 'US' },
{ city_slug: 'nv-reno', city_name: 'Reno', state_code: 'NV', country_code: 'US' },
{ city_slug: 'nv-henderson', city_name: 'Henderson', state_code: 'NV', country_code: 'US' },
{ city_slug: 'nv-north-las-vegas', city_name: 'North Las Vegas', state_code: 'NV', country_code: 'US' },
// New Jersey
{ city_slug: 'nj-newark', city_name: 'Newark', state_code: 'NJ', country_code: 'US' },
{ city_slug: 'nj-jersey-city', city_name: 'Jersey City', state_code: 'NJ', country_code: 'US' },
{ city_slug: 'nj-paterson', city_name: 'Paterson', state_code: 'NJ', country_code: 'US' },
{ city_slug: 'nj-trenton', city_name: 'Trenton', state_code: 'NJ', country_code: 'US' },
// New Mexico
{ city_slug: 'nm-albuquerque', city_name: 'Albuquerque', state_code: 'NM', country_code: 'US' },
{ city_slug: 'nm-santa-fe', city_name: 'Santa Fe', state_code: 'NM', country_code: 'US' },
{ city_slug: 'nm-las-cruces', city_name: 'Las Cruces', state_code: 'NM', country_code: 'US' },
// New York
{ city_slug: 'ny-new-york', city_name: 'New York', state_code: 'NY', country_code: 'US' },
{ city_slug: 'ny-buffalo', city_name: 'Buffalo', state_code: 'NY', country_code: 'US' },
{ city_slug: 'ny-rochester', city_name: 'Rochester', state_code: 'NY', country_code: 'US' },
{ city_slug: 'ny-albany', city_name: 'Albany', state_code: 'NY', country_code: 'US' },
{ city_slug: 'ny-syracuse', city_name: 'Syracuse', state_code: 'NY', country_code: 'US' },
// Ohio
{ city_slug: 'oh-columbus', city_name: 'Columbus', state_code: 'OH', country_code: 'US' },
{ city_slug: 'oh-cleveland', city_name: 'Cleveland', state_code: 'OH', country_code: 'US' },
{ city_slug: 'oh-cincinnati', city_name: 'Cincinnati', state_code: 'OH', country_code: 'US' },
{ city_slug: 'oh-toledo', city_name: 'Toledo', state_code: 'OH', country_code: 'US' },
{ city_slug: 'oh-akron', city_name: 'Akron', state_code: 'OH', country_code: 'US' },
// Oklahoma
{ city_slug: 'ok-oklahoma-city', city_name: 'Oklahoma City', state_code: 'OK', country_code: 'US' },
{ city_slug: 'ok-tulsa', city_name: 'Tulsa', state_code: 'OK', country_code: 'US' },
{ city_slug: 'ok-norman', city_name: 'Norman', state_code: 'OK', country_code: 'US' },
// Oregon
{ city_slug: 'or-portland', city_name: 'Portland', state_code: 'OR', country_code: 'US' },
{ city_slug: 'or-eugene', city_name: 'Eugene', state_code: 'OR', country_code: 'US' },
{ city_slug: 'or-salem', city_name: 'Salem', state_code: 'OR', country_code: 'US' },
{ city_slug: 'or-bend', city_name: 'Bend', state_code: 'OR', country_code: 'US' },
{ city_slug: 'or-medford', city_name: 'Medford', state_code: 'OR', country_code: 'US' },
// Pennsylvania
{ city_slug: 'pa-philadelphia', city_name: 'Philadelphia', state_code: 'PA', country_code: 'US' },
{ city_slug: 'pa-pittsburgh', city_name: 'Pittsburgh', state_code: 'PA', country_code: 'US' },
{ city_slug: 'pa-allentown', city_name: 'Allentown', state_code: 'PA', country_code: 'US' },
// Texas (limited cannabis, but for completeness)
{ city_slug: 'tx-houston', city_name: 'Houston', state_code: 'TX', country_code: 'US' },
{ city_slug: 'tx-san-antonio', city_name: 'San Antonio', state_code: 'TX', country_code: 'US' },
{ city_slug: 'tx-dallas', city_name: 'Dallas', state_code: 'TX', country_code: 'US' },
{ city_slug: 'tx-austin', city_name: 'Austin', state_code: 'TX', country_code: 'US' },
{ city_slug: 'tx-fort-worth', city_name: 'Fort Worth', state_code: 'TX', country_code: 'US' },
{ city_slug: 'tx-el-paso', city_name: 'El Paso', state_code: 'TX', country_code: 'US' },
// Virginia
{ city_slug: 'va-virginia-beach', city_name: 'Virginia Beach', state_code: 'VA', country_code: 'US' },
{ city_slug: 'va-norfolk', city_name: 'Norfolk', state_code: 'VA', country_code: 'US' },
{ city_slug: 'va-richmond', city_name: 'Richmond', state_code: 'VA', country_code: 'US' },
{ city_slug: 'va-arlington', city_name: 'Arlington', state_code: 'VA', country_code: 'US' },
// Washington
{ city_slug: 'wa-seattle', city_name: 'Seattle', state_code: 'WA', country_code: 'US' },
{ city_slug: 'wa-spokane', city_name: 'Spokane', state_code: 'WA', country_code: 'US' },
{ city_slug: 'wa-tacoma', city_name: 'Tacoma', state_code: 'WA', country_code: 'US' },
{ city_slug: 'wa-vancouver', city_name: 'Vancouver', state_code: 'WA', country_code: 'US' },
{ city_slug: 'wa-bellevue', city_name: 'Bellevue', state_code: 'WA', country_code: 'US' },
// Washington DC
{ city_slug: 'dc-washington', city_name: 'Washington', state_code: 'DC', country_code: 'US' },
// Maryland
{ city_slug: 'md-baltimore', city_name: 'Baltimore', state_code: 'MD', country_code: 'US' },
{ city_slug: 'md-rockville', city_name: 'Rockville', state_code: 'MD', country_code: 'US' },
{ city_slug: 'md-silver-spring', city_name: 'Silver Spring', state_code: 'MD', country_code: 'US' },
// Connecticut
{ city_slug: 'ct-hartford', city_name: 'Hartford', state_code: 'CT', country_code: 'US' },
{ city_slug: 'ct-new-haven', city_name: 'New Haven', state_code: 'CT', country_code: 'US' },
{ city_slug: 'ct-stamford', city_name: 'Stamford', state_code: 'CT', country_code: 'US' },
// Maine
{ city_slug: 'me-portland', city_name: 'Portland', state_code: 'ME', country_code: 'US' },
{ city_slug: 'me-bangor', city_name: 'Bangor', state_code: 'ME', country_code: 'US' },
// Missouri
{ city_slug: 'mo-kansas-city', city_name: 'Kansas City', state_code: 'MO', country_code: 'US' },
{ city_slug: 'mo-st-louis', city_name: 'St. Louis', state_code: 'MO', country_code: 'US' },
{ city_slug: 'mo-springfield', city_name: 'Springfield', state_code: 'MO', country_code: 'US' },
// Minnesota
{ city_slug: 'mn-minneapolis', city_name: 'Minneapolis', state_code: 'MN', country_code: 'US' },
{ city_slug: 'mn-st-paul', city_name: 'St. Paul', state_code: 'MN', country_code: 'US' },
{ city_slug: 'mn-duluth', city_name: 'Duluth', state_code: 'MN', country_code: 'US' },
// Alaska
{ city_slug: 'ak-anchorage', city_name: 'Anchorage', state_code: 'AK', country_code: 'US' },
{ city_slug: 'ak-fairbanks', city_name: 'Fairbanks', state_code: 'AK', country_code: 'US' },
{ city_slug: 'ak-juneau', city_name: 'Juneau', state_code: 'AK', country_code: 'US' },
// Hawaii
{ city_slug: 'hi-honolulu', city_name: 'Honolulu', state_code: 'HI', country_code: 'US' },
{ city_slug: 'hi-maui', city_name: 'Maui', state_code: 'HI', country_code: 'US' },
// Vermont
{ city_slug: 'vt-burlington', city_name: 'Burlington', state_code: 'VT', country_code: 'US' },
// Rhode Island
{ city_slug: 'ri-providence', city_name: 'Providence', state_code: 'RI', country_code: 'US' },
// Delaware
{ city_slug: 'de-wilmington', city_name: 'Wilmington', state_code: 'DE', country_code: 'US' },
// Montana
{ city_slug: 'mt-billings', city_name: 'Billings', state_code: 'MT', country_code: 'US' },
{ city_slug: 'mt-missoula', city_name: 'Missoula', state_code: 'MT', country_code: 'US' },
];
// ============================================================================
// Main
// ============================================================================
/**
 * Upserts every entry of CITIES into `dutchie_discovery_cities` and prints a
 * summary (inserted / updated / errors / total rows for platform 'dutchie').
 *
 * A failure on one city is logged and counted; the remaining cities are still
 * processed. The process exits with code 1 when any city failed or when the
 * run itself failed (e.g. the connection test), and with code 0 otherwise.
 *
 * The exit code is only recorded inside the `try`: `process.exit()` terminates
 * immediately and would skip the `finally` block, leaving the pool unclosed.
 * The exit therefore happens after `pool.end()` has completed.
 */
async function main() {
  console.log('=========================================================');
  console.log(' Seed Dutchie Discovery Cities - Bulk');
  console.log('=========================================================');
  // Mask the password portion of the URL before printing it.
  console.log(`\nDatabase: ${DB_URL.replace(/:[^:@]+@/, ':****@')}`);
  console.log(`Cities to seed: ${CITIES.length}`);

  const pool = new Pool({ connectionString: DB_URL });
  let exitCode = 0;

  try {
    // Test connection
    const { rows } = await pool.query('SELECT NOW() as time');
    console.log(`Connected at: ${rows[0].time}\n`);

    let inserted = 0;
    let updated = 0;
    let errors = 0;

    for (const city of CITIES) {
      try {
        // `xmax = 0` is used to tell a fresh insert apart from a conflict update.
        const result = await pool.query(`
          INSERT INTO dutchie_discovery_cities (
            platform,
            city_slug,
            city_name,
            state_code,
            country_code,
            crawl_enabled,
            created_at,
            updated_at
          ) VALUES (
            'dutchie',
            $1,
            $2,
            $3,
            $4,
            TRUE,
            NOW(),
            NOW()
          )
          ON CONFLICT (platform, country_code, state_code, city_slug)
          DO UPDATE SET
            city_name = EXCLUDED.city_name,
            crawl_enabled = TRUE,
            updated_at = NOW()
          RETURNING (xmax = 0) AS inserted
        `, [city.city_slug, city.city_name, city.state_code, city.country_code]);

        if (result.rows[0].inserted) {
          inserted++;
        } else {
          updated++;
        }
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        console.error(` Error seeding ${city.city_slug}: ${message}`);
        errors++;
      }
    }

    // Get total count
    const { rows: countRows } = await pool.query(`
      SELECT COUNT(*) as total FROM dutchie_discovery_cities WHERE platform = 'dutchie'
    `);

    console.log('=========================================================');
    console.log(' SUMMARY');
    console.log('=========================================================');
    console.log(` Cities in static list: ${CITIES.length}`);
    console.log(` Inserted: ${inserted}`);
    console.log(` Updated: ${updated}`);
    console.log(` Errors: ${errors}`);
    console.log(` Total in DB: ${countRows[0].total}`);

    if (errors > 0) {
      console.log('\n Completed with errors');
      exitCode = 1;
    } else {
      console.log('\n Seed completed successfully');
    }
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('\n Seed failed:', message);
    exitCode = 1;
  } finally {
    await pool.end();
  }

  process.exit(exitCode);
}
main();

View File

@@ -0,0 +1,166 @@
#!/usr/bin/env npx tsx
/**
* Seed Dutchie City for Discovery
*
* Manually seeds a city into dutchie_discovery_cities for location discovery.
* Use this when /cities scraping is blocked (403) and you need to manually add cities.
*
* Usage:
* npm run seed:platforms:dt:city -- --city-slug=ny-hudson --city-name=Hudson --state-code=NY
* npm run seed:platforms:dt:city -- --city-slug=ma-boston --city-name=Boston --state-code=MA --country-code=US
*
* Options:
* --city-slug Required. URL slug for the city (e.g., "ny-hudson")
* --city-name Required. Display name (e.g., "Hudson")
* --state-code Required. State/province code (e.g., "NY", "CA", "ON")
* --country-code Optional. Country code (default: "US")
*
* After seeding, run location discovery:
* npm run discovery:platforms:dt:locations
*/
import { Pool } from 'pg';
// Connection string, resolved in order: DATABASE_URL, then CANNAIQ_DB_URL,
// then the hard-coded localhost:54320 default below.
// NOTE(review): the fallback embeds a username/password — presumably local-dev only; confirm.
const DB_URL = process.env.DATABASE_URL || process.env.CANNAIQ_DB_URL ||
'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';
/** Options accepted by the city seed script, as parsed from the command line. */
interface Args {
  citySlug?: string;
  cityName?: string;
  stateCode?: string;
  /** Defaults to "US" when --country-code is not given. */
  countryCode: string;
}

/**
 * Parses `--flag=value` options from `process.argv` (everything after the
 * script path).
 *
 * Recognised flags: `--city-slug`, `--city-name`, `--state-code` and
 * `--country-code`; the last two are upper-cased. Unknown arguments and flags
 * with an empty value are ignored; when a flag is repeated the last one wins.
 *
 * Each argument is split at its first `=` and the flag name must match
 * exactly, so a value that happens to contain another flag's text
 * (e.g. `--city-name=--state-code=zz`) is kept as a value and not parsed a
 * second time as that other flag.
 *
 * @returns The parsed options; required fields are left `undefined` when absent.
 */
function parseArgs(): Args {
  const args: Args = { countryCode: 'US' };

  for (const arg of process.argv.slice(2)) {
    const eq = arg.indexOf('=');
    if (eq < 0) continue;

    const value = arg.slice(eq + 1);
    if (value.length === 0) continue;

    switch (arg.slice(0, eq)) {
      case '--city-slug':
        args.citySlug = value;
        break;
      case '--city-name':
        args.cityName = value;
        break;
      case '--state-code':
        args.stateCode = value.toUpperCase();
        break;
      case '--country-code':
        args.countryCode = value.toUpperCase();
        break;
      default:
        // Not one of our options; leave it alone.
        break;
    }
  }

  return args;
}
/** Writes the command-line help text to stdout with a single console.log call. */
function printUsage() {
  // The leading and trailing empty entries produce the blank line before and after the text.
  const helpLines = [
    '',
    'Usage:',
    'npm run seed:platforms:dt:city -- --city-slug=<slug> --city-name=<name> --state-code=<state>',
    'Required arguments:',
    '--city-slug URL slug for the city (e.g., "ny-hudson", "ma-boston")',
    '--city-name Display name (e.g., "Hudson", "Boston")',
    '--state-code State/province code (e.g., "NY", "CA", "ON")',
    'Optional arguments:',
    '--country-code Country code (default: "US")',
    'Examples:',
    'npm run seed:platforms:dt:city -- --city-slug=ny-hudson --city-name=Hudson --state-code=NY',
    'npm run seed:platforms:dt:city -- --city-slug=ca-los-angeles --city-name="Los Angeles" --state-code=CA',
    'npm run seed:platforms:dt:city -- --city-slug=on-toronto --city-name=Toronto --state-code=ON --country-code=CA',
    '',
  ];
  console.log(helpLines.join('\n'));
}
/**
 * Seeds (or re-enables) a single city in `dutchie_discovery_cities`.
 *
 * Validates the required CLI options, upserts the row for platform 'dutchie',
 * prints whether it was inserted or updated, and prints the current city
 * totals. Exits with code 1 on missing arguments or on any database error,
 * and with code 0 otherwise.
 *
 * The exit code is only recorded inside the `try`: `process.exit()` terminates
 * immediately and would skip the `finally` block, leaving the pool unclosed.
 * The exit therefore happens after `pool.end()` has completed.
 */
async function main() {
  const args = parseArgs();

  console.log('╔══════════════════════════════════════════════════╗');
  console.log('║ Seed Dutchie City for Discovery ║');
  console.log('╚══════════════════════════════════════════════════╝');

  // Validate required args (no pool exists yet, so exiting in place is safe)
  if (!args.citySlug || !args.cityName || !args.stateCode) {
    console.error('\n❌ Error: Missing required arguments\n');
    printUsage();
    process.exit(1);
  }

  console.log(`\nCity Slug: ${args.citySlug}`);
  console.log(`City Name: ${args.cityName}`);
  console.log(`State Code: ${args.stateCode}`);
  console.log(`Country Code: ${args.countryCode}`);
  // Mask the password portion of the URL before printing it.
  console.log(`Database: ${DB_URL.replace(/:[^:@]+@/, ':****@')}`);

  const pool = new Pool({ connectionString: DB_URL });
  let exitCode = 0;

  try {
    // Test DB connection
    const { rows: connTest } = await pool.query('SELECT NOW() as time');
    console.log(`\nConnected at: ${connTest[0].time}`);

    // Upsert the city; `xmax = 0` is used to tell an insert apart from a conflict update.
    const { rows } = await pool.query(`
      INSERT INTO dutchie_discovery_cities (
        platform,
        city_slug,
        city_name,
        state_code,
        country_code,
        crawl_enabled,
        created_at,
        updated_at
      ) VALUES (
        'dutchie',
        $1,
        $2,
        $3,
        $4,
        TRUE,
        NOW(),
        NOW()
      )
      ON CONFLICT (platform, country_code, state_code, city_slug)
      DO UPDATE SET
        city_name = EXCLUDED.city_name,
        crawl_enabled = TRUE,
        updated_at = NOW()
      RETURNING id, city_slug, city_name, state_code, country_code, crawl_enabled,
        (xmax = 0) AS was_inserted
    `, [args.citySlug, args.cityName, args.stateCode, args.countryCode]);

    if (rows.length > 0) {
      const row = rows[0];
      const action = row.was_inserted ? 'INSERTED' : 'UPDATED';
      console.log(`\n✅ City ${action}:`);
      console.log(` ID: ${row.id}`);
      console.log(` City Slug: ${row.city_slug}`);
      console.log(` City Name: ${row.city_name}`);
      console.log(` State Code: ${row.state_code}`);
      console.log(` Country Code: ${row.country_code}`);
      console.log(` Crawl Enabled: ${row.crawl_enabled}`);
    }

    // Show current city count
    const { rows: countRows } = await pool.query(`
      SELECT
        COUNT(*) as total,
        COUNT(*) FILTER (WHERE crawl_enabled = TRUE) as enabled
      FROM dutchie_discovery_cities
      WHERE platform = 'dutchie'
    `);
    console.log(`\nTotal Dutchie cities: ${countRows[0].total} (${countRows[0].enabled} enabled)`);

    console.log('\n📍 Next step: Run location discovery');
    console.log(' npm run discovery:platforms:dt:locations');
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('\n❌ Failed to seed city:', message);
    exitCode = 1;
  } finally {
    await pool.end();
  }

  process.exit(exitCode);
}
main();

View File

@@ -0,0 +1,325 @@
/**
* System Smoke Test
*
* Validates core CannaiQ system components:
* - Database connectivity
* - Required tables and row counts
* - Discovery data (via direct DB query)
* - Analytics V2 services (via direct service calls)
* - Orchestrator route (via HTTP)
*
* Usage: npm run system:smoke-test
* Exit codes: 0 = success, 1 = failure
*/
import { Pool } from 'pg';
import axios from 'axios';
// Configuration
// Base URL of the backend API; override with API_BASE_URL.
const API_BASE = process.env.API_BASE_URL || 'http://localhost:3010';
// Connection string, resolved in order: DATABASE_URL, then CANNAIQ_DB_URL,
// then the hard-coded localhost:54320 default below.
// NOTE(review): the fallback embeds a username/password — presumably local-dev only; confirm.
const DB_URL = process.env.DATABASE_URL || process.env.CANNAIQ_DB_URL ||
'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';
// Outcome bookkeeping shared by every check in this script
interface TestResult {
  name: string;
  passed: boolean;
  message: string;
  details?: any;
}

/** Every recorded outcome, in the order the checks ran. */
const results: TestResult[] = [];
/** Becomes true as soon as any check is recorded through fail(). */
let hasFailure = false;

/**
 * Records a successful check and prints its one-line summary.
 * @param name - Short label of the check.
 * @param message - Human-readable outcome.
 * @param details - Optional extra data stored with the result (not printed).
 */
function pass(name: string, message: string, details?: any) {
  const entry: TestResult = { name, passed: true, message, details };
  results.push(entry);
  console.log(' ✓ PASS: ' + name + ' - ' + message);
}

/**
 * Records a failed check, prints its one-line summary and flags the run as failed.
 * @param name - Short label of the check.
 * @param message - Human-readable outcome.
 * @param details - Optional extra data stored with the result (not printed).
 */
function fail(name: string, message: string, details?: any) {
  const entry: TestResult = { name, passed: false, message, details };
  results.push(entry);
  console.log(' ✗ FAIL: ' + name + ' - ' + message);
  hasFailure = true;
}
// ============================================================
// DATABASE TESTS
// ============================================================
/**
 * Step 1/4: checks that a trivial query can be executed on the pool.
 *
 * Records a pass (database name and server time) or a fail (error message)
 * through pass()/fail().
 *
 * @param pool - Pool to run the probe query on.
 * @returns `true` when the query succeeded, `false` otherwise.
 */
async function testDatabaseConnection(pool: Pool): Promise<boolean> {
  console.log('\n[1/4] DATABASE CONNECTION');
  console.log('─'.repeat(50));

  let connected = false;
  try {
    const { rows } = await pool.query('SELECT NOW() as time, current_database() as db');
    const { time, db } = rows[0];
    pass('DB Connection', `Connected to ${db} at ${time}`);
    connected = true;
  } catch (error: any) {
    fail('DB Connection', `Failed: ${error.message}`);
    connected = false;
  }
  return connected;
}
/**
 * Step 2/4: counts the rows of each required table, one table at a time.
 *
 * A table that can be counted is recorded as a pass with its row count. A
 * failed query is recorded as a fail; error code 42P01 is reported as
 * "Table does not exist", any other error with its message.
 *
 * @param pool - Pool to run the COUNT queries on.
 */
async function testRequiredTables(pool: Pool): Promise<void> {
  console.log('\n[2/4] REQUIRED TABLES');
  console.log('─'.repeat(50));

  // Fixed list, so interpolating the name into the SQL below involves no external input.
  const requiredTables = [
    'states',
    'dispensaries',
    'store_products',
    'store_product_snapshots',
    'crawl_runs',
    'dutchie_discovery_cities',
    'dutchie_discovery_locations',
  ];

  for (const table of requiredTables) {
    const label = `Table: ${table}`;
    try {
      const { rows } = await pool.query(`SELECT COUNT(*) as count FROM ${table}`);
      const rowTotal = parseInt(rows[0].count, 10);
      pass(label, `${rowTotal.toLocaleString()} rows`);
    } catch (error: any) {
      const reason = error.code === '42P01'
        ? 'Table does not exist'
        : `Query failed: ${error.message}`;
      fail(label, reason);
    }
  }
}
// ============================================================
// DISCOVERY DATA TESTS (Direct DB)
// ============================================================
/**
 * Check 3 of 4: queries dutchie_discovery_locations directly.
 *
 * Two independent checks are reported:
 *  - "Discovery Summary": counts of active dutchie locations grouped by status
 *  - "Discovery Locations": fetches the most recent active dutchie location
 *
 * A failure in the first check does not prevent the second from running.
 *
 * @param pool Connection pool used for both queries.
 */
async function testDiscoveryData(pool: Pool): Promise<void> {
  console.log('\n[3/4] DISCOVERY DATA (Direct DB Query)');
  console.log('─'.repeat(50));

  // Shared failure reporting: 42P01 is PostgreSQL's "undefined_table" code.
  const reportQueryError = (checkName: string, error: any): void => {
    const reason =
      error.code === '42P01'
        ? 'Table dutchie_discovery_locations does not exist'
        : `Query failed: ${error.message}`;
    fail(checkName, reason);
  };

  // Test discovery summary via direct query
  try {
    const summary = await pool.query(`
      SELECT status, COUNT(*) as cnt
      FROM dutchie_discovery_locations
      WHERE platform = 'dutchie' AND active = TRUE
      GROUP BY status
    `);

    const statusCounts: Record<string, number> = {};
    let totalLocations = 0;
    summary.rows.forEach((row: any) => {
      // COUNT(*) arrives as a string, so convert before summing.
      const count = parseInt(row.cnt, 10);
      statusCounts[row.status] = count;
      totalLocations += count;
    });

    // The per-status numbers are stored as details; only the total is printed.
    pass('Discovery Summary', `${totalLocations} total locations`, {
      discovered: statusCounts['discovered'] || 0,
      verified: statusCounts['verified'] || 0,
      merged: statusCounts['merged'] || 0,
      rejected: statusCounts['rejected'] || 0,
    });
  } catch (error: any) {
    reportQueryError('Discovery Summary', error);
  }

  // Test discovery locations query
  try {
    const latest = await pool.query(`
      SELECT id, name, state_code, status
      FROM dutchie_discovery_locations
      WHERE platform = 'dutchie' AND active = TRUE
      ORDER BY id DESC
      LIMIT 1
    `);

    const [location] = latest.rows;
    if (latest.rows.length > 0) {
      pass('Discovery Locations', `Found location: ${location.name} (${location.state_code})`);
    } else {
      // An empty table still counts as success: only the query is validated.
      pass('Discovery Locations', 'Query succeeded, 0 locations found');
    }
  } catch (error: any) {
    reportQueryError('Discovery Locations', error);
  }
}
// ============================================================
// ANALYTICS V2 SERVICE TESTS (Direct Service Calls)
// ============================================================
/**
 * Check 4 of 4: state legality queries against the `states` table, followed
 * by an HTTP probe of the orchestrator promote route.
 *
 * Three independent DB checks are reported ("State Legal Breakdown",
 * "Recreational States", "Medical-Only States"); each has its own try/catch so
 * one failure does not hide the others. The HTTP probe runs last.
 *
 * @param pool Connection pool used for the state queries.
 */
async function testAnalyticsV2Services(pool: Pool): Promise<void> {
  console.log('\n[4/4] ANALYTICS V2 (Direct Service Calls)');
  console.log('─'.repeat(50));

  // SQL shared by the checks below, so the same text is not repeated.
  const recreationalSql = `
    SELECT code FROM states
    WHERE recreational_legal = TRUE
    ORDER BY code
  `;
  const medicalOnlySql = `
    SELECT code FROM states
    WHERE medical_legal = TRUE
    AND (recreational_legal = FALSE OR recreational_legal IS NULL)
    ORDER BY code
  `;
  const noProgramSql = `
    SELECT code FROM states
    WHERE (recreational_legal = FALSE OR recreational_legal IS NULL)
    AND (medical_legal = FALSE OR medical_legal IS NULL)
    ORDER BY code
  `;

  // Formats "<n> states: A, B, C, D, E..." showing at most five codes.
  const summarizeStates = (rows: any[]): string => {
    const states = rows.map((r: any) => r.code);
    return `${states.length} states: ${states.slice(0, 5).join(', ')}${states.length > 5 ? '...' : ''}`;
  };

  // Test: State Legal Breakdown
  try {
    const { rows: recRows } = await pool.query(recreationalSql);
    const { rows: medRows } = await pool.query(medicalOnlySql);
    const { rows: noProgramRows } = await pool.query(noProgramSql);

    const breakdown = {
      recreational: recRows.length,
      medical_only: medRows.length,
      no_program: noProgramRows.length,
    };
    pass(
      'State Legal Breakdown',
      `rec=${breakdown.recreational}, med=${breakdown.medical_only}, none=${breakdown.no_program}`
    );
  } catch (error: any) {
    fail('State Legal Breakdown', `Query failed: ${error.message}`);
  }

  // Test: Recreational States
  try {
    const { rows } = await pool.query(recreationalSql);
    pass('Recreational States', summarizeStates(rows));
  } catch (error: any) {
    fail('Recreational States', `Query failed: ${error.message}`);
  }

  // Test: Medical-Only States
  try {
    const { rows } = await pool.query(medicalOnlySql);
    pass('Medical-Only States', summarizeStates(rows));
  } catch (error: any) {
    fail('Medical-Only States', `Query failed: ${error.message}`);
  }

  await probeOrchestratorPromoteRoute();
}

/**
 * Private helper: POSTs to the orchestrator promote route with ID 0 and
 * classifies the HTTP status.
 *
 * `validateStatus: () => true` makes axios resolve for every HTTP status, so
 * all status handling lives in one place. With axios' default validation the
 * 4xx responses never reach the success path and had to be handled in `catch`.
 * The `catch` below therefore only sees transport-level problems such as a
 * refused connection or a timeout.
 *
 * Outcomes:
 *  - 400 / 404 -> pass (invalid ID was rejected)
 *  - 2xx       -> pass
 *  - any other -> fail with "HTTP <status>: <reason>"
 *
 * NOTE(review): a 404 is also what an unregistered route typically returns,
 * so this probe may not distinguish "route missing" from "ID not found".
 * Confirm against the backend's 404 behaviour.
 */
async function probeOrchestratorPromoteRoute(): Promise<void> {
  // Test orchestrator route via HTTP (dry run)
  console.log('\n[4b/4] ORCHESTRATOR ROUTE (HTTP)');
  console.log('─'.repeat(50));

  const checkName = 'Orchestrator Promote (dry)';
  try {
    const response = await axios.post(
      `${API_BASE}/api/orchestrator/platforms/dt/promote/0`,
      {},
      { timeout: 10000, validateStatus: () => true }
    );
    const { status, data } = response;

    if (status === 400 || status === 404) {
      // ID 0 should fail gracefully
      pass(checkName, `Route exists, returned ${status} for invalid ID`);
    } else if (status >= 200 && status < 300) {
      // Optional chaining guards against an empty or non-object body.
      if (status === 200 && data?.success === false) {
        pass(checkName, 'Route exists, gracefully rejected ID 0');
      } else {
        pass(checkName, `Route exists, status ${status}`);
      }
    } else {
      const reason = data?.error || `Request failed with status code ${status}`;
      fail(checkName, `HTTP ${status}: ${reason}`);
    }
  } catch (error: any) {
    // No HTTP response was received at all.
    fail(checkName, error.message);
  }
}
// ============================================================
// MAIN
// ============================================================
/**
 * Runs every smoke-test section in order, prints a summary and terminates the
 * process with exit code 0 when all checks passed or 1 when any failed.
 *
 * When the initial connection check fails, the sections after it are skipped,
 * each with an explanatory line. The pool is always closed before the summary
 * is printed.
 */
async function main(): Promise<void> {
  console.log('╔══════════════════════════════════════════════════╗');
  console.log('║ CannaiQ System Smoke Test ║');
  console.log('╚══════════════════════════════════════════════════╝');
  console.log(`\nAPI Base: ${API_BASE}`);
  // Never print the raw connection string: it may contain a password.
  console.log(`Database: ${maskDbPassword(DB_URL)}`);

  const pool = new Pool({ connectionString: DB_URL });
  try {
    // 1. Database connection
    const dbConnected = await testDatabaseConnection(pool);

    if (dbConnected) {
      // 2. Required tables
      await testRequiredTables(pool);
      // 3. Discovery data (direct DB)
      await testDiscoveryData(pool);
      // 4. Analytics V2 services (direct DB + orchestrator HTTP)
      await testAnalyticsV2Services(pool);
    } else {
      console.log('\n[2/4] REQUIRED TABLES - SKIPPED (no DB connection)');
      console.log('\n[3/4] DISCOVERY DATA - SKIPPED (no DB connection)');
      console.log('\n[4/4] ANALYTICS V2 - SKIPPED (no DB connection)');
    }
  } finally {
    await pool.end();
  }

  // Summary
  console.log('\n' + '═'.repeat(50));
  console.log('SUMMARY');
  console.log('═'.repeat(50));

  const failures = results.filter(r => !r.passed);
  const total = results.length;
  const failed = failures.length;
  const passed = total - failed;
  console.log(`\nTotal: ${total} | Passed: ${passed} | Failed: ${failed}`);

  if (hasFailure) {
    console.log('\n❌ SMOKE TEST FAILED\n');
    console.log('Failed tests:');
    failures.forEach(r => {
      console.log(` - ${r.name}: ${r.message}`);
    });
    process.exit(1);
  } else {
    console.log('\n✅ SMOKE TEST PASSED\n');
    process.exit(0);
  }
}

/**
 * Private helper: replaces the password of a connection URL with `****`.
 *
 * Only the userinfo part (`scheme://user:password@`) is rewritten. Everything
 * between the first `:` after the user name and the next `@` counts as the
 * password, so passwords containing `:` are masked completely. URLs without a
 * password (`scheme://user@host`, `scheme://host:port/db`) are returned
 * unchanged.
 *
 * @param url Connection string to sanitise for logging.
 * @returns The URL with its password, if any, replaced by `****`.
 */
function maskDbPassword(url: string): string {
  return url.replace(/^([a-z][a-z0-9+.-]*:\/\/[^:@/]*):[^@]*@/i, '$1:****@');
}
// Entry point. Any rejection that escapes main() (for example a failure while
// closing the pool) is reported here and mapped to exit code 1.
main().catch((crash) => {
  const reason = crash.message;
  console.error('\n❌ SMOKE TEST CRASHED:', reason);
  process.exit(1);
});