feat: Add v2 architecture with multi-state support and orchestrator services
Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
1038
backend/src/scripts/backfill-legacy-to-canonical.ts
Normal file
1038
backend/src/scripts/backfill-legacy-to-canonical.ts
Normal file
File diff suppressed because it is too large
Load Diff
@@ -11,7 +11,7 @@
|
||||
* npx tsx src/scripts/backfill-store-dispensary.ts --verbose # Show all match details
|
||||
*/
|
||||
|
||||
import { pool } from '../db/migrate';
|
||||
import { pool } from '../db/pool';
|
||||
import { logger } from '../services/logger';
|
||||
|
||||
const args = process.argv.slice(2);
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
* npx tsx src/scripts/bootstrap-discovery.ts --status # Show current status only
|
||||
*/
|
||||
|
||||
import { pool } from '../db/migrate';
|
||||
import { pool } from '../db/pool';
|
||||
import {
|
||||
ensureAllDispensariesHaveSchedules,
|
||||
runDispensaryOrchestrator,
|
||||
|
||||
101
backend/src/scripts/bootstrap-local-admin.ts
Normal file
101
backend/src/scripts/bootstrap-local-admin.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
/**
|
||||
* LOCAL-ONLY Admin Bootstrap Script
|
||||
*
|
||||
* Creates or resets a local admin user for development.
|
||||
* This script is ONLY for local development - never use in production.
|
||||
*
|
||||
* Usage:
|
||||
* cd backend
|
||||
* npx tsx src/scripts/bootstrap-local-admin.ts
|
||||
*
|
||||
* Default credentials:
|
||||
* Email: admin@local.test
|
||||
* Password: admin123
|
||||
*/
|
||||
|
||||
import bcrypt from 'bcrypt';
|
||||
import { query, closePool } from '../dutchie-az/db/connection';
|
||||
|
||||
// Local admin credentials - deterministic for dev.
// These values are intentionally hard-coded and printed by the bootstrap
// script; they must never be used outside a local development database.
const LOCAL_ADMIN_EMAIL = 'admin@local.test';
const LOCAL_ADMIN_PASSWORD = 'admin123';
const LOCAL_ADMIN_ROLE = 'admin'; // Match existing schema (admin, not superadmin)
|
||||
|
||||
/**
 * Create the local development admin user, or reset its password hash and
 * role when a `users` row with LOCAL_ADMIN_EMAIL already exists.
 *
 * Progress and the resulting credentials are printed to stdout. On failure
 * the error message is printed to stderr (plus a migration hint when the
 * `users` relation is missing) and the process exits with code 1 - but only
 * after `closePool()` has been awaited, so the pool is not abandoned mid-exit.
 *
 * Refuses to run when NODE_ENV is "production": the credentials are
 * hard-coded and printed, which is only acceptable on a local database.
 */
async function bootstrapLocalAdmin(): Promise<void> {
  if (process.env.NODE_ENV === 'production') {
    console.error('ERROR: bootstrap-local-admin is for local development only.');
    console.error('Refusing to run with NODE_ENV=production.');
    process.exit(1);
  }

  const banner = '='.repeat(60);

  console.log(banner);
  console.log('LOCAL ADMIN BOOTSTRAP');
  console.log(banner);
  console.log('');
  console.log('This script creates/resets a local admin user for development.');
  console.log('');

  let failed = false;

  try {
    // Hash the password with bcrypt (10 rounds, matching existing code)
    const passwordHash = await bcrypt.hash(LOCAL_ADMIN_PASSWORD, 10);

    // Check if user exists
    const existing = await query<{ id: number; email: string }>(
      'SELECT id, email FROM users WHERE email = $1',
      [LOCAL_ADMIN_EMAIL]
    );

    if (existing.rows.length > 0) {
      // User exists - update password and role
      console.log(`User "${LOCAL_ADMIN_EMAIL}" already exists (id=${existing.rows[0].id})`);
      console.log('Resetting password and ensuring admin role...');

      await query(
        `UPDATE users
         SET password_hash = $1,
             role = $2,
             updated_at = NOW()
         WHERE email = $3`,
        [passwordHash, LOCAL_ADMIN_ROLE, LOCAL_ADMIN_EMAIL]
      );

      console.log('User updated successfully.');
    } else {
      // User doesn't exist - create new
      console.log(`Creating new admin user: ${LOCAL_ADMIN_EMAIL}`);

      const result = await query<{ id: number }>(
        `INSERT INTO users (email, password_hash, role, created_at, updated_at)
         VALUES ($1, $2, $3, NOW(), NOW())
         RETURNING id`,
        [LOCAL_ADMIN_EMAIL, passwordHash, LOCAL_ADMIN_ROLE]
      );

      console.log(`User created successfully (id=${result.rows[0].id})`);
    }

    console.log('');
    console.log(banner);
    console.log('LOCAL ADMIN READY');
    console.log(banner);
    console.log('');
    console.log('Login credentials:');
    console.log(` Email: ${LOCAL_ADMIN_EMAIL}`);
    console.log(` Password: ${LOCAL_ADMIN_PASSWORD}`);
    console.log('');
    console.log('Admin UI: http://localhost:8080/admin');
    console.log('');
  } catch (error: unknown) {
    failed = true;

    // Anything can be thrown; only Error instances are guaranteed a message.
    const message = error instanceof Error ? error.message : String(error);

    console.error('');
    console.error('ERROR: Failed to bootstrap local admin');
    console.error(message);

    if (message.includes('relation "users" does not exist')) {
      console.error('');
      console.error('The "users" table does not exist.');
      console.error('Run migrations first: npm run migrate');
    }
  } finally {
    await closePool();
  }

  // Exit only after the finally block ran: calling process.exit() inside the
  // catch would terminate the process before closePool() is reached.
  if (failed) {
    process.exit(1);
  }
}
|
||||
|
||||
// Run the bootstrap. The rejection handler covers failures that escape the
// function's own try/catch, so they are reported and yield a non-zero exit
// code instead of surfacing as an unhandled rejection.
bootstrapLocalAdmin().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
|
||||
86
backend/src/scripts/discovery-dutchie-cities.ts
Normal file
86
backend/src/scripts/discovery-dutchie-cities.ts
Normal file
@@ -0,0 +1,86 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Dutchie City Discovery CLI Runner
|
||||
*
|
||||
* Discovers cities from Dutchie's /cities page and upserts to dutchie_discovery_cities.
|
||||
*
|
||||
* Usage:
|
||||
* npm run discovery:dutchie:cities
|
||||
* npx tsx src/scripts/discovery-dutchie-cities.ts
|
||||
*
|
||||
* Environment:
|
||||
* DATABASE_URL - PostgreSQL connection string (required)
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { DutchieCityDiscovery } from '../dutchie-az/discovery/DutchieCityDiscovery';
|
||||
|
||||
/**
 * CLI entry point for Dutchie city discovery.
 *
 * Requires DATABASE_URL, opens a pg Pool, runs DutchieCityDiscovery, prints a
 * summary plus current statistics, and exits with code 1 when the run
 * reported errors or threw, 0 otherwise.
 *
 * The exit code is recorded and process.exit() is called only after the
 * finally block, so `pool.end()` is actually awaited before the process ends.
 */
async function main() {
  const banner = '='.repeat(60);

  console.log(banner);
  console.log('DUTCHIE CITY DISCOVERY');
  console.log(banner);

  // Get database URL from environment
  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.error('ERROR: DATABASE_URL environment variable is required');
    console.error('');
    console.error('Usage:');
    console.error(' DATABASE_URL="postgresql://..." npm run discovery:dutchie:cities');
    process.exit(1);
  }

  // Create pool
  const pool = new Pool({ connectionString });
  let exitCode = 0;

  try {
    // Test connection
    await pool.query('SELECT 1');
    console.log('[CLI] Database connection established');

    // Run discovery
    const discovery = new DutchieCityDiscovery(pool);
    const result = await discovery.run();

    // Print summary
    console.log('');
    console.log(banner);
    console.log('DISCOVERY COMPLETE');
    console.log(banner);
    console.log(`Cities found: ${result.citiesFound}`);
    console.log(`Cities inserted: ${result.citiesInserted}`);
    console.log(`Cities updated: ${result.citiesUpdated}`);
    console.log(`Errors: ${result.errors.length}`);
    console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);

    if (result.errors.length > 0) {
      console.log('');
      console.log('Errors:');
      result.errors.forEach((e) => console.log(` - ${e}`));
    }

    // Show stats
    console.log('');
    console.log('Current Statistics:');
    const stats = await discovery.getStats();
    console.log(` Total cities: ${stats.total}`);
    console.log(` Crawl enabled: ${stats.crawlEnabled}`);
    console.log(` Never crawled: ${stats.neverCrawled}`);
    console.log('');
    console.log('By Country:');
    stats.byCountry.forEach((c) => console.log(` ${c.countryCode}: ${c.count}`));
    console.log('');
    console.log('By State (top 10):');
    stats.byState.slice(0, 10).forEach((s) => console.log(` ${s.stateCode} (${s.countryCode}): ${s.count}`));

    exitCode = result.errors.length > 0 ? 1 : 0;
  } catch (error: unknown) {
    const err = error instanceof Error ? error : new Error(String(error));
    console.error('FATAL ERROR:', err.message);
    console.error(err.stack);
    exitCode = 1;
  } finally {
    await pool.end();
  }

  // Exit explicitly once the pool is closed.
  process.exit(exitCode);
}
|
||||
|
||||
// Report anything that escapes main() and exit non-zero instead of leaving an
// unhandled promise rejection.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
|
||||
189
backend/src/scripts/discovery-dutchie-locations.ts
Normal file
189
backend/src/scripts/discovery-dutchie-locations.ts
Normal file
@@ -0,0 +1,189 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Dutchie Location Discovery CLI Runner
|
||||
*
|
||||
* Discovers store locations for cities and upserts to dutchie_discovery_locations.
|
||||
*
|
||||
* Usage:
|
||||
* npm run discovery:dutchie:locations -- --all-enabled
|
||||
* npm run discovery:dutchie:locations -- --city-slug=phoenix
|
||||
* npm run discovery:dutchie:locations -- --all-enabled --limit=10
|
||||
*
|
||||
* npx tsx src/scripts/discovery-dutchie-locations.ts --all-enabled
|
||||
* npx tsx src/scripts/discovery-dutchie-locations.ts --city-slug=phoenix
|
||||
*
|
||||
* Options:
|
||||
* --city-slug=<slug> Run for a single city by its slug
|
||||
* --all-enabled Run for all cities where crawl_enabled = TRUE
|
||||
* --limit=<n> Limit the number of cities to process
|
||||
* --delay=<ms> Delay between cities in ms (default: 2000)
|
||||
*
|
||||
* Environment:
|
||||
* DATABASE_URL - PostgreSQL connection string (required)
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { DutchieLocationDiscovery } from '../dutchie-az/discovery/DutchieLocationDiscovery';
|
||||
|
||||
// Parse command line arguments
|
||||
/**
 * Parse the CLI flags from `process.argv`.
 *
 * Recognised flags:
 *   --city-slug=<slug>  single city to process (everything after the first `=`)
 *   --all-enabled       process every enabled city
 *   --limit=<n>         maximum number of cities, a positive integer
 *   --delay=<ms>        delay between cities, a non-negative integer (default 2000)
 *
 * Unknown arguments are ignored.
 *
 * @returns the parsed options; `citySlug` is null and `limit` is undefined
 *          when the corresponding flag is absent.
 * @throws Error when --limit or --delay is not a plain decimal integer in
 *         range, instead of silently passing NaN or a negative number on.
 */
function parseArgs(): {
  citySlug: string | null;
  allEnabled: boolean;
  limit: number | undefined;
  delay: number;
} {
  const args = process.argv.slice(2);

  let citySlug: string | null = null;
  let allEnabled = false;
  let limit: number | undefined = undefined;
  let delay = 2000;

  // Everything after the FIRST '=' is the value, so values may contain '='.
  const valueOf = (arg: string): string => arg.slice(arg.indexOf('=') + 1);

  // Strict decimal parsing: rejects '', 'abc', '10abc', '-5', '1.5', '1e3'.
  const parseInteger = (flag: string, raw: string, min: number): number => {
    const value = /^\d+$/.test(raw) ? Number(raw) : NaN;
    if (!Number.isSafeInteger(value) || value < min) {
      throw new Error(`Invalid value for ${flag}: "${raw}" (expected an integer >= ${min})`);
    }
    return value;
  };

  for (const arg of args) {
    if (arg.startsWith('--city-slug=')) {
      citySlug = valueOf(arg);
    } else if (arg === '--all-enabled') {
      allEnabled = true;
    } else if (arg.startsWith('--limit=')) {
      limit = parseInteger('--limit', valueOf(arg), 1);
    } else if (arg.startsWith('--delay=')) {
      delay = parseInteger('--delay', valueOf(arg), 0);
    }
  }

  return { citySlug, allEnabled, limit, delay };
}
|
||||
|
||||
/**
 * Print the CLI help text (usage, options, examples, required environment)
 * to stdout with a single console.log call.
 */
function printUsage() {
  console.log(`
Dutchie Location Discovery CLI

Usage:
npx tsx src/scripts/discovery-dutchie-locations.ts [options]

Options:
--city-slug=<slug> Run for a single city by its slug
--all-enabled Run for all cities where crawl_enabled = TRUE
--limit=<n> Limit the number of cities to process
--delay=<ms> Delay between cities in ms (default: 2000)

Examples:
npx tsx src/scripts/discovery-dutchie-locations.ts --all-enabled
npx tsx src/scripts/discovery-dutchie-locations.ts --city-slug=phoenix
npx tsx src/scripts/discovery-dutchie-locations.ts --all-enabled --limit=5

Environment:
DATABASE_URL - PostgreSQL connection string (required)
`);
}
|
||||
|
||||
/**
 * CLI entry point for Dutchie location discovery.
 *
 * Runs either for one city (--city-slug) or for all enabled cities
 * (--all-enabled, optionally bounded by --limit, with --delay between
 * cities). Requires DATABASE_URL. Prints a summary and exits with code 1 when
 * the arguments are invalid, the city is unknown, the run reported errors or
 * an exception was thrown; 0 otherwise.
 *
 * The exit code is recorded and process.exit() is called only after the
 * finally block, so `pool.end()` is actually awaited before the process ends.
 */
async function main() {
  let parsed: ReturnType<typeof parseArgs>;
  try {
    parsed = parseArgs();
  } catch (error: unknown) {
    // Argument problems are user errors: show the message and the usage text.
    console.error(`ERROR: ${error instanceof Error ? error.message : String(error)}`);
    printUsage();
    process.exit(1);
  }
  const { citySlug, allEnabled, limit, delay } = parsed;

  if (!citySlug && !allEnabled) {
    console.error('ERROR: Must specify either --city-slug=<slug> or --all-enabled');
    printUsage();
    process.exit(1);
  }

  const banner = '='.repeat(60);

  console.log(banner);
  console.log('DUTCHIE LOCATION DISCOVERY');
  console.log(banner);

  if (citySlug) {
    console.log(`Mode: Single city (${citySlug})`);
  } else {
    console.log(`Mode: All enabled cities${limit ? ` (limit: ${limit})` : ''}`);
  }
  console.log(`Delay between cities: ${delay}ms`);
  console.log('');

  // Get database URL from environment
  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.error('ERROR: DATABASE_URL environment variable is required');
    console.error('');
    console.error('Usage:');
    console.error(' DATABASE_URL="postgresql://..." npx tsx src/scripts/discovery-dutchie-locations.ts --all-enabled');
    process.exit(1);
  }

  // Create pool
  const pool = new Pool({ connectionString });
  let exitCode = 0;

  try {
    // Test connection
    await pool.query('SELECT 1');
    console.log('[CLI] Database connection established');

    const discovery = new DutchieLocationDiscovery(pool);

    if (citySlug) {
      // Single city mode
      const city = await discovery.getCityBySlug(citySlug);

      if (!city) {
        console.error(`ERROR: City not found: ${citySlug}`);
        console.error('');
        console.error('Make sure you have run city discovery first:');
        console.error(' npm run discovery:dutchie:cities');
        exitCode = 1;
      } else {
        const result = await discovery.discoverForCity(city);

        console.log('');
        console.log(banner);
        console.log('DISCOVERY COMPLETE');
        console.log(banner);
        console.log(`City: ${city.cityName}, ${city.stateCode}`);
        console.log(`Locations found: ${result.locationsFound}`);
        console.log(`Inserted: ${result.locationsInserted}`);
        console.log(`Updated: ${result.locationsUpdated}`);
        console.log(`Skipped (protected): ${result.locationsSkipped}`);
        console.log(`Errors: ${result.errors.length}`);
        console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);

        if (result.errors.length > 0) {
          console.log('');
          console.log('Errors:');
          result.errors.forEach((e) => console.log(` - ${e}`));
        }

        exitCode = result.errors.length > 0 ? 1 : 0;
      }
    } else {
      // All enabled cities mode
      const result = await discovery.discoverAllEnabled({ limit, delayMs: delay });

      console.log('');
      console.log(banner);
      console.log('DISCOVERY COMPLETE');
      console.log(banner);
      console.log(`Total cities processed: ${result.totalCities}`);
      console.log(`Total locations found: ${result.totalLocationsFound}`);
      console.log(`Total inserted: ${result.totalInserted}`);
      console.log(`Total updated: ${result.totalUpdated}`);
      console.log(`Total skipped: ${result.totalSkipped}`);
      console.log(`Total errors: ${result.errors.length}`);
      console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);

      // Cap the printed error list at 20 entries to keep the summary readable.
      if (result.errors.length > 0 && result.errors.length <= 20) {
        console.log('');
        console.log('Errors:');
        result.errors.forEach((e) => console.log(` - ${e}`));
      } else if (result.errors.length > 20) {
        console.log('');
        console.log(`First 20 of ${result.errors.length} errors:`);
        result.errors.slice(0, 20).forEach((e) => console.log(` - ${e}`));
      }

      exitCode = result.errors.length > 0 ? 1 : 0;
    }
  } catch (error: unknown) {
    const err = error instanceof Error ? error : new Error(String(error));
    console.error('FATAL ERROR:', err.message);
    console.error(err.stack);
    exitCode = 1;
  } finally {
    await pool.end();
  }

  // Exit explicitly once the pool is closed.
  process.exit(exitCode);
}
|
||||
|
||||
// Report anything that escapes main() and exit non-zero instead of leaving an
// unhandled promise rejection.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
|
||||
833
backend/src/scripts/etl/042_legacy_import.ts
Normal file
833
backend/src/scripts/etl/042_legacy_import.ts
Normal file
@@ -0,0 +1,833 @@
|
||||
/**
|
||||
* ETL Script: 042 Legacy Import
|
||||
*
|
||||
* Copies data from legacy dutchie_legacy database into canonical CannaiQ tables
|
||||
* in the dutchie_menus database.
|
||||
*
|
||||
* CRITICAL DATABASE ARCHITECTURE:
|
||||
* - SOURCE (READ-ONLY): dutchie_legacy - Contains legacy dutchie_* tables
|
||||
* - DESTINATION (WRITE): dutchie_menus - Contains canonical CannaiQ tables
|
||||
*
|
||||
* IMPORTANT:
|
||||
* - This script is INSERT-ONLY and IDEMPOTENT
|
||||
* - Uses ON CONFLICT DO NOTHING for all inserts
|
||||
* - NO deletes, NO truncates, NO schema changes
|
||||
* - Legacy database is READ-ONLY - never modified
|
||||
*
|
||||
* Run manually with:
|
||||
* cd backend
|
||||
* npx tsx src/scripts/etl/042_legacy_import.ts
|
||||
*
|
||||
* Prerequisites:
|
||||
* - Migration 041_cannaiq_canonical_schema.sql must be run on dutchie_menus FIRST
|
||||
* - Both CANNAIQ_DB_* and LEGACY_DB_* env vars must be set
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
|
||||
// =====================================================
|
||||
// DATABASE CONNECTIONS - DUAL POOL ARCHITECTURE
|
||||
// =====================================================
|
||||
|
||||
/**
 * Get connection string for CannaiQ database (dutchie_menus).
 * This is the DESTINATION - where we WRITE canonical data.
 *
 * CANNAIQ_DB_URL is returned verbatim when set. Otherwise the URI is built
 * from CANNAIQ_DB_HOST / _PORT / _NAME / _USER / _PASS; user, password and
 * database name are percent-encoded so characters such as `@`, `:`, `/` or
 * `#` in a password cannot corrupt the URI.
 *
 * @returns a `postgresql://` connection URI.
 * @throws Error listing the missing variables when CANNAIQ_DB_URL is unset
 *         and any of the individual variables is missing or empty.
 */
function getCannaiqConnectionString(): string {
  if (process.env.CANNAIQ_DB_URL) {
    return process.env.CANNAIQ_DB_URL;
  }

  const required = ['CANNAIQ_DB_HOST', 'CANNAIQ_DB_PORT', 'CANNAIQ_DB_NAME', 'CANNAIQ_DB_USER', 'CANNAIQ_DB_PASS'];
  const missing = required.filter((key) => !process.env[key]);

  if (missing.length > 0) {
    throw new Error(
      `[042_legacy_import] Missing required CannaiQ env vars: ${missing.join(', ')}\n` +
        `Set either CANNAIQ_DB_URL or all of: CANNAIQ_DB_HOST, CANNAIQ_DB_PORT, CANNAIQ_DB_NAME, CANNAIQ_DB_USER, CANNAIQ_DB_PASS`
    );
  }

  // Presence was verified above, so the fallback to '' is never taken.
  const host = process.env.CANNAIQ_DB_HOST ?? '';
  const port = process.env.CANNAIQ_DB_PORT ?? '';
  const name = encodeURIComponent(process.env.CANNAIQ_DB_NAME ?? '');
  const user = encodeURIComponent(process.env.CANNAIQ_DB_USER ?? '');
  const pass = encodeURIComponent(process.env.CANNAIQ_DB_PASS ?? '');

  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
|
||||
|
||||
/**
 * Get connection string for Legacy database (dutchie_legacy).
 * This is the SOURCE - where we READ legacy data (READ-ONLY).
 *
 * LEGACY_DB_URL is returned verbatim when set. Otherwise the URI is built
 * from LEGACY_DB_HOST / _PORT / _NAME / _USER / _PASS; user, password and
 * database name are percent-encoded so characters such as `@`, `:`, `/` or
 * `#` in a password cannot corrupt the URI.
 *
 * @returns a `postgresql://` connection URI.
 * @throws Error listing the missing variables when LEGACY_DB_URL is unset
 *         and any of the individual variables is missing or empty.
 */
function getLegacyConnectionString(): string {
  if (process.env.LEGACY_DB_URL) {
    return process.env.LEGACY_DB_URL;
  }

  const required = ['LEGACY_DB_HOST', 'LEGACY_DB_PORT', 'LEGACY_DB_NAME', 'LEGACY_DB_USER', 'LEGACY_DB_PASS'];
  const missing = required.filter((key) => !process.env[key]);

  if (missing.length > 0) {
    throw new Error(
      `[042_legacy_import] Missing required Legacy env vars: ${missing.join(', ')}\n` +
        `Set either LEGACY_DB_URL or all of: LEGACY_DB_HOST, LEGACY_DB_PORT, LEGACY_DB_NAME, LEGACY_DB_USER, LEGACY_DB_PASS`
    );
  }

  // Presence was verified above, so the fallback to '' is never taken.
  const host = process.env.LEGACY_DB_HOST ?? '';
  const port = process.env.LEGACY_DB_PORT ?? '';
  const name = encodeURIComponent(process.env.LEGACY_DB_NAME ?? '');
  const user = encodeURIComponent(process.env.LEGACY_DB_USER ?? '');
  const pass = encodeURIComponent(process.env.LEGACY_DB_PASS ?? '');

  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
|
||||
|
||||
// Create both pools.
// Both connection strings are resolved while this module is being loaded, so
// a missing environment variable makes the script fail before any step runs.
// cannaiqPool targets the destination database, legacyPool the legacy source.
const cannaiqPool = new Pool({ connectionString: getCannaiqConnectionString() });
const legacyPool = new Pool({ connectionString: getLegacyConnectionString() });
|
||||
|
||||
// =====================================================
|
||||
// LOGGING HELPERS
|
||||
// =====================================================
|
||||
/** Counters reported by each ETL step. */
interface Stats {
  /** Rows (or items) the step considered. */
  read: number;
  /** Rows the step actually wrote. */
  inserted: number;
  /** Rows the step left untouched. */
  skipped: number;
}

/** Stats for the store_products step, with the skip reasons broken out. */
interface StoreProductStats extends Stats {
  /** Skipped because the dispensary id is missing or unknown in the destination. */
  skipped_missing_store: number;
  /** Counter for rows skipped as duplicates. */
  skipped_duplicate: number;
}

/** Write one message to stdout, prefixed with the script tag. */
function log(message: string) {
  console.log(`[042_legacy_import] ${message}`);
}

/** Log the read/inserted/skipped counters of one step under the given label. */
function logStats(table: string, stats: Stats) {
  log(` ${table}: read=${stats.read}, inserted=${stats.inserted}, skipped=${stats.skipped}`);
}

/** Log the store_products counters, including the per-reason skip counts. */
function logStoreProductStats(stats: StoreProductStats) {
  log(` store_products: read=${stats.read}, inserted=${stats.inserted}, skipped_missing_store=${stats.skipped_missing_store}, skipped_duplicate=${stats.skipped_duplicate}`);
}
|
||||
|
||||
// =====================================================
|
||||
// CATEGORY NORMALIZATION HELPER
|
||||
// =====================================================
|
||||
// Legacy dutchie_products has only 'subcategory', not 'category'.
|
||||
// We derive a canonical category from the subcategory value.
|
||||
|
||||
/**
 * Lower-cased legacy subcategory -> canonical category.
 *
 * Insertion order matters: deriveCategory() falls back to a substring scan
 * over these keys in order, and the first key contained in the input wins.
 */
const SUBCATEGORY_TO_CATEGORY: Record<string, string> = {
  // Flower
  'flower': 'Flower',
  'pre-rolls': 'Flower',
  'pre-roll': 'Flower',
  'preroll': 'Flower',
  'prerolls': 'Flower',
  'shake': 'Flower',
  'smalls': 'Flower',
  'popcorn': 'Flower',

  // Concentrates
  'concentrates': 'Concentrates',
  'concentrate': 'Concentrates',
  'live resin': 'Concentrates',
  'live-resin': 'Concentrates',
  'rosin': 'Concentrates',
  'shatter': 'Concentrates',
  'wax': 'Concentrates',
  'badder': 'Concentrates',
  'crumble': 'Concentrates',
  'diamonds': 'Concentrates',
  'sauce': 'Concentrates',
  'hash': 'Concentrates',
  'kief': 'Concentrates',
  'rso': 'Concentrates',
  'distillate': 'Concentrates',

  // Edibles
  'edibles': 'Edibles',
  'edible': 'Edibles',
  'gummies': 'Edibles',
  'gummy': 'Edibles',
  'chocolates': 'Edibles',
  'chocolate': 'Edibles',
  'baked goods': 'Edibles',
  'beverages': 'Edibles',
  'drinks': 'Edibles',
  'candy': 'Edibles',
  'mints': 'Edibles',
  'capsules': 'Edibles',
  'tablets': 'Edibles',

  // Vapes
  'vapes': 'Vapes',
  'vape': 'Vapes',
  'vaporizers': 'Vapes',
  'cartridges': 'Vapes',
  'cartridge': 'Vapes',
  'carts': 'Vapes',
  'cart': 'Vapes',
  'pods': 'Vapes',
  'disposables': 'Vapes',
  'disposable': 'Vapes',
  'pax': 'Vapes',

  // Topicals
  'topicals': 'Topicals',
  'topical': 'Topicals',
  'lotions': 'Topicals',
  'balms': 'Topicals',
  'salves': 'Topicals',
  'patches': 'Topicals',
  'bath': 'Topicals',

  // Tinctures
  'tinctures': 'Tinctures',
  'tincture': 'Tinctures',
  'oils': 'Tinctures',
  'sublinguals': 'Tinctures',

  // Accessories
  'accessories': 'Accessories',
  'gear': 'Accessories',
  'papers': 'Accessories',
  'grinders': 'Accessories',
  'pipes': 'Accessories',
  'bongs': 'Accessories',
  'batteries': 'Accessories',
};

/**
 * Derive a canonical category from the legacy subcategory field.
 *
 * The input is lower-cased and trimmed, then matched exactly against
 * SUBCATEGORY_TO_CATEGORY; failing that, the first table key (in insertion
 * order) that occurs as a substring of the input decides the category.
 *
 * @param subcategory raw legacy subcategory, possibly null/undefined/empty.
 * @returns the canonical category, or null when the input is empty or
 *          nothing matches (callers decide what to store in that case).
 */
function deriveCategory(subcategory: string | null | undefined): string | null {
  if (!subcategory) return null;

  const normalized = subcategory.toLowerCase().trim();

  // Direct lookup. Own-property check only: a bare index on a plain object
  // also resolves inherited members, so an input such as "constructor" would
  // otherwise return Object.prototype.constructor instead of a category.
  if (Object.prototype.hasOwnProperty.call(SUBCATEGORY_TO_CATEGORY, normalized)) {
    const direct = SUBCATEGORY_TO_CATEGORY[normalized];
    if (direct) return direct;
  }

  // Partial match - check if any key is contained in the subcategory
  for (const [key, category] of Object.entries(SUBCATEGORY_TO_CATEGORY)) {
    if (normalized.includes(key)) {
      return category;
    }
  }

  // No match
  return null;
}
|
||||
|
||||
// =====================================================
|
||||
// STEP 1: Backfill dispensaries.state_id (on cannaiq db)
|
||||
// =====================================================
|
||||
/**
 * Step 1: set `dispensaries.state_id` for rows where it is still NULL by
 * matching the upper-cased `dispensaries.state` against `states.code`.
 *
 * Runs against the CannaiQ pool only. Rows that already have a state_id are
 * not touched, so re-running the step updates nothing new.
 *
 * @returns stats where `read` and `inserted` both hold the number of updated
 *          rows and `skipped` is always 0.
 */
async function backfillStateIds(): Promise<Stats> {
  log('Step 1: Backfill dispensaries.state_id from states table...');

  const result = await cannaiqPool.query(`
    UPDATE dispensaries d
    SET state_id = s.id
    FROM states s
    WHERE UPPER(d.state) = s.code
      AND d.state_id IS NULL
    RETURNING d.id
  `);

  const updated = result.rowCount ?? 0;
  const stats: Stats = {
    read: updated,
    inserted: updated,
    skipped: 0,
  };

  logStats('dispensaries.state_id', stats);
  return stats;
}
|
||||
|
||||
// =====================================================
|
||||
// STEP 2: Insert known chains (on cannaiq db)
|
||||
// =====================================================
|
||||
/**
 * Step 2: insert the hard-coded list of known chains into `chains`.
 *
 * Each row is inserted with ON CONFLICT (slug) DO NOTHING, so chains that
 * already exist are left as they are and counted as skipped.
 *
 * @returns stats with `read` = size of the list, `inserted` = rows actually
 *          created, `skipped` = the remainder.
 */
async function insertChains(): Promise<Stats> {
  log('Step 2: Insert known chains...');

  const knownChains = [
    { name: 'Curaleaf', slug: 'curaleaf', website: 'https://curaleaf.com' },
    { name: 'Trulieve', slug: 'trulieve', website: 'https://trulieve.com' },
    { name: 'Harvest', slug: 'harvest', website: 'https://harvesthoc.com' },
    { name: 'Nirvana Center', slug: 'nirvana-center', website: 'https://nirvanacannabis.com' },
    { name: 'Sol Flower', slug: 'sol-flower', website: 'https://solflower.com' },
    { name: 'Mint Cannabis', slug: 'mint-cannabis', website: 'https://mintcannabis.com' },
    { name: 'JARS Cannabis', slug: 'jars-cannabis', website: 'https://jarscannabis.com' },
    { name: 'Zen Leaf', slug: 'zen-leaf', website: 'https://zenleafdispensaries.com' },
    { name: "Nature's Medicines", slug: 'natures-medicines', website: 'https://naturesmedicines.com' },
    { name: 'The Mint', slug: 'the-mint', website: 'https://themintdispensary.com' },
    { name: 'Giving Tree', slug: 'giving-tree', website: 'https://givingtreeaz.com' },
    { name: 'Health for Life', slug: 'health-for-life', website: 'https://healthforlifeaz.com' },
    { name: 'Oasis Cannabis', slug: 'oasis-cannabis', website: 'https://oasiscannabis.com' },
  ];

  let inserted = 0;
  for (const chain of knownChains) {
    const result = await cannaiqPool.query(
      `
      INSERT INTO chains (name, slug, website_url)
      VALUES ($1, $2, $3)
      ON CONFLICT (slug) DO NOTHING
      RETURNING id
      `,
      [chain.name, chain.slug, chain.website]
    );

    // RETURNING yields a row only when the insert actually happened.
    if ((result.rowCount ?? 0) > 0) {
      inserted++;
    }
  }

  const stats: Stats = {
    read: knownChains.length,
    inserted,
    skipped: knownChains.length - inserted,
  };

  logStats('chains', stats);
  return stats;
}
|
||||
|
||||
// =====================================================
|
||||
// STEP 3: Link dispensaries to chains by name pattern (on cannaiq db)
|
||||
// =====================================================
|
||||
/**
 * Step 3: set `dispensaries.chain_id` for dispensaries that have none, by
 * case-insensitive substring match of the chain name against the
 * dispensary's `name` or `dba_name`.
 *
 * Chains are processed longest name first (ties broken by id), so the result
 * no longer depends on the arbitrary row order of an unordered SELECT when a
 * dispensary name contains more than one chain name. LIKE wildcards inside a
 * chain name are escaped so they match literally.
 *
 * @returns stats with `read` = number of chains, `inserted` = number of
 *          dispensaries linked, `skipped` = 0.
 */
async function linkDispensariesToChains(): Promise<Stats> {
  log('Step 3: Link dispensaries to chains by name pattern...');

  // Get all chains from cannaiq, most specific (longest) name first
  const chainsResult = await cannaiqPool.query(
    'SELECT id, name, slug FROM chains ORDER BY LENGTH(name) DESC, id'
  );
  const chains = chainsResult.rows;

  let totalUpdated = 0;

  for (const chain of chains) {
    // Backslash is the default LIKE/ILIKE escape character in PostgreSQL.
    const escapedName = String(chain.name).replace(/[\\%_]/g, '\\$&');

    // Match by name pattern (case-insensitive)
    const result = await cannaiqPool.query(
      `
      UPDATE dispensaries
      SET chain_id = $1
      WHERE (name ILIKE $2 OR dba_name ILIKE $2)
        AND chain_id IS NULL
      RETURNING id
      `,
      [chain.id, `%${escapedName}%`]
    );

    const linked = result.rowCount ?? 0;
    if (linked > 0) {
      log(` Linked ${linked} dispensaries to chain: ${chain.name}`);
      totalUpdated += linked;
    }
  }

  const stats: Stats = {
    read: chains.length,
    inserted: totalUpdated,
    skipped: 0,
  };

  logStats('dispensaries.chain_id', stats);
  return stats;
}
|
||||
|
||||
// =====================================================
|
||||
// STEP 4: Insert brands from legacy dutchie_products
|
||||
// =====================================================
|
||||
/**
 * Step 4: copy distinct, non-empty brand names from the legacy
 * `dutchie_products` table into the CannaiQ `brands` table.
 *
 * Reads from legacyPool and writes to cannaiqPool. Each brand gets a slug
 * (lower-cased, non-alphanumeric runs replaced by hyphens, outer hyphens
 * stripped, at most 250 characters) and is inserted with
 * ON CONFLICT (slug) DO NOTHING.
 *
 * @returns stats with `read` = distinct legacy brand names; every one of
 *          them ends up in either `inserted` or `skipped`, including names
 *          that cannot produce a slug.
 */
async function insertBrands(): Promise<Stats> {
  log('Step 4: Insert brands from legacy dutchie_products -> cannaiq brands...');

  // READ from legacy database
  const brandsResult = await legacyPool.query(`
    SELECT DISTINCT TRIM(brand_name) AS brand_name
    FROM dutchie_products
    WHERE brand_name IS NOT NULL
      AND TRIM(brand_name) != ''
    ORDER BY brand_name
  `);

  const stats: Stats = {
    read: brandsResult.rowCount ?? 0,
    inserted: 0,
    skipped: 0,
  };

  // Batches only control how often progress is logged; inserts are per row.
  const BATCH_SIZE = 100;
  const brands = brandsResult.rows;

  for (let i = 0; i < brands.length; i += BATCH_SIZE) {
    const batch = brands.slice(i, i + BATCH_SIZE);

    for (const row of batch) {
      const brandName = row.brand_name.trim();

      // Create slug: lowercase, replace non-alphanumeric with hyphens, collapse multiple hyphens
      const slug = brandName
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, '-')
        .replace(/^-+|-+$/g, '')
        .substring(0, 250);

      if (!slug) {
        // A name made only of symbols / non-ASCII characters yields no slug.
        // Count it so read == inserted + skipped still holds.
        stats.skipped++;
        continue;
      }

      // WRITE to cannaiq database
      const result = await cannaiqPool.query(
        `
        INSERT INTO brands (name, slug)
        VALUES ($1, $2)
        ON CONFLICT (slug) DO NOTHING
        RETURNING id
        `,
        [brandName, slug]
      );

      if ((result.rowCount ?? 0) > 0) {
        stats.inserted++;
      } else {
        stats.skipped++;
      }
    }

    log(` Processed ${Math.min(i + BATCH_SIZE, brands.length)}/${brands.length} brands...`);
  }

  logStats('brands', stats);
  return stats;
}
|
||||
|
||||
// =====================================================
|
||||
// STEP 5: Insert store_products from legacy dutchie_products
|
||||
// =====================================================
|
||||
/**
 * Step 5: copy legacy `dutchie_products` rows into the canonical
 * `store_products` table.
 *
 * Rows are paged out of `legacyPool` 200 at a time (ordered by id) and written
 * one by one through `cannaiqPool`. A row is counted as skipped when:
 *  - its `dispensary_id` is empty or absent from the canonical `dispensaries`
 *    table (`skipped_missing_store`),
 *  - the insert hits the (dispensary_id, provider, provider_product_id)
 *    conflict target and returns no row (`skipped_duplicate`), or
 *  - the insert raises SQLSTATE 23503, a foreign-key violation
 *    (`skipped_missing_store`).
 * Every other database error is re-thrown to the caller.
 *
 * @returns Counters for rows read, inserted and skipped.
 */
async function insertStoreProducts(): Promise<StoreProductStats> {
  log('Step 5: Insert store_products from legacy dutchie_products -> cannaiq store_products...');

  // Step 5a: collect the dispensary ids that exist in the canonical database
  log(' Loading valid dispensary IDs from canonical database...');
  const canonicalDispensaries = await cannaiqPool.query('SELECT id FROM dispensaries');
  const knownDispensaryIds = new Set<number>();
  for (const dispensary of canonicalDispensaries.rows) {
    knownDispensaryIds.add(dispensary.id);
  }
  log(` Found ${knownDispensaryIds.size} valid dispensaries in canonical database`);

  // Total number of legacy rows; drives both the paging loop and `read`
  const legacyCount = await legacyPool.query('SELECT COUNT(*) FROM dutchie_products');
  const totalCount = parseInt(legacyCount.rows[0].count, 10);

  const stats: StoreProductStats = {
    read: totalCount,
    inserted: 0,
    skipped: 0,
    skipped_missing_store: 0,
    skipped_duplicate: 0,
  };

  const PAGE_SIZE = 200;

  // Only columns that actually exist in legacy dutchie_products are selected:
  //   id, dispensary_id, external_product_id, name, brand_name,
  //   subcategory, stock_status, primary_image_url, created_at
  // Missing columns: category, first_seen_at, last_seen_at, updated_at, thc_content, cbd_content
  const selectPageSql = `
    SELECT
      dp.id,
      dp.dispensary_id,
      dp.external_product_id,
      dp.name,
      dp.brand_name,
      dp.subcategory,
      dp.stock_status,
      dp.primary_image_url,
      dp.created_at
    FROM dutchie_products dp
    ORDER BY dp.id
    LIMIT $1 OFFSET $2
  `;

  const insertSql = `
    INSERT INTO store_products (
      dispensary_id,
      provider,
      provider_product_id,
      name_raw,
      brand_name_raw,
      category_raw,
      subcategory_raw,
      stock_status,
      is_in_stock,
      image_url,
      first_seen_at,
      last_seen_at,
      created_at,
      updated_at
    ) VALUES (
      $1, 'dutchie', $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13
    )
    ON CONFLICT (dispensary_id, provider, provider_product_id) DO NOTHING
    RETURNING id
  `;

  for (let offset = 0; offset < totalCount; offset += PAGE_SIZE) {
    // READ one page from the legacy database
    const page = await legacyPool.query(selectPageSql, [PAGE_SIZE, offset]);

    for (const legacyRow of page.rows) {
      // Products whose store is unknown to the canonical database are not migrated
      const storeIsKnown =
        Boolean(legacyRow.dispensary_id) && knownDispensaryIds.has(legacyRow.dispensary_id);
      if (!storeIsKnown) {
        stats.skipped_missing_store++;
        stats.skipped++;
        continue;
      }

      // Category is derived from the subcategory; fall back to the raw value
      const categoryRaw = deriveCategory(legacyRow.subcategory) || legacyRow.subcategory || null;

      // created_at stands in for every timestamp column; "now" when it is absent
      const seenAt = legacyRow.created_at || new Date();

      const insertParams = [
        legacyRow.dispensary_id,
        legacyRow.external_product_id,
        legacyRow.name,
        legacyRow.brand_name,
        categoryRaw,
        legacyRow.subcategory || null,
        legacyRow.stock_status || 'in_stock',
        legacyRow.stock_status !== 'out_of_stock',
        legacyRow.primary_image_url || null,
        seenAt, // first_seen_at
        seenAt, // last_seen_at
        seenAt, // created_at
        seenAt, // updated_at
      ];

      // WRITE to the cannaiq database
      try {
        const insertResult = await cannaiqPool.query(insertSql, insertParams);
        const wasInserted = (insertResult.rowCount || 0) > 0;
        if (wasInserted) {
          stats.inserted++;
        } else {
          // ON CONFLICT DO NOTHING returned no row: the product already exists
          stats.skipped_duplicate++;
          stats.skipped++;
        }
      } catch (err: any) {
        // Anything other than a foreign-key violation is unexpected
        if (err.code !== '23503') {
          throw err;
        }
        stats.skipped_missing_store++;
        stats.skipped++;
      }
    }

    log(` Processed ${Math.min(offset + PAGE_SIZE, totalCount)}/${totalCount} products...`);
  }

  logStoreProductStats(stats);
  return stats;
}
|
||||
|
||||
// =====================================================
|
||||
// STEP 6: Link store_products to brands (on cannaiq db)
|
||||
// =====================================================
|
||||
/**
 * Step 6: fill in `store_products.brand_id` on the cannaiq database.
 *
 * A single UPDATE joins `store_products` to `brands` on the trimmed,
 * lower-cased `brand_name_raw` versus the lower-cased brand name, and only
 * touches rows whose `brand_id` is still NULL.
 *
 * @returns Stats whose `read` and `inserted` both equal the number of rows
 *          the UPDATE returned; `skipped` is always 0.
 */
async function linkStoreProductsToBrands(): Promise<Stats> {
  log('Step 6: Link store_products to brands by brand_name_raw...');

  const { rowCount } = await cannaiqPool.query(`
    UPDATE store_products sp
    SET brand_id = b.id
    FROM brands b
    WHERE LOWER(TRIM(sp.brand_name_raw)) = LOWER(b.name)
      AND sp.brand_id IS NULL
    RETURNING sp.id
  `);

  const linkedRows = rowCount || 0;
  const stats: Stats = { read: linkedRows, inserted: linkedRows, skipped: 0 };

  logStats('store_products.brand_id', stats);
  return stats;
}
|
||||
|
||||
// =====================================================
|
||||
// STEP 7: Insert store_product_snapshots from legacy dutchie_product_snapshots
|
||||
// =====================================================
|
||||
/**
 * Step 7: copy legacy `dutchie_product_snapshots` rows into the canonical
 * `store_product_snapshots` table.
 *
 * Snapshots are paged out of `legacyPool` 500 at a time, joined to
 * `dutchie_products` for the product's descriptive columns, and written one
 * by one through `cannaiqPool`. Pricing, potency and raw payload are written
 * as NULL; stock is recorded as in stock with status 'unknown'.
 *
 * A row is counted as skipped when its `dispensary_id` is empty or unknown to
 * the canonical database, when the insert returns no row, or when the insert
 * raises SQLSTATE 23503 (foreign-key violation). Other errors are re-thrown.
 *
 * NOTE(review): the insert uses `ON CONFLICT DO NOTHING` without a conflict
 * target, so re-runs only skip duplicates if `store_product_snapshots` has a
 * suitable unique constraint — confirm against the schema.
 *
 * @returns Counters for rows read (total legacy snapshots), inserted and skipped.
 */
async function insertStoreProductSnapshots(): Promise<StoreProductStats> {
  log('Step 7: Insert store_product_snapshots from legacy -> cannaiq...');

  // Step 7a: collect the dispensary ids that exist in the canonical database
  log(' Loading valid dispensary IDs from canonical database...');
  const canonicalDispensaries = await cannaiqPool.query('SELECT id FROM dispensaries');
  const knownDispensaryIds = new Set<number>();
  for (const dispensary of canonicalDispensaries.rows) {
    knownDispensaryIds.add(dispensary.id);
  }
  log(` Found ${knownDispensaryIds.size} valid dispensaries in canonical database`);

  // Total number of legacy snapshots (counted without the product join)
  const legacyCount = await legacyPool.query('SELECT COUNT(*) FROM dutchie_product_snapshots');
  const totalCount = parseInt(legacyCount.rows[0].count, 10);

  const stats: StoreProductStats = {
    read: totalCount,
    inserted: 0,
    skipped: 0,
    skipped_missing_store: 0,
    skipped_duplicate: 0,
  };

  if (totalCount === 0) {
    log(' No snapshots to migrate.');
    return stats;
  }

  const PAGE_SIZE = 500;

  // Only columns that actually exist in legacy dutchie_product_snapshots are read:
  //   id, dispensary_id, dutchie_product_id, crawled_at, created_at
  // (raw_product_data is missing). The join to dutchie_products supplies
  // external_product_id, name, brand_name, subcategory and primary_image_url.
  const selectPageSql = `
    SELECT
      dps.id,
      dps.dispensary_id,
      dp.external_product_id AS provider_product_id,
      dp.name,
      dp.brand_name,
      dp.subcategory,
      dp.primary_image_url,
      dps.crawled_at,
      dps.created_at
    FROM dutchie_product_snapshots dps
    JOIN dutchie_products dp ON dp.id = dps.dutchie_product_id
    ORDER BY dps.id
    LIMIT $1 OFFSET $2
  `;

  const insertSql = `
    INSERT INTO store_product_snapshots (
      dispensary_id,
      provider,
      provider_product_id,
      captured_at,
      name_raw,
      brand_name_raw,
      category_raw,
      subcategory_raw,
      price_rec,
      price_med,
      price_rec_special,
      is_on_special,
      is_in_stock,
      stock_status,
      thc_percent,
      cbd_percent,
      image_url,
      raw_data,
      created_at
    ) VALUES (
      $1, 'dutchie', $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18
    )
    ON CONFLICT DO NOTHING
    RETURNING id
  `;

  for (let offset = 0; offset < totalCount; offset += PAGE_SIZE) {
    // READ one page from the legacy database
    const page = await legacyPool.query(selectPageSql, [PAGE_SIZE, offset]);

    for (const legacyRow of page.rows) {
      // Snapshots whose store is unknown to the canonical database are not migrated
      const storeIsKnown =
        Boolean(legacyRow.dispensary_id) && knownDispensaryIds.has(legacyRow.dispensary_id);
      if (!storeIsKnown) {
        stats.skipped_missing_store++;
        stats.skipped++;
        continue;
      }

      // Category is derived from the subcategory; fall back to the raw value
      const categoryRaw = deriveCategory(legacyRow.subcategory) || legacyRow.subcategory || null;

      // Capture time: crawl time first, then row creation time, then "now"
      const capturedAt = legacyRow.crawled_at || legacyRow.created_at || new Date();

      // Pricing/THC/CBD/stock data is not available for legacy snapshots, so those
      // columns are written as NULL / defaults; future crawls will populate them.
      const insertParams = [
        legacyRow.dispensary_id,
        legacyRow.provider_product_id,
        capturedAt, // captured_at
        legacyRow.name,
        legacyRow.brand_name,
        categoryRaw,
        legacyRow.subcategory || null,
        null, // price_rec
        null, // price_med
        null, // price_rec_special
        false, // is_on_special
        true, // is_in_stock (actual state unknown)
        'unknown', // stock_status
        null, // thc_percent
        null, // cbd_percent
        legacyRow.primary_image_url || null, // image_url from the legacy product
        null, // raw_data
        legacyRow.created_at || capturedAt, // created_at
      ];

      // WRITE to the cannaiq database
      try {
        const insertResult = await cannaiqPool.query(insertSql, insertParams);
        const wasInserted = (insertResult.rowCount || 0) > 0;
        if (wasInserted) {
          stats.inserted++;
        } else {
          stats.skipped_duplicate++;
          stats.skipped++;
        }
      } catch (err: any) {
        // Anything other than a foreign-key violation is unexpected
        if (err.code !== '23503') {
          throw err;
        }
        stats.skipped_missing_store++;
        stats.skipped++;
      }
    }

    log(` Processed ${Math.min(offset + PAGE_SIZE, totalCount)}/${totalCount} snapshots...`);
  }

  logStoreProductStats(stats);
  return stats;
}
|
||||
|
||||
// =====================================================
|
||||
// STEP 8: Link store_product_snapshots to store_products (on cannaiq db)
|
||||
// =====================================================
|
||||
/**
 * Step 8: fill in `store_product_snapshots.store_product_id` on the cannaiq
 * database.
 *
 * A single UPDATE matches snapshots to store products on
 * (dispensary_id, provider, provider_product_id) and only touches snapshots
 * whose `store_product_id` is still NULL.
 *
 * @returns Stats whose `read` and `inserted` both equal the number of rows
 *          the UPDATE returned; `skipped` is always 0.
 */
async function linkSnapshotsToStoreProducts(): Promise<Stats> {
  log('Step 8: Link store_product_snapshots to store_products...');

  const { rowCount } = await cannaiqPool.query(`
    UPDATE store_product_snapshots sps
    SET store_product_id = sp.id
    FROM store_products sp
    WHERE sps.dispensary_id = sp.dispensary_id
      AND sps.provider = sp.provider
      AND sps.provider_product_id = sp.provider_product_id
      AND sps.store_product_id IS NULL
    RETURNING sps.id
  `);

  const linkedRows = rowCount || 0;
  const stats: Stats = { read: linkedRows, inserted: linkedRows, skipped: 0 };

  logStats('store_product_snapshots.store_product_id', stats);
  return stats;
}
|
||||
|
||||
// =====================================================
|
||||
// MAIN
|
||||
// =====================================================
|
||||
/**
 * Entry point: verifies both database connections, runs the eight migration
 * steps in order, prints a row-count summary from the destination database,
 * and finally closes both connection pools.
 *
 * On failure the error is logged and the process exit code is set to 1.
 * `process.exitCode` is used rather than `process.exit(1)` because
 * `process.exit` terminates immediately, which would skip the `finally`
 * block and leave both pools open.
 */
async function main(): Promise<void> {
  log('='.repeat(60));
  log('CannaiQ Legacy Import ETL');
  log('='.repeat(60));
  log('');
  log('This script migrates data from dutchie_legacy -> dutchie_menus.');
  log('All operations are INSERT-ONLY and IDEMPOTENT.');
  log('');

  try {
    // Test both connections and show which databases we're connected to
    const cannaiqInfo = await cannaiqPool.query('SELECT current_database() as db, current_user as user');
    const legacyInfo = await legacyPool.query('SELECT current_database() as db, current_user as user');

    log(`DESTINATION (cannaiq): ${cannaiqInfo.rows[0].user}@${cannaiqInfo.rows[0].db}`);
    log(`SOURCE (legacy): ${legacyInfo.rows[0].user}@${legacyInfo.rows[0].db}`);
    log('');

    // Verify we're not writing to legacy (compares database names only)
    if (legacyInfo.rows[0].db === cannaiqInfo.rows[0].db) {
      throw new Error(
        'SAFETY CHECK FAILED: Source and destination are the same database!\n' +
          'CANNAIQ_DB_NAME must be different from LEGACY_DB_NAME.'
      );
    }

    // Run steps strictly in order: later steps depend on rows written by earlier ones
    await backfillStateIds();
    log('');

    await insertChains();
    log('');

    await linkDispensariesToChains();
    log('');

    await insertBrands();
    log('');

    await insertStoreProducts();
    log('');

    await linkStoreProductsToBrands();
    log('');

    await insertStoreProductSnapshots();
    log('');

    await linkSnapshotsToStoreProducts();
    log('');

    // Final summary (from cannaiq db)
    log('='.repeat(60));
    log('SUMMARY (from dutchie_menus)');
    log('='.repeat(60));

    const summaryQueries = [
      { table: 'states', query: 'SELECT COUNT(*) FROM states' },
      { table: 'chains', query: 'SELECT COUNT(*) FROM chains' },
      { table: 'brands', query: 'SELECT COUNT(*) FROM brands' },
      { table: 'dispensaries (with state_id)', query: 'SELECT COUNT(*) FROM dispensaries WHERE state_id IS NOT NULL' },
      { table: 'dispensaries (with chain_id)', query: 'SELECT COUNT(*) FROM dispensaries WHERE chain_id IS NOT NULL' },
      { table: 'store_products', query: 'SELECT COUNT(*) FROM store_products' },
      { table: 'store_products (with brand_id)', query: 'SELECT COUNT(*) FROM store_products WHERE brand_id IS NOT NULL' },
      { table: 'store_product_snapshots', query: 'SELECT COUNT(*) FROM store_product_snapshots' },
      { table: 'store_product_snapshots (with store_product_id)', query: 'SELECT COUNT(*) FROM store_product_snapshots WHERE store_product_id IS NOT NULL' },
    ];

    for (const sq of summaryQueries) {
      const result = await cannaiqPool.query(sq.query);
      log(` ${sq.table}: ${result.rows[0].count}`);
    }

    log('');
    log('Legacy import complete!');
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    log(`ERROR: ${message}`);
    console.error(error);
    // Signal failure without cutting the process short, so `finally` still runs
    process.exitCode = 1;
  } finally {
    // Close both pools even if closing one of them fails
    const closeResults = await Promise.allSettled([cannaiqPool.end(), legacyPool.end()]);
    for (const closeResult of closeResults) {
      if (closeResult.status === 'rejected') {
        console.error(closeResult.reason);
        process.exitCode = 1;
      }
    }
  }
}

// Run (main handles its own errors, so the returned promise is intentionally not awaited)
void main();
|
||||
749
backend/src/scripts/etl/legacy-import.ts
Normal file
749
backend/src/scripts/etl/legacy-import.ts
Normal file
@@ -0,0 +1,749 @@
|
||||
/**
 * Legacy Data Import ETL Script
 *
 * DEPRECATED: This script assumed a two-database architecture.
 *
 * CURRENT ARCHITECTURE (Single Database):
 *   - All data lives in ONE database, and it is the only database:
 *     cannaiq (in the cannaiq-postgres container)
 *   - Legacy tables exist INSIDE this same database with namespaced prefixes (e.g., legacy_*)
 *
 * If you need to import legacy data:
 *   1. Import into namespaced tables (legacy_dispensaries, legacy_products, etc.)
 *      inside the main cannaiq database
 *   2. Use the canonical connection from src/dutchie-az/db/connection.ts
 *
 * SAFETY RULES:
 *   - INSERT-ONLY: No UPDATE, no DELETE, no TRUNCATE
 *   - ON CONFLICT DO NOTHING: Skip duplicates, never overwrite
 *   - Batch Processing: 500-1000 rows per batch (this script uses BATCH_SIZE = 500)
 *   - Manual Invocation Only: Requires explicit user execution
 */
|
||||
|
||||
import { Pool, PoolClient } from 'pg';
|
||||
|
||||
// ============================================================
|
||||
// CONFIGURATION
|
||||
// ============================================================
|
||||
|
||||
/** Number of rows fetched per LIMIT/OFFSET page by every import function in this script. */
const BATCH_SIZE = 500;
|
||||
|
||||
/** Run options assembled from the command line by `parseArgs`. */
interface ETLConfig {
  /** Set by `--dry-run`; import functions then log what they would do instead of writing. */
  dryRun: boolean;
  /** Names of the tables to import; overridden by `--tables=a,b,c`. */
  tables: string[];
}
|
||||
|
||||
/** Per-table counters returned by each import function. */
interface ETLStats {
  /** Label of the table these counters describe. */
  table: string;
  /** Rows fetched from the source table. */
  read: number;
  /** Rows written (or, in a dry run, rows that would have been attempted). */
  inserted: number;
  /** Rows not written: conflicts or missing parent rows. */
  skipped: number;
  /** Rows whose insert threw. */
  errors: number;
  /** Wall-clock duration of the import, in milliseconds. */
  durationMs: number;
}
|
||||
|
||||
// Parse command line arguments
|
||||
/**
 * Builds the run configuration from `process.argv`.
 *
 * Recognised arguments:
 *  - `--dry-run`        sets `dryRun` to true.
 *  - `--tables=a,b,c`   replaces the default table list. Names are trimmed and
 *                       empty entries (from `a,,b` or a trailing comma) are
 *                       dropped so they cannot reach the table dispatch.
 * Unrecognised arguments are ignored.
 *
 * @returns The parsed configuration; defaults are a real run over all four tables.
 */
function parseArgs(): ETLConfig {
  const config: ETLConfig = {
    dryRun: false,
    tables: ['dispensaries', 'products', 'dutchie_products', 'dutchie_product_snapshots'],
  };

  const tablesPrefix = '--tables=';

  for (const arg of process.argv.slice(2)) {
    if (arg === '--dry-run') {
      config.dryRun = true;
    } else if (arg.startsWith(tablesPrefix)) {
      config.tables = arg
        .slice(tablesPrefix.length)
        .split(',')
        .map((tableName) => tableName.trim())
        .filter((tableName) => tableName.length > 0);
    }
  }

  return config;
}
|
||||
|
||||
// ============================================================
|
||||
// DATABASE CONNECTIONS
|
||||
// ============================================================
|
||||
|
||||
// DEPRECATED: Both pools point to the same database (cannaiq)
|
||||
// Legacy tables exist inside the main database with namespaced prefixes
|
||||
/**
 * Creates the connection pool used for reading legacy tables.
 *
 * Connection settings come from the CANNAIQ_DB_* environment variables, with
 * local-development fallbacks; the pool holds at most 5 clients. The settings
 * are the same ones `createCannaiqPool` uses.
 */
function createLegacyPool(): Pool {
  const {
    CANNAIQ_DB_HOST,
    CANNAIQ_DB_PORT,
    CANNAIQ_DB_USER,
    CANNAIQ_DB_PASS,
    CANNAIQ_DB_NAME,
  } = process.env;

  const legacyPool = new Pool({
    host: CANNAIQ_DB_HOST || 'localhost',
    port: parseInt(CANNAIQ_DB_PORT || '54320'),
    user: CANNAIQ_DB_USER || 'dutchie',
    password: CANNAIQ_DB_PASS || 'dutchie_local_pass',
    database: CANNAIQ_DB_NAME || 'cannaiq',
    max: 5,
  });
  return legacyPool;
}
|
||||
|
||||
/**
 * Creates the connection pool used for writing to the cannaiq database.
 *
 * Connection settings come from the CANNAIQ_DB_* environment variables, with
 * local-development fallbacks; the pool holds at most 5 clients.
 */
function createCannaiqPool(): Pool {
  const {
    CANNAIQ_DB_HOST,
    CANNAIQ_DB_PORT,
    CANNAIQ_DB_USER,
    CANNAIQ_DB_PASS,
    CANNAIQ_DB_NAME,
  } = process.env;

  const cannaiqPool = new Pool({
    host: CANNAIQ_DB_HOST || 'localhost',
    port: parseInt(CANNAIQ_DB_PORT || '54320'),
    user: CANNAIQ_DB_USER || 'dutchie',
    password: CANNAIQ_DB_PASS || 'dutchie_local_pass',
    database: CANNAIQ_DB_NAME || 'cannaiq',
    max: 5,
  });
  return cannaiqPool;
}
|
||||
|
||||
// ============================================================
|
||||
// STAGING TABLE CREATION
|
||||
// ============================================================
|
||||
|
||||
/**
 * DDL for the staging tables this script writes into. Every statement uses
 * IF NOT EXISTS, so the whole script can be re-run safely. It is sent to the
 * server as one multi-statement query by `createStagingTables`.
 */
const STAGING_TABLES_SQL = `
  -- Staging table for legacy dispensaries
  CREATE TABLE IF NOT EXISTS dispensaries_from_legacy (
    id SERIAL PRIMARY KEY,
    legacy_id INTEGER NOT NULL,
    name VARCHAR(255) NOT NULL,
    slug VARCHAR(255) NOT NULL,
    city VARCHAR(100) NOT NULL,
    state VARCHAR(10) NOT NULL,
    postal_code VARCHAR(20),
    address TEXT,
    latitude DECIMAL(10,7),
    longitude DECIMAL(10,7),
    menu_url TEXT,
    website TEXT,
    legacy_metadata JSONB,
    imported_at TIMESTAMPTZ DEFAULT NOW(),
    UNIQUE(legacy_id)
  );

  -- Staging table for legacy products
  CREATE TABLE IF NOT EXISTS products_from_legacy (
    id SERIAL PRIMARY KEY,
    legacy_product_id INTEGER NOT NULL,
    legacy_dispensary_id INTEGER,
    external_product_id VARCHAR(255),
    name VARCHAR(500) NOT NULL,
    brand_name VARCHAR(255),
    type VARCHAR(100),
    subcategory VARCHAR(100),
    strain_type VARCHAR(50),
    thc DECIMAL(10,4),
    cbd DECIMAL(10,4),
    price_cents INTEGER,
    original_price_cents INTEGER,
    stock_status VARCHAR(20),
    weight VARCHAR(100),
    primary_image_url TEXT,
    first_seen_at TIMESTAMPTZ,
    last_seen_at TIMESTAMPTZ,
    legacy_raw_payload JSONB,
    imported_at TIMESTAMPTZ DEFAULT NOW(),
    UNIQUE(legacy_product_id)
  );

  -- Staging table for legacy price history
  CREATE TABLE IF NOT EXISTS price_history_legacy (
    id SERIAL PRIMARY KEY,
    legacy_product_id INTEGER NOT NULL,
    price_cents INTEGER,
    recorded_at TIMESTAMPTZ,
    imported_at TIMESTAMPTZ DEFAULT NOW()
  );

  -- Index for efficient lookups
  CREATE INDEX IF NOT EXISTS idx_disp_legacy_slug ON dispensaries_from_legacy(slug, city, state);
  CREATE INDEX IF NOT EXISTS idx_prod_legacy_ext_id ON products_from_legacy(external_product_id);
`;
|
||||
|
||||
/**
 * Creates the staging tables and indexes described by `STAGING_TABLES_SQL`.
 *
 * @param cannaiqPool Pool for the database that receives the staging tables.
 * @param dryRun      When true, only logs the intent and touches nothing.
 */
async function createStagingTables(cannaiqPool: Pool, dryRun: boolean): Promise<void> {
  console.log('[ETL] Creating staging tables...');

  if (!dryRun) {
    const stagingClient = await cannaiqPool.connect();
    try {
      await stagingClient.query(STAGING_TABLES_SQL);
      console.log('[ETL] Staging tables created successfully');
    } finally {
      // Hand the client back to the pool whether or not the DDL succeeded
      stagingClient.release();
    }
    return;
  }

  console.log('[ETL] DRY RUN: Would create staging tables');
}
|
||||
|
||||
// ============================================================
|
||||
// ETL FUNCTIONS
|
||||
// ============================================================
|
||||
|
||||
/**
 * Copies every row of the source `dispensaries` table into the
 * `dispensaries_from_legacy` staging table.
 *
 * Rows are read in pages of `BATCH_SIZE` ordered by id. Each row is inserted
 * individually with `ON CONFLICT (legacy_id) DO NOTHING`; a row that already
 * exists counts as skipped, and a row whose insert throws counts as an error
 * (logged, not re-thrown). Provider-related columns are folded into the
 * `legacy_metadata` JSON document.
 *
 * @param legacyPool  Pool the source rows are read from.
 * @param cannaiqPool Pool the staging rows are written to.
 * @param dryRun      When true, rows are read and counted as inserted but nothing is written.
 * @returns Counters and elapsed time for this table.
 */
async function importDispensaries(
  legacyPool: Pool,
  cannaiqPool: Pool,
  dryRun: boolean
): Promise<ETLStats> {
  const startTime = Date.now();
  const stats: ETLStats = {
    table: 'dispensaries',
    read: 0,
    inserted: 0,
    skipped: 0,
    errors: 0,
    durationMs: 0,
  };

  console.log('[ETL] Importing dispensaries...');

  // Each client is acquired inside the scope whose `finally` releases it, so a
  // failure to obtain the second client can no longer leak the first one.
  const legacyClient = await legacyPool.connect();
  try {
    const cannaiqClient = await cannaiqPool.connect();
    try {
      // Count total rows
      const countResult = await legacyClient.query('SELECT COUNT(*) FROM dispensaries');
      const totalRows = parseInt(countResult.rows[0].count, 10);
      console.log(`[ETL] Found ${totalRows} dispensaries in legacy database`);

      // Process in batches
      let offset = 0;
      while (offset < totalRows) {
        const batchResult = await legacyClient.query(`
          SELECT
            id, name, slug, city, state, zip, address,
            latitude, longitude, menu_url, website, dba_name,
            menu_provider, product_provider, provider_detection_data
          FROM dispensaries
          ORDER BY id
          LIMIT $1 OFFSET $2
        `, [BATCH_SIZE, offset]);

        stats.read += batchResult.rows.length;

        if (dryRun) {
          console.log(`[ETL] DRY RUN: Would insert batch of ${batchResult.rows.length} dispensaries`);
          stats.inserted += batchResult.rows.length;
        } else {
          for (const row of batchResult.rows) {
            try {
              // Columns without a staging counterpart are preserved as JSON
              const legacyMetadata = {
                dba_name: row.dba_name,
                menu_provider: row.menu_provider,
                product_provider: row.product_provider,
                provider_detection_data: row.provider_detection_data,
              };

              const insertResult = await cannaiqClient.query(`
                INSERT INTO dispensaries_from_legacy
                  (legacy_id, name, slug, city, state, postal_code, address,
                   latitude, longitude, menu_url, website, legacy_metadata)
                VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
                ON CONFLICT (legacy_id) DO NOTHING
                RETURNING id
              `, [
                row.id,
                row.name,
                row.slug,
                row.city,
                row.state,
                row.zip,
                row.address,
                row.latitude,
                row.longitude,
                row.menu_url,
                row.website,
                JSON.stringify(legacyMetadata),
              ]);

              // rowCount may be null; treat that the same as "nothing inserted"
              if ((insertResult.rowCount ?? 0) > 0) {
                stats.inserted++;
              } else {
                stats.skipped++;
              }
            } catch (err: any) {
              stats.errors++;
              console.error(`[ETL] Error inserting dispensary ${row.id}:`, err.message);
            }
          }
        }

        offset += BATCH_SIZE;
        console.log(`[ETL] Processed ${Math.min(offset, totalRows)}/${totalRows} dispensaries`);
      }
    } finally {
      cannaiqClient.release();
    }
  } finally {
    legacyClient.release();
  }

  stats.durationMs = Date.now() - startTime;
  return stats;
}
|
||||
|
||||
/**
 * Copies every row of the source `products` table into the
 * `products_from_legacy` staging table.
 *
 * Rows are read in pages of `BATCH_SIZE` ordered by id. Each row is inserted
 * individually with `ON CONFLICT (legacy_product_id) DO NOTHING`; a row that
 * already exists counts as skipped, and a row whose insert throws counts as
 * an error (logged, not re-thrown).
 *
 * Conversions applied per row:
 *  - `in_stock` true/false/other becomes 'in_stock' / 'out_of_stock' / 'unknown'.
 *  - `price` and `original_price` are converted to integer cents. A price of 0
 *    is kept as 0 cents; missing, empty or unparseable values become NULL.
 *
 * @param legacyPool  Pool the source rows are read from.
 * @param cannaiqPool Pool the staging rows are written to.
 * @param dryRun      When true, rows are read and counted as inserted but nothing is written.
 * @returns Counters and elapsed time for this table.
 */
async function importProducts(
  legacyPool: Pool,
  cannaiqPool: Pool,
  dryRun: boolean
): Promise<ETLStats> {
  const startTime = Date.now();
  const stats: ETLStats = {
    table: 'products',
    read: 0,
    inserted: 0,
    skipped: 0,
    errors: 0,
    durationMs: 0,
  };

  // Converts a decimal amount (number or numeric string) to integer cents.
  // Null/undefined/empty/non-numeric input yields null; 0 yields 0.
  const toCents = (amount: unknown): number | null => {
    if (amount == null || amount === '') {
      return null;
    }
    const parsed = parseFloat(String(amount));
    return Number.isFinite(parsed) ? Math.round(parsed * 100) : null;
  };

  console.log('[ETL] Importing legacy products...');

  // Each client is acquired inside the scope whose `finally` releases it, so a
  // failure to obtain the second client can no longer leak the first one.
  const legacyClient = await legacyPool.connect();
  try {
    const cannaiqClient = await cannaiqPool.connect();
    try {
      const countResult = await legacyClient.query('SELECT COUNT(*) FROM products');
      const totalRows = parseInt(countResult.rows[0].count, 10);
      console.log(`[ETL] Found ${totalRows} products in legacy database`);

      let offset = 0;
      while (offset < totalRows) {
        const batchResult = await legacyClient.query(`
          SELECT
            id, dispensary_id, dutchie_product_id, name, brand,
            subcategory, strain_type, thc_percentage, cbd_percentage,
            price, original_price, in_stock, weight, image_url,
            first_seen_at, last_seen_at, raw_data
          FROM products
          ORDER BY id
          LIMIT $1 OFFSET $2
        `, [BATCH_SIZE, offset]);

        stats.read += batchResult.rows.length;

        if (dryRun) {
          console.log(`[ETL] DRY RUN: Would insert batch of ${batchResult.rows.length} products`);
          stats.inserted += batchResult.rows.length;
        } else {
          for (const row of batchResult.rows) {
            try {
              const stockStatus = row.in_stock === true ? 'in_stock' :
                row.in_stock === false ? 'out_of_stock' : 'unknown';
              const priceCents = toCents(row.price);
              const originalPriceCents = toCents(row.original_price);

              const insertResult = await cannaiqClient.query(`
                INSERT INTO products_from_legacy
                  (legacy_product_id, legacy_dispensary_id, external_product_id,
                   name, brand_name, subcategory, strain_type, thc, cbd,
                   price_cents, original_price_cents, stock_status, weight,
                   primary_image_url, first_seen_at, last_seen_at, legacy_raw_payload)
                VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17)
                ON CONFLICT (legacy_product_id) DO NOTHING
                RETURNING id
              `, [
                row.id,
                row.dispensary_id,
                row.dutchie_product_id,
                row.name,
                row.brand,
                row.subcategory,
                row.strain_type,
                row.thc_percentage,
                row.cbd_percentage,
                priceCents,
                originalPriceCents,
                stockStatus,
                row.weight,
                row.image_url,
                row.first_seen_at,
                row.last_seen_at,
                row.raw_data ? JSON.stringify(row.raw_data) : null,
              ]);

              // rowCount may be null; treat that the same as "nothing inserted"
              if ((insertResult.rowCount ?? 0) > 0) {
                stats.inserted++;
              } else {
                stats.skipped++;
              }
            } catch (err: any) {
              stats.errors++;
              console.error(`[ETL] Error inserting product ${row.id}:`, err.message);
            }
          }
        }

        offset += BATCH_SIZE;
        console.log(`[ETL] Processed ${Math.min(offset, totalRows)}/${totalRows} products`);
      }
    } finally {
      cannaiqClient.release();
    }
  } finally {
    legacyClient.release();
  }

  stats.durationMs = Date.now() - startTime;
  return stats;
}
|
||||
|
||||
/**
 * Copies rows of the source `dutchie_products` table into the destination
 * `dutchie_products` table.
 *
 * Rows are read in pages of `BATCH_SIZE` ordered by id. A row is skipped when
 * its `dispensary_id` does not exist in the destination `dispensaries` table,
 * or when the insert hits the (dispensary_id, external_product_id) conflict
 * target. A row whose insert throws counts as an error; only the first five
 * error messages are printed.
 *
 * The set of destination dispensary ids is loaded once (on the first
 * non-dry-run batch) instead of being queried per row. Ids are compared as
 * strings so that integer and bigint columns (which the driver may return as
 * number and string respectively) still match.
 *
 * NOTE(review): JSON-like columns (cannabinoids_v2, effects, images,
 * measurements, past_c_names, options_*, latest_raw_payload) are passed
 * through as the driver returned them. node-postgres serialises JS arrays as
 * Postgres array literals, so array-valued JSON/JSONB columns may need
 * JSON.stringify — confirm against the table schema.
 *
 * @param legacyPool  Pool the source rows are read from.
 * @param cannaiqPool Pool the destination rows are written to.
 * @param dryRun      When true, rows are read and counted as inserted but nothing is written.
 * @returns Counters and elapsed time for this table.
 */
async function importDutchieProducts(
  legacyPool: Pool,
  cannaiqPool: Pool,
  dryRun: boolean
): Promise<ETLStats> {
  const startTime = Date.now();
  const stats: ETLStats = {
    table: 'dutchie_products',
    read: 0,
    inserted: 0,
    skipped: 0,
    errors: 0,
    durationMs: 0,
  };

  console.log('[ETL] Importing dutchie_products...');

  // Each client is acquired inside the scope whose `finally` releases it, so a
  // failure to obtain the second client can no longer leak the first one.
  const legacyClient = await legacyPool.connect();
  try {
    const cannaiqClient = await cannaiqPool.connect();
    try {
      const countResult = await legacyClient.query('SELECT COUNT(*) FROM dutchie_products');
      const totalRows = parseInt(countResult.rows[0].count, 10);
      console.log(`[ETL] Found ${totalRows} dutchie_products in legacy database`);

      // Products reference dispensaries by id, so dispensaries must already be
      // present in the destination. Loaded lazily so a dry run never queries it.
      let knownDispensaryIds: Set<string> | undefined;

      let offset = 0;
      while (offset < totalRows) {
        const batchResult = await legacyClient.query(`
          SELECT *
          FROM dutchie_products
          ORDER BY id
          LIMIT $1 OFFSET $2
        `, [BATCH_SIZE, offset]);

        stats.read += batchResult.rows.length;

        if (dryRun) {
          console.log(`[ETL] DRY RUN: Would insert batch of ${batchResult.rows.length} dutchie_products`);
          stats.inserted += batchResult.rows.length;
        } else {
          if (!knownDispensaryIds) {
            const dispensaries = await cannaiqClient.query('SELECT id FROM dispensaries');
            knownDispensaryIds = new Set(dispensaries.rows.map((d) => String(d.id)));
          }

          // For each row, attempt insert with ON CONFLICT DO NOTHING
          for (const row of batchResult.rows) {
            try {
              // Skip products for dispensaries not yet imported
              if (!knownDispensaryIds.has(String(row.dispensary_id))) {
                stats.skipped++;
                continue;
              }

              const insertResult = await cannaiqClient.query(`
                INSERT INTO dutchie_products
                  (dispensary_id, platform, external_product_id, platform_dispensary_id,
                   c_name, name, brand_name, brand_id, brand_logo_url,
                   type, subcategory, strain_type, provider,
                   thc, thc_content, cbd, cbd_content, cannabinoids_v2, effects,
                   status, medical_only, rec_only, featured, coming_soon,
                   certificate_of_analysis_enabled,
                   is_below_threshold, is_below_kiosk_threshold,
                   options_below_threshold, options_below_kiosk_threshold,
                   stock_status, total_quantity_available,
                   primary_image_url, images, measurements, weight, past_c_names,
                   created_at_dutchie, updated_at_dutchie, latest_raw_payload)
                VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38, $39)
                ON CONFLICT (dispensary_id, external_product_id) DO NOTHING
                RETURNING id
              `, [
                row.dispensary_id,
                row.platform || 'dutchie',
                row.external_product_id,
                row.platform_dispensary_id,
                row.c_name,
                row.name,
                row.brand_name,
                row.brand_id,
                row.brand_logo_url,
                row.type,
                row.subcategory,
                row.strain_type,
                row.provider,
                row.thc,
                row.thc_content,
                row.cbd,
                row.cbd_content,
                row.cannabinoids_v2,
                row.effects,
                row.status,
                row.medical_only,
                row.rec_only,
                row.featured,
                row.coming_soon,
                row.certificate_of_analysis_enabled,
                row.is_below_threshold,
                row.is_below_kiosk_threshold,
                row.options_below_threshold,
                row.options_below_kiosk_threshold,
                row.stock_status,
                row.total_quantity_available,
                row.primary_image_url,
                row.images,
                row.measurements,
                row.weight,
                row.past_c_names,
                row.created_at_dutchie,
                row.updated_at_dutchie,
                row.latest_raw_payload,
              ]);

              // rowCount may be null; treat that the same as "nothing inserted"
              if ((insertResult.rowCount ?? 0) > 0) {
                stats.inserted++;
              } else {
                stats.skipped++;
              }
            } catch (err: any) {
              stats.errors++;
              // Print only the first few failures to keep the log readable
              if (stats.errors <= 5) {
                console.error(`[ETL] Error inserting dutchie_product ${row.id}:`, err.message);
              }
            }
          }
        }

        offset += BATCH_SIZE;
        console.log(`[ETL] Processed ${Math.min(offset, totalRows)}/${totalRows} dutchie_products`);
      }
    } finally {
      cannaiqClient.release();
    }
  } finally {
    legacyClient.release();
  }

  stats.durationMs = Date.now() - startTime;
  return stats;
}
|
||||
|
||||
/**
 * Copy legacy `dutchie_product_snapshots` rows into the CannaiQ database.
 *
 * Legacy rows are read in pages of `BATCH_SIZE`, ordered by `id`. Each snapshot
 * is re-pointed at the CannaiQ `dutchie_products` row that shares its
 * `dispensary_id` + `external_product_id`; snapshots without such a product are
 * counted as skipped. Rows are written with a plain INSERT (no ON CONFLICT
 * clause). Per-row failures are counted, and only the first five are logged.
 *
 * In dry-run mode nothing is inserted and every row read is counted as
 * "inserted".
 *
 * @param legacyPool - Pool for the legacy (source) database.
 * @param cannaiqPool - Pool for the CannaiQ (destination) database.
 * @param dryRun - When true, read and report only; perform no INSERTs.
 * @returns Read / inserted / skipped / error counters plus elapsed milliseconds.
 */
async function importDutchieSnapshots(
  legacyPool: Pool,
  cannaiqPool: Pool,
  dryRun: boolean
): Promise<ETLStats> {
  const startTime = Date.now();
  const stats: ETLStats = {
    table: 'dutchie_product_snapshots',
    read: 0,
    inserted: 0,
    skipped: 0,
    errors: 0,
    durationMs: 0,
  };

  // Hoisted out of the row loop: the statement text never changes.
  const insertSql = `
    INSERT INTO dutchie_product_snapshots
      (dutchie_product_id, dispensary_id, platform_dispensary_id,
       external_product_id, pricing_type, crawl_mode,
       status, featured, special, medical_only, rec_only,
       is_present_in_feed, stock_status,
       rec_min_price_cents, rec_max_price_cents, rec_min_special_price_cents,
       med_min_price_cents, med_max_price_cents, med_min_special_price_cents,
       wholesale_min_price_cents,
       total_quantity_available, total_kiosk_quantity_available,
       manual_inventory, is_below_threshold, is_below_kiosk_threshold,
       options, raw_payload, crawled_at)
    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28)
  `;

  console.log('[ETL] Importing dutchie_product_snapshots...');

  const legacyClient = await legacyPool.connect();
  try {
    // Acquired inside the outer try so the legacy client is still released
    // when this second connect() rejects.
    const cannaiqClient = await cannaiqPool.connect();
    try {
      const countResult = await legacyClient.query('SELECT COUNT(*) FROM dutchie_product_snapshots');
      const totalRows = parseInt(countResult.rows[0].count, 10);
      console.log(`[ETL] Found ${totalRows} dutchie_product_snapshots in legacy database`);

      // Build lookup: "dispensary_id:external_product_id" -> CannaiQ product id
      console.log('[ETL] Building product ID mapping...');
      const mappingResult = await cannaiqClient.query(`
        SELECT id, external_product_id, dispensary_id FROM dutchie_products
      `);
      const productByKey = new Map<string, number>();
      for (const product of mappingResult.rows) {
        productByKey.set(`${product.dispensary_id}:${product.external_product_id}`, product.id);
      }

      let offset = 0;
      while (offset < totalRows) {
        const batchResult = await legacyClient.query(`
          SELECT *
          FROM dutchie_product_snapshots
          ORDER BY id
          LIMIT $1 OFFSET $2
        `, [BATCH_SIZE, offset]);

        stats.read += batchResult.rows.length;

        if (dryRun) {
          console.log(`[ETL] DRY RUN: Would insert batch of ${batchResult.rows.length} snapshots`);
          stats.inserted += batchResult.rows.length;
        } else {
          for (const row of batchResult.rows) {
            try {
              // Map the legacy snapshot onto the already-imported product
              const canonicalProductId = productByKey.get(
                `${row.dispensary_id}:${row.external_product_id}`
              );
              if (canonicalProductId === undefined) {
                stats.skipped++;
                continue; // Skip snapshots for products not yet imported
              }

              // NOTE(review): `options` / `raw_payload` are passed through as read;
              // confirm the driver serializes them the way the target columns expect.
              await cannaiqClient.query(insertSql, [
                canonicalProductId,
                row.dispensary_id,
                row.platform_dispensary_id,
                row.external_product_id,
                row.pricing_type,
                row.crawl_mode,
                row.status,
                row.featured,
                row.special,
                row.medical_only,
                row.rec_only,
                row.is_present_in_feed,
                row.stock_status,
                row.rec_min_price_cents,
                row.rec_max_price_cents,
                row.rec_min_special_price_cents,
                row.med_min_price_cents,
                row.med_max_price_cents,
                row.med_min_special_price_cents,
                row.wholesale_min_price_cents,
                row.total_quantity_available,
                row.total_kiosk_quantity_available,
                row.manual_inventory,
                row.is_below_threshold,
                row.is_below_kiosk_threshold,
                row.options,
                row.raw_payload,
                row.crawled_at,
              ]);

              stats.inserted++;
            } catch (err: unknown) {
              stats.errors++;
              // Only the first few failures are logged to keep output readable
              if (stats.errors <= 5) {
                console.error(
                  `[ETL] Error inserting snapshot ${row.id}:`,
                  err instanceof Error ? err.message : err
                );
              }
            }
          }
        }

        offset += BATCH_SIZE;
        console.log(`[ETL] Processed ${Math.min(offset, totalRows)}/${totalRows} snapshots`);
      }
    } finally {
      cannaiqClient.release();
    }
  } finally {
    legacyClient.release();
  }

  stats.durationMs = Date.now() - startTime;
  return stats;
}
|
||||
|
||||
// ============================================================
|
||||
// MAIN
|
||||
// ============================================================
|
||||
|
||||
/**
 * Entry point of the legacy data import ETL.
 *
 * Parses CLI arguments, opens the legacy and CannaiQ pools, verifies both with
 * `SELECT 1`, calls `createStagingTables`, runs the importer for every table
 * named in `config.tables` (dispensaries, products, dutchie_products,
 * dutchie_product_snapshots - in that order) and prints a summary table.
 *
 * On a fatal error the message is logged, both pools are still closed, and the
 * process then exits with status 1.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('LEGACY DATA IMPORT ETL');
  console.log('='.repeat(60));

  const config = parseArgs();

  console.log(`Mode: ${config.dryRun ? 'DRY RUN' : 'LIVE'}`);
  console.log(`Tables: ${config.tables.join(', ')}`);
  console.log('');

  // Create connection pools
  const legacyPool = createLegacyPool();
  const cannaiqPool = createCannaiqPool();

  // Recorded instead of exiting inside `catch`: process.exit() there would
  // terminate before the `finally` block gets to close the pools.
  let failed = false;

  try {
    // Test connections
    console.log('[ETL] Testing database connections...');
    await legacyPool.query('SELECT 1');
    console.log('[ETL] Legacy database connected');
    await cannaiqPool.query('SELECT 1');
    console.log('[ETL] CannaiQ database connected');
    console.log('');

    // Create staging tables
    await createStagingTables(cannaiqPool, config.dryRun);
    console.log('');

    // Run imports
    const allStats: ETLStats[] = [];

    if (config.tables.includes('dispensaries')) {
      allStats.push(await importDispensaries(legacyPool, cannaiqPool, config.dryRun));
      console.log('');
    }

    if (config.tables.includes('products')) {
      allStats.push(await importProducts(legacyPool, cannaiqPool, config.dryRun));
      console.log('');
    }

    if (config.tables.includes('dutchie_products')) {
      allStats.push(await importDutchieProducts(legacyPool, cannaiqPool, config.dryRun));
      console.log('');
    }

    if (config.tables.includes('dutchie_product_snapshots')) {
      allStats.push(await importDutchieSnapshots(legacyPool, cannaiqPool, config.dryRun));
      console.log('');
    }

    // Print summary
    console.log('='.repeat(60));
    console.log('IMPORT SUMMARY');
    console.log('='.repeat(60));
    console.log('');
    // Header cells use the same widths as the data rows so the columns line up
    console.log(
      `| ${'Table'.padEnd(26)} | ${'Read'.padStart(8)} | ${'Inserted'.padStart(8)} | ${'Skipped'.padStart(8)} | ${'Errors'.padStart(8)} | ${'Duration'.padStart(8)} |`
    );
    console.log('|----------------------------|----------|----------|----------|----------|----------|');
    for (const s of allStats) {
      console.log(`| ${s.table.padEnd(26)} | ${String(s.read).padStart(8)} | ${String(s.inserted).padStart(8)} | ${String(s.skipped).padStart(8)} | ${String(s.errors).padStart(8)} | ${(s.durationMs / 1000).toFixed(1).padStart(7)}s |`);
    }
    console.log('');

    const totalInserted = allStats.reduce((sum, s) => sum + s.inserted, 0);
    const totalErrors = allStats.reduce((sum, s) => sum + s.errors, 0);
    console.log(`Total inserted: ${totalInserted}`);
    console.log(`Total errors: ${totalErrors}`);

    if (config.dryRun) {
      console.log('');
      console.log('DRY RUN COMPLETE - No data was written');
      console.log('Run without --dry-run to perform actual import');
    }
  } catch (error: unknown) {
    console.error('[ETL] Fatal error:', error instanceof Error ? error.message : error);
    failed = true;
  } finally {
    await legacyPool.end();
    await cannaiqPool.end();
  }

  if (failed) {
    process.exit(1);
  }

  console.log('');
  console.log('ETL complete');
}

main().catch((err) => {
  console.error('Unhandled error:', err);
  process.exit(1);
});
|
||||
@@ -1,4 +1,4 @@
|
||||
import { pool } from '../db/migrate';
|
||||
import { pool } from '../db/pool';
|
||||
import { getActiveProxy, putProxyInTimeout, isBotDetectionError } from '../services/proxy';
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
* npx tsx src/scripts/queue-dispensaries.ts --process # Process queued jobs
|
||||
*/
|
||||
|
||||
import { pool } from '../db/migrate';
|
||||
import { pool } from '../db/pool';
|
||||
import { logger } from '../services/logger';
|
||||
import {
|
||||
runDetectMenuProviderJob,
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
* npx tsx src/scripts/queue-intelligence.ts --dry-run
|
||||
*/
|
||||
|
||||
import { pool } from '../db/migrate';
|
||||
import { pool } from '../db/pool';
|
||||
import { logger } from '../services/logger';
|
||||
import {
|
||||
detectMultiCategoryProviders,
|
||||
|
||||
173
backend/src/scripts/resolve-dutchie-id.ts
Normal file
173
backend/src/scripts/resolve-dutchie-id.ts
Normal file
@@ -0,0 +1,173 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Dutchie Platform ID Resolver
|
||||
*
|
||||
* Standalone script to resolve a Dutchie dispensary slug to its platform ID.
|
||||
*
|
||||
* USAGE:
|
||||
* npx tsx src/scripts/resolve-dutchie-id.ts <slug>
|
||||
* npx tsx src/scripts/resolve-dutchie-id.ts hydroman-dispensary
|
||||
* npx tsx src/scripts/resolve-dutchie-id.ts AZ-Deeply-Rooted
|
||||
*
|
||||
* RESOLUTION STRATEGY:
|
||||
* 1. Navigate to https://dutchie.com/embedded-menu/{slug} via Puppeteer
|
||||
* 2. Extract window.reactEnv.dispensaryId (preferred - fastest)
|
||||
* 3. If reactEnv fails, call GraphQL GetAddressBasedDispensaryData as fallback
|
||||
*
|
||||
* OUTPUT:
|
||||
* - dispensaryId: The MongoDB ObjectId (e.g., "6405ef617056e8014d79101b")
|
||||
* - source: "reactEnv" or "graphql"
|
||||
* - httpStatus: HTTP status from embedded menu page
|
||||
* - error: Error message if resolution failed
|
||||
*/
|
||||
|
||||
import { resolveDispensaryIdWithDetails, ResolveDispensaryResult } from '../dutchie-az/services/graphql-client';
|
||||
|
||||
/**
 * CLI entry point: resolve a Dutchie dispensary slug to its platform ID.
 *
 * With no arguments, or with `--help` / `-h`, prints usage and exits 0.
 * Otherwise the first argument is taken as the slug and passed to
 * `resolveDispensaryIdWithDetails`. On success the ID, source, HTTP status and
 * duration are printed, followed by an example GraphQL request body and a JSON
 * summary. When no ID is returned, or when resolution throws, the failure is
 * printed and the process exits with status 1.
 */
async function main() {
  const args = process.argv.slice(2);

  if (args.length === 0 || args.includes('--help') || args.includes('-h')) {
    console.log(`
Dutchie Platform ID Resolver

Usage:
npx tsx src/scripts/resolve-dutchie-id.ts <slug>

Examples:
npx tsx src/scripts/resolve-dutchie-id.ts hydroman-dispensary
npx tsx src/scripts/resolve-dutchie-id.ts AZ-Deeply-Rooted
npx tsx src/scripts/resolve-dutchie-id.ts mint-cannabis

Resolution Strategy:
1. Puppeteer navigates to https://dutchie.com/embedded-menu/{slug}
2. Extracts window.reactEnv.dispensaryId (preferred)
3. Falls back to GraphQL GetAddressBasedDispensaryData if needed

Output Fields:
- dispensaryId: MongoDB ObjectId (e.g., "6405ef617056e8014d79101b")
- source: "reactEnv" (from page) or "graphql" (from API)
- httpStatus: HTTP status code from page load
- error: Error message if resolution failed
`);
    process.exit(0);
  }

  const slug = args[0];

  console.log('='.repeat(60));
  console.log('DUTCHIE PLATFORM ID RESOLVER');
  console.log('='.repeat(60));
  console.log(`Slug: ${slug}`);
  console.log(`Embedded Menu URL: https://dutchie.com/embedded-menu/${slug}`);
  console.log('');
  console.log('Resolving...');
  console.log('');

  const startTime = Date.now();

  try {
    const result: ResolveDispensaryResult = await resolveDispensaryIdWithDetails(slug);
    const duration = Date.now() - startTime;

    console.log('='.repeat(60));
    console.log('RESOLUTION RESULT');
    console.log('='.repeat(60));

    if (result.dispensaryId) {
      console.log(`✓ SUCCESS`);
      console.log('');
      console.log(` Dispensary ID: ${result.dispensaryId}`);
      console.log(` Source: ${result.source}`);
      console.log(` HTTP Status: ${result.httpStatus || 'N/A'}`);
      console.log(` Duration: ${duration}ms`);
      console.log('');

      // Show how to use this ID
      console.log('='.repeat(60));
      console.log('USAGE');
      console.log('='.repeat(60));
      console.log('');
      console.log('Use this ID in GraphQL FilteredProducts query:');
      console.log('');
      console.log(' POST https://dutchie.com/api-3/graphql');
      console.log('');
      console.log(' Body:');
      console.log(` {
"operationName": "FilteredProducts",
"variables": {
"productsFilter": {
"dispensaryId": "${result.dispensaryId}",
"pricingType": "rec",
"Status": "Active"
},
"page": 0,
"perPage": 100
},
"extensions": {
"persistedQuery": {
"version": 1,
"sha256Hash": "ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0"
}
}
}`);
      console.log('');

      // Output for piping/scripting
      console.log('='.repeat(60));
      console.log('JSON OUTPUT');
      console.log('='.repeat(60));
      console.log(JSON.stringify({
        success: true,
        slug,
        dispensaryId: result.dispensaryId,
        source: result.source,
        httpStatus: result.httpStatus,
        durationMs: duration,
      }, null, 2));
    } else {
      console.log(`✗ FAILED`);
      console.log('');
      console.log(` Error: ${result.error || 'Unknown error'}`);
      console.log(` HTTP Status: ${result.httpStatus || 'N/A'}`);
      console.log(` Duration: ${duration}ms`);
      console.log('');

      if (result.httpStatus === 403 || result.httpStatus === 404) {
        console.log('NOTE: This store may be removed or not accessible on Dutchie.');
        console.log(' Mark dispensary as not_crawlable in the database.');
      }

      console.log('');
      console.log('JSON OUTPUT:');
      console.log(JSON.stringify({
        success: false,
        slug,
        error: result.error,
        httpStatus: result.httpStatus,
        durationMs: duration,
      }, null, 2));

      process.exit(1);
    }
  } catch (error: unknown) {
    const duration = Date.now() - startTime;
    // A thrown value is not guaranteed to be an Error; normalise it first so
    // this handler cannot itself throw while reporting the failure.
    const message = error instanceof Error ? error.message : String(error);

    console.error('='.repeat(60));
    console.error('ERROR');
    console.error('='.repeat(60));
    console.error(`Message: ${message}`);
    console.error(`Duration: ${duration}ms`);
    console.error('');

    if (message.includes('net::ERR_NAME_NOT_RESOLVED')) {
      console.error('NOTE: DNS resolution failed. This typically happens when running');
      console.error(' locally due to network restrictions. Try running from the');
      console.error(' Kubernetes pod or a cloud environment.');
    }

    process.exit(1);
  }
}

main();
|
||||
105
backend/src/scripts/run-backfill.ts
Normal file
105
backend/src/scripts/run-backfill.ts
Normal file
@@ -0,0 +1,105 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Run Backfill CLI
|
||||
*
|
||||
* Import historical payloads from existing data sources.
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx src/scripts/run-backfill.ts [options]
|
||||
*
|
||||
* Options:
|
||||
* --source SOURCE Source to backfill from:
|
||||
* - dutchie_products (default)
|
||||
* - snapshots
|
||||
* - cache_files
|
||||
* - all
|
||||
* --dry-run Print changes without modifying DB
|
||||
* --limit N Max payloads to create (default: unlimited)
|
||||
* --dispensary ID Only backfill specific dispensary
|
||||
* --cache-path PATH Path to cache files (default: ./cache/payloads)
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { runBackfill, BackfillOptions } from '../hydration';
|
||||
|
||||
/**
 * Read the integer that follows `flag` in `args`.
 *
 * @returns The parsed base-10 value, or `undefined` when the flag (or its value) is absent.
 * @throws Error when a value is present but is not an integer.
 */
function readIntFlag(args: readonly string[], flag: string): number | undefined {
  const idx = args.indexOf(flag);
  const raw = idx === -1 ? undefined : args[idx + 1];
  if (!raw) {
    return undefined;
  }
  const parsed = parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    throw new Error(`Invalid value for ${flag}: "${raw}" (expected an integer)`);
  }
  return parsed;
}

/**
 * CLI entry point for the backfill runner.
 *
 * Reads `--source`, `--dry-run`, `--limit`, `--dispensary` and `--cache-path`
 * from the command line, calls `runBackfill` with a pool built from
 * `DATABASE_URL`, and prints per-source results. Non-integer `--limit` /
 * `--dispensary` values are rejected up front. On failure the pool is closed
 * before the process exits with status 1.
 */
async function main() {
  const args = process.argv.slice(2);

  const dryRun = args.includes('--dry-run');

  let source: BackfillOptions['source'] = 'dutchie_products';
  const sourceIdx = args.indexOf('--source');
  if (sourceIdx !== -1 && args[sourceIdx + 1]) {
    // NOTE(review): the value is cast, not validated, so an unknown source
    // is handed to runBackfill as-is.
    source = args[sourceIdx + 1] as BackfillOptions['source'];
  }

  let limit: number | undefined;
  let dispensaryId: number | undefined;
  try {
    limit = readIntFlag(args, '--limit');
    dispensaryId = readIntFlag(args, '--dispensary');
  } catch (error: unknown) {
    console.error('Backfill error:', error instanceof Error ? error.message : error);
    process.exit(1);
  }

  let cachePath: string | undefined;
  const cacheIdx = args.indexOf('--cache-path');
  if (cacheIdx !== -1 && args[cacheIdx + 1]) {
    cachePath = args[cacheIdx + 1];
  }

  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });

  // Recorded instead of exiting inside `catch`, so `finally` can close the pool.
  let failed = false;

  try {
    console.log('='.repeat(60));
    console.log('BACKFILL RUNNER');
    console.log('='.repeat(60));
    console.log(`Source: ${source}`);
    console.log(`Dry run: ${dryRun}`);
    if (limit !== undefined) console.log(`Limit: ${limit}`);
    if (dispensaryId !== undefined) console.log(`Dispensary: ${dispensaryId}`);
    if (cachePath) console.log(`Cache path: ${cachePath}`);
    console.log('');

    const results = await runBackfill(pool, {
      dryRun,
      source,
      limit,
      dispensaryId,
      cachePath,
    });

    console.log('\nBackfill Results:');
    console.log('='.repeat(40));

    for (const result of results) {
      console.log(`\n${result.source}:`);
      console.log(` Payloads created: ${result.payloadsCreated}`);
      console.log(` Skipped: ${result.skipped}`);
      console.log(` Errors: ${result.errors.length}`);
      console.log(` Duration: ${result.durationMs}ms`);

      if (result.errors.length > 0) {
        console.log(' First 5 errors:');
        for (const err of result.errors.slice(0, 5)) {
          console.log(` - ${err}`);
        }
      }
    }

    const totalCreated = results.reduce((sum, r) => sum + r.payloadsCreated, 0);
    console.log(`\nTotal payloads created: ${totalCreated}`);
  } catch (error: unknown) {
    console.error('Backfill error:', error instanceof Error ? error.message : error);
    failed = true;
  } finally {
    await pool.end();
  }

  if (failed) {
    process.exit(1);
  }
}

main();
|
||||
309
backend/src/scripts/run-discovery.ts
Normal file
309
backend/src/scripts/run-discovery.ts
Normal file
@@ -0,0 +1,309 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Dutchie Discovery CLI
|
||||
*
|
||||
* Command-line interface for running the Dutchie store discovery pipeline.
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx src/scripts/run-discovery.ts <command> [options]
|
||||
*
|
||||
* Commands:
|
||||
* discover:state <state> - Discover all stores in a state (e.g., AZ)
|
||||
* discover:city <city> - Discover stores in a single city
|
||||
* discover:full - Run full discovery pipeline
|
||||
* seed:cities <state> - Seed known cities for a state
|
||||
* stats - Show discovery statistics
|
||||
* list - List discovered locations
|
||||
*
|
||||
* Examples:
|
||||
* npx tsx src/scripts/run-discovery.ts discover:state AZ
|
||||
* npx tsx src/scripts/run-discovery.ts discover:city phoenix --state AZ
|
||||
* npx tsx src/scripts/run-discovery.ts seed:cities AZ
|
||||
* npx tsx src/scripts/run-discovery.ts stats
|
||||
* npx tsx src/scripts/run-discovery.ts list --status discovered --state AZ
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import {
|
||||
runFullDiscovery,
|
||||
discoverCity,
|
||||
discoverState,
|
||||
getDiscoveryStats,
|
||||
seedKnownCities,
|
||||
ARIZONA_CITIES,
|
||||
} from '../discovery';
|
||||
|
||||
/**
 * Parse `process.argv` into a command, positional arguments and flags.
 *
 * The first argument is the command (default `'help'`). Every later argument
 * starting with `--` is a flag, written either as `--key=value` or as
 * `--key value`; a flag with no value is stored as `true`. Everything else is
 * collected as a positional argument.
 *
 * Only the first `=` separates key from value, so values that themselves
 * contain `=` are kept intact.
 *
 * @returns `{ command, positional, flags }`.
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const command = args[0] || 'help';
  const positional: string[] = [];
  const flags: Record<string, string | boolean> = {};

  for (let i = 1; i < args.length; i++) {
    const arg = args[i];

    if (!arg.startsWith('--')) {
      positional.push(arg);
      continue;
    }

    const body = arg.slice(2);
    const eq = body.indexOf('=');
    if (eq !== -1) {
      // `--key=value` form; split on the first '=' only
      flags[body.slice(0, eq)] = body.slice(eq + 1);
      continue;
    }

    const next = args[i + 1];
    if (next && !next.startsWith('--')) {
      // `--key value` form consumes the following argument
      flags[body] = next;
      i++;
    } else {
      flags[body] = true;
    }
  }

  return { command, positional, flags };
}
|
||||
|
||||
/**
 * Build the database pool from the `DATABASE_URL` environment variable.
 *
 * Logs an error and exits the process with status 1 when the variable is unset
 * or empty.
 */
function createPool(): Pool {
  const { DATABASE_URL: connectionString } = process.env;

  if (connectionString) {
    return new Pool({ connectionString });
  }

  console.error('ERROR: DATABASE_URL environment variable is required');
  process.exit(1);
}
|
||||
|
||||
/** Write the CLI usage text (commands, options, examples) to stdout. */
function printHelp() {
  const helpLines = [
    '',
    'Dutchie Discovery CLI',
    '',
    'Usage:',
    'npx tsx src/scripts/run-discovery.ts <command> [options]',
    '',
    'Commands:',
    'discover:state <state> Discover all stores in a state (e.g., AZ)',
    'discover:city <city> Discover stores in a single city',
    'discover:full Run full discovery pipeline',
    'seed:cities <state> Seed known cities for a state',
    'stats Show discovery statistics',
    'list List discovered locations',
    '',
    'Options:',
    '--state <code> State code (e.g., AZ, CA, ON)',
    '--country <code> Country code (default: US)',
    '--status <status> Filter by status (discovered, verified, rejected, merged)',
    '--limit <n> Limit results (default: varies by command)',
    "--dry-run Don't make any changes, just show what would happen",
    '--verbose Show detailed output',
    '',
    'Examples:',
    'npx tsx src/scripts/run-discovery.ts discover:state AZ',
    'npx tsx src/scripts/run-discovery.ts discover:city phoenix --state AZ',
    'npx tsx src/scripts/run-discovery.ts seed:cities AZ',
    'npx tsx src/scripts/run-discovery.ts stats',
    'npx tsx src/scripts/run-discovery.ts list --status discovered --state AZ --limit 20',
    '',
  ];

  // Leading and trailing '' entries reproduce the newline right after the
  // opening backtick and right before the closing one in the template form.
  console.log(helpLines.join('\n'));
}
|
||||
|
||||
/** Flags as produced by `parseArgs`: a string value, or `true` for a bare flag. */
type CliFlags = Record<string, string | boolean>;

/** Return a flag's value only when it was supplied as a string; bare flags yield `undefined`. */
function stringFlag(flags: CliFlags, name: string): string | undefined {
  const value = flags[name];
  return typeof value === 'string' ? value : undefined;
}

/**
 * Read an integer flag.
 *
 * @returns `fallback` when the flag is absent or empty, otherwise the parsed base-10 value.
 * @throws Error when the flag was given without a value or with a non-integer value.
 */
function intFlag(flags: CliFlags, name: string, fallback: number): number {
  const raw = flags[name];
  if (!raw) {
    return fallback; // absent, or given as an empty `--name=`
  }
  const parsed = typeof raw === 'string' ? parseInt(raw, 10) : NaN;
  if (Number.isNaN(parsed)) {
    throw new Error(`--${name} requires an integer value`);
  }
  return parsed;
}

/**
 * Execute one discovery CLI command.
 *
 * @returns The process exit status: 0 on success, 1 for usage errors,
 *          an unknown command or a city that was not found.
 */
async function runCommand(
  pool: Pool,
  command: string,
  positional: string[],
  flags: CliFlags
): Promise<number> {
  switch (command) {
    case 'discover:state': {
      const stateCode = positional[0] || stringFlag(flags, 'state');
      if (!stateCode) {
        console.error('ERROR: State code is required');
        console.error('Usage: discover:state <state>');
        return 1;
      }

      console.log(`\nDiscovering stores in ${stateCode}...\n`);
      const result = await discoverState(pool, stateCode.toUpperCase(), {
        dryRun: Boolean(flags['dry-run']),
        verbose: Boolean(flags.verbose),
        cityLimit: intFlag(flags, 'limit', 100),
      });

      console.log('\n=== DISCOVERY RESULTS ===');
      console.log(`Cities crawled: ${result.locations.length}`);
      console.log(`Locations found: ${result.totalLocationsFound}`);
      console.log(`Locations upserted: ${result.totalLocationsUpserted}`);
      console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
      return 0;
    }

    case 'discover:city': {
      const citySlug = positional[0];
      if (!citySlug) {
        console.error('ERROR: City slug is required');
        console.error('Usage: discover:city <city-slug> [--state AZ]');
        return 1;
      }

      console.log(`\nDiscovering stores in ${citySlug}...\n`);
      const result = await discoverCity(pool, citySlug, {
        // Cast kept from the original call site; the option's declared type
        // lives in ../discovery and is not visible here.
        stateCode: stringFlag(flags, 'state') as string,
        countryCode: stringFlag(flags, 'country') || 'US',
        dryRun: Boolean(flags['dry-run']),
        verbose: Boolean(flags.verbose),
      });

      if (!result) {
        console.error(`City not found: ${citySlug}`);
        return 1;
      }

      console.log('\n=== DISCOVERY RESULTS ===');
      console.log(`City: ${result.citySlug}`);
      console.log(`Locations found: ${result.locationsFound}`);
      console.log(`Locations upserted: ${result.locationsUpserted}`);
      console.log(`New: ${result.locationsNew}, Updated: ${result.locationsUpdated}`);
      console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
      if (result.errors.length > 0) {
        console.log(`Errors: ${result.errors.length}`);
        result.errors.forEach((e) => console.log(` - ${e}`));
      }
      return 0;
    }

    case 'discover:full': {
      console.log('\nRunning full discovery pipeline...\n');
      const result = await runFullDiscovery(pool, {
        // Cast kept from the original call site (see discover:city).
        stateCode: stringFlag(flags, 'state') as string,
        countryCode: stringFlag(flags, 'country') || 'US',
        cityLimit: intFlag(flags, 'limit', 50),
        skipCityDiscovery: Boolean(flags['skip-cities']),
        onlyStale: !flags.all,
        staleDays: intFlag(flags, 'stale-days', 7),
        dryRun: Boolean(flags['dry-run']),
        verbose: Boolean(flags.verbose),
      });

      console.log('\n=== FULL DISCOVERY RESULTS ===');
      console.log(`Cities discovered: ${result.cities.citiesFound}`);
      console.log(`Cities upserted: ${result.cities.citiesUpserted}`);
      console.log(`Cities crawled: ${result.locations.length}`);
      console.log(`Total locations found: ${result.totalLocationsFound}`);
      console.log(`Total locations upserted: ${result.totalLocationsUpserted}`);
      console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`);
      return 0;
    }

    case 'seed:cities': {
      const stateCode = positional[0] || stringFlag(flags, 'state');
      if (!stateCode) {
        console.error('ERROR: State code is required');
        console.error('Usage: seed:cities <state>');
        return 1;
      }

      // Only Arizona has a predefined city list
      if (stateCode.toUpperCase() !== 'AZ') {
        console.error(`No predefined cities for state: ${stateCode}`);
        console.error('Add cities to city-discovery.ts ARIZONA_CITIES array (or add new state arrays)');
        return 1;
      }
      const cities = ARIZONA_CITIES;

      console.log(`\nSeeding ${cities.length} cities for ${stateCode}...\n`);
      const result = await seedKnownCities(pool, cities);
      console.log(`Created: ${result.created} new cities`);
      console.log(`Updated: ${result.updated} existing cities`);
      return 0;
    }

    case 'stats': {
      console.log('\nFetching discovery statistics...\n');
      const stats = await getDiscoveryStats(pool);

      console.log('=== CITIES ===');
      console.log(`Total: ${stats.cities.total}`);
      console.log(`Crawled (24h): ${stats.cities.crawledLast24h}`);
      console.log(`Never crawled: ${stats.cities.neverCrawled}`);
      console.log('');
      console.log('=== LOCATIONS ===');
      console.log(`Total active: ${stats.locations.total}`);
      console.log(`Discovered: ${stats.locations.discovered}`);
      console.log(`Verified: ${stats.locations.verified}`);
      console.log(`Merged: ${stats.locations.merged}`);
      console.log(`Rejected: ${stats.locations.rejected}`);
      console.log('');
      console.log('=== BY STATE ===');
      stats.locations.byState.forEach((s) => {
        console.log(` ${s.stateCode}: ${s.count}`);
      });
      return 0;
    }

    case 'list': {
      const status = stringFlag(flags, 'status');
      const stateCode = stringFlag(flags, 'state');
      const limit = intFlag(flags, 'limit', 50);

      // Filters are appended as numbered placeholders; values go in `params`
      let whereClause = 'WHERE active = TRUE';
      const params: any[] = [];
      let paramIndex = 1;

      if (status) {
        whereClause += ` AND status = $${paramIndex}`;
        params.push(status);
        paramIndex++;
      }

      if (stateCode) {
        whereClause += ` AND state_code = $${paramIndex}`;
        params.push(stateCode.toUpperCase());
        paramIndex++;
      }

      params.push(limit);

      const { rows } = await pool.query(
        `
        SELECT id, platform, name, city, state_code, status, platform_menu_url, first_seen_at
        FROM dutchie_discovery_locations
        ${whereClause}
        ORDER BY first_seen_at DESC
        LIMIT $${paramIndex}
        `,
        params
      );

      console.log(`\nFound ${rows.length} locations:\n`);
      console.log('ID\tStatus\t\tState\tCity\t\tName');
      console.log('-'.repeat(80));
      rows.forEach((row: any) => {
        const statusDisplay = String(row.status ?? '').padEnd(12);
        const cityDisplay = (row.city || '').substring(0, 12).padEnd(12);
        const nameDisplay = (row.name || '').substring(0, 30);
        console.log(
          `${row.id}\t${statusDisplay}\t${row.state_code || 'N/A'}\t${cityDisplay}\t${nameDisplay}`
        );
      });
      return 0;
    }

    default:
      console.error(`Unknown command: ${command}`);
      printHelp();
      return 1;
  }
}

/**
 * CLI entry point for the Dutchie discovery pipeline.
 *
 * Prints help for the `help` command or `--help`; otherwise opens the pool,
 * runs the requested command and closes the pool again before exiting with a
 * non-zero status on failure. With `--verbose`, the stack trace of a thrown
 * error is printed as well.
 */
async function main() {
  const { command, positional, flags } = parseArgs();

  if (command === 'help' || flags.help) {
    printHelp();
    process.exit(0);
  }

  const pool = createPool();
  // Exit status is recorded and applied after `finally`, so the pool is closed
  // on every path instead of being skipped by an early process.exit().
  let exitCode = 0;

  try {
    exitCode = await runCommand(pool, command, positional, flags);
  } catch (error: unknown) {
    console.error('ERROR:', error instanceof Error ? error.message : error);
    if (flags.verbose && error instanceof Error) {
      console.error(error.stack);
    }
    exitCode = 1;
  } finally {
    await pool.end();
  }

  if (exitCode !== 0) {
    process.exit(exitCode);
  }
}

main();
|
||||
@@ -1,5 +1,8 @@
|
||||
/**
|
||||
* Run Dutchie GraphQL Scrape
|
||||
* LEGACY SCRIPT - Run Dutchie GraphQL Scrape
|
||||
*
|
||||
* DEPRECATED: This script creates its own database pool.
|
||||
* Future implementations should use the CannaiQ API endpoints instead.
|
||||
*
|
||||
* This script demonstrates the full pipeline:
|
||||
* 1. Puppeteer navigates to Dutchie menu
|
||||
@@ -7,12 +10,21 @@
|
||||
* 3. Products are normalized to our schema
|
||||
* 4. Products are upserted to database
|
||||
* 5. Derived views (brands, categories, specials) are automatically updated
|
||||
*
|
||||
* DO NOT:
|
||||
* - Add this to package.json scripts
|
||||
* - Run this in automated jobs
|
||||
* - Use DATABASE_URL directly
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { scrapeDutchieMenu } from '../scrapers/dutchie-graphql';
|
||||
|
||||
const DATABASE_URL = process.env.DATABASE_URL || 'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';
|
||||
console.warn('\n⚠️ LEGACY SCRIPT: This script should be replaced with CannaiQ API calls.\n');
|
||||
|
||||
// Single database connection (cannaiq in cannaiq-postgres container)
|
||||
const DATABASE_URL = process.env.CANNAIQ_DB_URL ||
|
||||
`postgresql://${process.env.CANNAIQ_DB_USER || 'dutchie'}:${process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass'}@${process.env.CANNAIQ_DB_HOST || 'localhost'}:${process.env.CANNAIQ_DB_PORT || '54320'}/${process.env.CANNAIQ_DB_NAME || 'cannaiq'}`;
|
||||
|
||||
async function main() {
|
||||
const pool = new Pool({ connectionString: DATABASE_URL });
|
||||
|
||||
510
backend/src/scripts/run-hydration.ts
Normal file
510
backend/src/scripts/run-hydration.ts
Normal file
@@ -0,0 +1,510 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Unified Hydration CLI
|
||||
*
|
||||
* Central entrypoint for all hydration operations:
|
||||
*
|
||||
* MODES:
|
||||
* payload - Process raw_payloads → canonical tables (existing behavior)
|
||||
* backfill - Migrate dutchie_* → canonical tables (legacy backfill)
|
||||
* sync - Sync recent crawls to canonical tables
|
||||
* status - Show hydration progress
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx src/scripts/run-hydration.ts --mode=<mode> [options]
|
||||
*
|
||||
* Examples:
|
||||
* # Payload-based hydration (default)
|
||||
* npx tsx src/scripts/run-hydration.ts --mode=payload
|
||||
*
|
||||
* # Full legacy backfill
|
||||
* npx tsx src/scripts/run-hydration.ts --mode=backfill
|
||||
*
|
||||
* # Backfill single dispensary
|
||||
* npx tsx src/scripts/run-hydration.ts --mode=backfill --store=123
|
||||
*
|
||||
* # Sync recent crawls
|
||||
* npx tsx src/scripts/run-hydration.ts --mode=sync --since="2 hours"
|
||||
*
|
||||
* # Check status
|
||||
* npx tsx src/scripts/run-hydration.ts --mode=status
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import dotenv from 'dotenv';
|
||||
import {
|
||||
HydrationWorker,
|
||||
runHydrationBatch,
|
||||
processPayloadById,
|
||||
reprocessFailedPayloads,
|
||||
getPayloadStats,
|
||||
} from '../hydration';
|
||||
import { runLegacyBackfill } from '../hydration/legacy-backfill';
|
||||
import { syncRecentCrawls } from '../hydration/incremental-sync';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
// ============================================================
|
||||
// ARGUMENT PARSING
|
||||
// ============================================================
|
||||
|
||||
/**
 * Parsed command-line options for the hydration CLI.
 */
interface CliArgs {
  /** Operation selected with `--mode=<mode>`; defaults to `payload`. */
  mode: 'payload' | 'backfill' | 'sync' | 'status';
  /** Dispensary ID from `--store=<id>`. */
  store?: number;
  /** Time window from `--since=<value>` (used by sync mode). */
  since?: string;
  /** `--dry-run` was passed. */
  dryRun: boolean;
  /** `--verbose` or `-v` was passed. */
  verbose: boolean;
  /** Batch size from `--limit=<n>` or `--limit <n>`; defaults to 50. */
  limit: number;
  /** `--loop` was passed. */
  loop: boolean;
  /** `--reprocess` was passed. */
  reprocess: boolean;
  /** Payload ID from `--payload=<id>` or `--payload <id>`. */
  payloadId?: string;
  /** Product ID from `--start-from=<id>`. */
  startFrom?: number;
}

/**
 * Parse `process.argv` into a {@link CliArgs} object.
 *
 * Unlike a lenient parser, this rejects input it cannot interpret instead of
 * silently substituting defaults: a mistyped `--mode` must not fall through
 * to the default (mutating) payload mode, and a non-numeric `--limit`,
 * `--store` or `--start-from` must not reach the workers as `NaN`.
 *
 * @returns The parsed options; optional keys are only present when supplied.
 * @throws Error when `--mode` is not a known mode or a numeric option is not
 *         a valid integer.
 */
function parseArgs(): CliArgs {
  const args = process.argv.slice(2);
  const modes = ['payload', 'backfill', 'sync', 'status'] as const;

  const isMode = (value: string): value is CliArgs['mode'] =>
    (modes as readonly string[]).includes(value);

  // Value of `--flag=<value>`; when `allowSpaceForm` is set, `--flag <value>`
  // is accepted as a fallback. Everything after the first '=' is kept, so
  // values that themselves contain '=' are not truncated.
  const valueOf = (flag: string, allowSpaceForm = false): string | undefined => {
    const prefix = `${flag}=`;
    const inline = args.find(a => a.startsWith(prefix));
    if (inline !== undefined) {
      return inline.slice(prefix.length);
    }
    if (allowSpaceForm) {
      const idx = args.indexOf(flag);
      if (idx !== -1 && args[idx + 1]) {
        return args[idx + 1];
      }
    }
    return undefined;
  };

  // Strict base-10 integer parsing with a lower bound.
  const toInt = (flag: string, raw: string, min: number): number => {
    const parsed = /^\d+$/.test(raw.trim()) ? parseInt(raw.trim(), 10) : NaN;
    if (!Number.isSafeInteger(parsed) || parsed < min) {
      throw new Error(`Invalid value for ${flag}: "${raw}" (expected an integer >= ${min})`);
    }
    return parsed;
  };

  // Defaults
  const result: CliArgs = {
    mode: 'payload',
    dryRun: args.includes('--dry-run'),
    verbose: args.includes('--verbose') || args.includes('-v'),
    limit: 50,
    loop: args.includes('--loop'),
    reprocess: args.includes('--reprocess'),
  };

  const mode = valueOf('--mode');
  if (mode !== undefined) {
    if (!isMode(mode)) {
      throw new Error(`Unknown mode: "${mode}" (expected one of: ${modes.join(', ')})`);
    }
    result.mode = mode;
  }

  const store = valueOf('--store');
  if (store !== undefined) {
    result.store = toInt('--store', store, 1);
  }

  const since = valueOf('--since');
  if (since !== undefined) {
    result.since = since;
  }

  const limit = valueOf('--limit', true);
  if (limit !== undefined) {
    result.limit = toInt('--limit', limit, 1);
  }

  const payloadId = valueOf('--payload', true);
  if (payloadId !== undefined) {
    result.payloadId = payloadId;
  }

  const startFrom = valueOf('--start-from');
  if (startFrom !== undefined) {
    result.startFrom = toInt('--start-from', startFrom, 0);
  }

  return result;
}
|
||||
|
||||
// ============================================================
|
||||
// DATABASE CONNECTION
|
||||
// ============================================================
|
||||
|
||||
/**
 * Resolve the PostgreSQL connection string from the environment.
 *
 * Resolution order:
 *   1. `CANNAIQ_DB_URL`, used verbatim.
 *   2. `CANNAIQ_DB_HOST` / `_PORT` / `_NAME` / `_USER` / `_PASS`, when all five
 *      are set. User and password are percent-encoded so credentials
 *      containing URI-reserved characters (`@`, `:`, `/`, `#`, ...) still
 *      produce a parseable connection URI.
 *   3. `DATABASE_URL`, as a fallback for local development.
 *
 * @returns A `postgresql://` connection string.
 * @throws Error when none of the above is configured.
 */
function getConnectionString(): string {
  const env = process.env;

  if (env.CANNAIQ_DB_URL) {
    return env.CANNAIQ_DB_URL;
  }

  const host = env.CANNAIQ_DB_HOST;
  const port = env.CANNAIQ_DB_PORT;
  const name = env.CANNAIQ_DB_NAME;
  const user = env.CANNAIQ_DB_USER;
  const pass = env.CANNAIQ_DB_PASS;

  if (host && port && name && user && pass) {
    const credentials = `${encodeURIComponent(user)}:${encodeURIComponent(pass)}`;
    return `postgresql://${credentials}@${host}:${port}/${name}`;
  }

  // Fallback to DATABASE_URL for local development
  if (env.DATABASE_URL) {
    return env.DATABASE_URL;
  }

  throw new Error('Missing database connection environment variables');
}
|
||||
|
||||
// ============================================================
|
||||
// MODE: PAYLOAD (existing behavior)
|
||||
// ============================================================
|
||||
|
||||
/**
 * Payload mode: hydrate from stored raw payloads.
 *
 * Prints a banner and the current payload statistics (best effort), then does
 * exactly one of the following, in priority order:
 *   1. process the single payload named by `args.payloadId`;
 *   2. reprocess failed payloads when `args.reprocess` is set;
 *   3. run the continuous worker loop when `args.loop` is set (SIGINT stops it);
 *   4. otherwise run one batch and print its summary.
 *
 * @param pool - Database pool passed through to the hydration helpers.
 * @param args - Parsed CLI options.
 */
async function runPayloadMode(pool: Pool, args: CliArgs): Promise<void> {
  const rule = '='.repeat(60);
  const batchOptions = { dryRun: args.dryRun, batchSize: args.limit };

  console.log(rule);
  console.log('HYDRATION - PAYLOAD MODE');
  console.log(rule);
  console.log(`Dry run: ${args.dryRun}`);
  console.log(`Batch size: ${args.limit}`);
  console.log('');

  // Show current stats; any failure here is reported as a note and ignored.
  try {
    const stats = await getPayloadStats(pool);
    console.log('Current payload stats:');
    console.log(` Total: ${stats.total}`);
    console.log(` Processed: ${stats.processed}`);
    console.log(` Unprocessed: ${stats.unprocessed}`);
    console.log(` Failed: ${stats.failed}`);
    console.log('');
  } catch {
    console.log('Note: raw_payloads table not found or empty');
    console.log('');
  }

  // Process specific payload
  if (args.payloadId) {
    console.log(`Processing payload: ${args.payloadId}`);
    const single = await processPayloadById(pool, args.payloadId, { dryRun: args.dryRun });
    console.log('Result:', JSON.stringify(single, null, 2));
    return;
  }

  // Reprocess failed payloads
  if (args.reprocess) {
    console.log('Reprocessing failed payloads...');
    const reprocessed = await reprocessFailedPayloads(pool, batchOptions);
    console.log('Result:', JSON.stringify(reprocessed, null, 2));
    return;
  }

  // Run continuous loop
  if (args.loop) {
    const worker = new HydrationWorker(pool, batchOptions);
    process.on('SIGINT', () => {
      console.log('\nStopping hydration loop...');
      worker.stop();
    });
    await worker.runLoop(30000);
    return;
  }

  // Run single batch
  const batch = await runHydrationBatch(pool, batchOptions);
  console.log('Batch result:');
  console.log(` Payloads processed: ${batch.payloadsProcessed}`);
  console.log(` Payloads failed: ${batch.payloadsFailed}`);
  console.log(` Products upserted: ${batch.totalProductsUpserted}`);
  console.log(` Snapshots created: ${batch.totalSnapshotsCreated}`);
  console.log(` Brands created: ${batch.totalBrandsCreated}`);
  console.log(` Duration: ${batch.durationMs}ms`);

  if (batch.errors.length > 0) {
    console.log('\nErrors:');
    // Only the first ten errors are printed.
    batch.errors.slice(0, 10).forEach(failure => {
      console.log(` ${failure.payloadId}: ${failure.error}`);
    });
  }
}
|
||||
|
||||
// ============================================================
|
||||
// MODE: BACKFILL (legacy dutchie_* → canonical)
|
||||
// ============================================================
|
||||
|
||||
/**
 * Backfill mode: print a banner describing the run, then delegate to
 * `runLegacyBackfill` with the dry-run, verbosity, store and resume options.
 *
 * @param pool - Database pool passed through to the backfill.
 * @param args - Parsed CLI options; `store` and `startFrom` are optional.
 */
async function runBackfillMode(pool: Pool, args: CliArgs): Promise<void> {
  const { dryRun, verbose, store, startFrom } = args;
  const rule = '='.repeat(60);

  const header = [rule, 'HYDRATION - BACKFILL MODE', rule, `Mode: ${dryRun ? 'DRY RUN' : 'LIVE'}`];
  if (store) {
    header.push(`Store: ${store}`);
  }
  if (startFrom) {
    header.push(`Start from product ID: ${startFrom}`);
  }
  header.push('');
  for (const line of header) {
    console.log(line);
  }

  await runLegacyBackfill(pool, {
    dryRun,
    verbose,
    dispensaryId: store,
    startFromProductId: startFrom,
  });
}
|
||||
|
||||
// ============================================================
|
||||
// MODE: SYNC (recent crawls → canonical)
|
||||
// ============================================================
|
||||
|
||||
/**
 * Sync mode: print a banner, call `syncRecentCrawls` for the requested time
 * window, then report how many crawls were synced and list up to ten errors.
 *
 * @param pool - Database pool passed through to the sync.
 * @param args - Parsed CLI options; `since` falls back to `'1 hour'`.
 */
async function runSyncMode(pool: Pool, args: CliArgs): Promise<void> {
  const window = args.since || '1 hour';
  const rule = '='.repeat(60);
  const maxShown = 10;

  console.log(rule);
  console.log('HYDRATION - SYNC MODE');
  console.log(rule);
  console.log(`Mode: ${args.dryRun ? 'DRY RUN' : 'LIVE'}`);
  console.log(`Since: ${window}`);
  console.log(`Limit: ${args.limit}`);
  if (args.store) {
    console.log(`Store: ${args.store}`);
  }
  console.log('');

  const outcome = await syncRecentCrawls(pool, {
    dryRun: args.dryRun,
    verbose: args.verbose,
    since: window,
    dispensaryId: args.store,
    limit: args.limit,
  });

  const errorCount = outcome.errors.length;

  console.log('');
  console.log('=== Sync Results ===');
  console.log(`Crawls synced: ${outcome.synced}`);
  console.log(`Errors: ${errorCount}`);

  if (errorCount === 0) {
    return;
  }

  console.log('');
  console.log('Errors:');
  outcome.errors.slice(0, maxShown).forEach(message => {
    console.log(` - ${message}`);
  });
  if (errorCount > maxShown) {
    console.log(` ... and ${errorCount - maxShown} more`);
  }
}
|
||||
|
||||
// ============================================================
|
||||
// MODE: STATUS
|
||||
// ============================================================
|
||||
|
||||
/**
 * Status mode: print a read-only report of hydration progress.
 *
 * Sections, in order:
 *   - rows of the `v_hydration_status` view, if the view exists;
 *   - row counts of the canonical tables;
 *   - row counts of the legacy tables;
 *   - `crawl_runs` grouped by status for the last 24 hours;
 *   - payload statistics from `getPayloadStats`.
 *
 * Each section after the first is best effort: a failing query is reported
 * as "(table not found)" rather than aborting the report.
 *
 * @param pool - Database pool used for all queries.
 */
async function runStatusMode(pool: Pool): Promise<void> {
  const banner = '='.repeat(60);
  const wideRule = '-'.repeat(70);
  const narrowRule = '-'.repeat(40);

  // Print `<table>: <count>` for each table. Table names come only from the
  // hard-coded lists below, so interpolating them into SQL is safe.
  const printCounts = async (tables: readonly string[]): Promise<void> => {
    for (const table of tables) {
      try {
        const { rows } = await pool.query(`SELECT COUNT(*) as cnt FROM ${table}`);
        console.log(`${table}: ${rows[0].cnt}`);
      } catch {
        console.log(`${table}: (table not found)`);
      }
    }
  };

  console.log(banner);
  console.log('HYDRATION STATUS');
  console.log(banner);
  console.log('');

  // Check if v_hydration_status view exists
  const viewExists = await pool.query(`
    SELECT EXISTS (
      SELECT 1 FROM pg_views WHERE viewname = 'v_hydration_status'
    ) as exists
  `);

  if (viewExists.rows[0].exists) {
    const { rows } = await pool.query('SELECT * FROM v_hydration_status');
    console.log('Hydration Progress:');
    console.log(wideRule);
    console.log(
      'Table'.padEnd(30) +
        'Source'.padEnd(12) +
        'Hydrated'.padEnd(12) +
        'Progress'
    );
    console.log(wideRule);

    for (const row of rows) {
      // Compare against null/undefined rather than truthiness so that a
      // numeric 0 is shown as "0%" instead of "N/A".
      const progress = row.hydration_pct != null ? `${row.hydration_pct}%` : 'N/A';
      console.log(
        row.source_table.padEnd(30) +
          String(row.source_count).padEnd(12) +
          String(row.hydrated_count).padEnd(12) +
          progress
      );
    }
    console.log(wideRule);
  } else {
    console.log('Note: v_hydration_status view not found. Run migration 052 first.');
  }

  // Get counts from canonical tables
  console.log('\nCanonical Table Counts:');
  console.log(narrowRule);
  await printCounts(['store_products', 'store_product_snapshots', 'crawl_runs']);

  // Get legacy table counts
  console.log('\nLegacy Table Counts:');
  console.log(narrowRule);
  await printCounts(['dutchie_products', 'dutchie_product_snapshots', 'dispensary_crawl_jobs']);

  // Show recent sync activity
  console.log('\nRecent Crawl Runs (last 24h):');
  console.log(narrowRule);

  try {
    const { rows } = await pool.query(`
      SELECT status, COUNT(*) as count
      FROM crawl_runs
      WHERE started_at > NOW() - INTERVAL '24 hours'
      GROUP BY status
      ORDER BY count DESC
    `);

    if (rows.length === 0) {
      console.log('No crawl runs in last 24 hours');
    } else {
      for (const row of rows) {
        console.log(`${row.status}: ${row.count}`);
      }
    }
  } catch {
    console.log('(crawl_runs table not found)');
  }

  // Payload stats
  console.log('\nPayload Hydration:');
  console.log(narrowRule);

  try {
    const stats = await getPayloadStats(pool);
    console.log(`Total payloads: ${stats.total}`);
    console.log(`Processed: ${stats.processed}`);
    console.log(`Unprocessed: ${stats.unprocessed}`);
    console.log(`Failed: ${stats.failed}`);
  } catch {
    console.log('(raw_payloads table not found)');
  }
}
|
||||
|
||||
// ============================================================
|
||||
// HELP
|
||||
// ============================================================
|
||||
|
||||
/**
 * Print the CLI usage text (modes, options and examples) to stdout with a
 * single `console.log` call.
 */
function showHelp(): void {
  const helpLines = [
    '',
    'Unified Hydration CLI',
    '',
    'Usage:',
    'npx tsx src/scripts/run-hydration.ts --mode=<mode> [options]',
    '',
    'Modes:',
    'payload Process raw_payloads → canonical tables (default)',
    'backfill Migrate dutchie_* → canonical tables',
    'sync Sync recent crawls to canonical tables',
    'status Show hydration progress',
    '',
    'Common Options:',
    '--dry-run Print changes without modifying database',
    '--verbose, -v Show detailed progress',
    '--store=<id> Limit to a single dispensary',
    '--limit=<n> Batch size (default: 50)',
    '',
    'Payload Mode Options:',
    '--loop Run continuous hydration loop',
    '--reprocess Reprocess failed payloads',
    '--payload=<id> Process a specific payload by ID',
    '',
    'Backfill Mode Options:',
    '--start-from=<id> Resume from a specific product ID',
    '',
    'Sync Mode Options:',
    '--since=<interval> Time window (default: "1 hour")',
    'Examples: "30 minutes", "2 hours", "1 day"',
    '',
    'Examples:',
    '# Full legacy backfill (dutchie_* → canonical)',
    'npx tsx src/scripts/run-hydration.ts --mode=backfill',
    '',
    '# Backfill single dispensary (dry run)',
    'npx tsx src/scripts/run-hydration.ts --mode=backfill --store=123 --dry-run',
    '',
    '# Sync recent crawls from last 4 hours',
    'npx tsx src/scripts/run-hydration.ts --mode=sync --since="4 hours"',
    '',
    '# Sync single dispensary',
    'npx tsx src/scripts/run-hydration.ts --mode=sync --store=123',
    '',
    '# Run payload hydration loop',
    'npx tsx src/scripts/run-hydration.ts --mode=payload --loop',
    '',
    '# Check hydration status',
    'npx tsx src/scripts/run-hydration.ts --mode=status',
    '',
  ];

  console.log(helpLines.join('\n'));
}
|
||||
|
||||
// ============================================================
|
||||
// MAIN
|
||||
// ============================================================
|
||||
|
||||
/**
 * CLI entry point: handle `--help`, parse arguments, open the database pool,
 * verify connectivity and dispatch to the handler for the selected mode.
 *
 * Failures set `process.exitCode` instead of calling `process.exit()` so the
 * `finally` block always gets to close the pool before the process ends.
 * Argument parsing and connection-string resolution happen inside the `try`,
 * so a configuration error is reported as a one-line message rather than an
 * unhandled promise rejection.
 */
async function main(): Promise<void> {
  const rawArgs = process.argv.slice(2);

  if (rawArgs.includes('--help') || rawArgs.includes('-h')) {
    showHelp();
    process.exit(0);
  }

  let pool: Pool | undefined;

  try {
    const args = parseArgs();

    pool = new Pool({
      connectionString: getConnectionString(),
      max: 5,
    });

    // Verify connection
    await pool.query('SELECT 1');
    console.log('Database connection: OK\n');

    switch (args.mode) {
      case 'payload':
        await runPayloadMode(pool, args);
        break;

      case 'backfill':
        await runBackfillMode(pool, args);
        break;

      case 'sync':
        await runSyncMode(pool, args);
        break;

      case 'status':
        await runStatusMode(pool);
        break;

      default:
        console.error(`Unknown mode: ${args.mode}`);
        showHelp();
        process.exitCode = 1;
    }
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances carry a message.
    console.error('Error:', error instanceof Error ? error.message : error);
    process.exitCode = 1;
  } finally {
    if (pool) {
      await pool.end();
    }
  }
}
|
||||
|
||||
// Start the CLI. The handler is a last line of defence for rejections that
// escape main(), so they produce a short message and a non-zero exit code
// instead of an unhandled-rejection crash.
main().catch((error: unknown) => {
  console.error('Fatal error:', error instanceof Error ? error.message : error);
  process.exitCode = 1;
});
|
||||
225
backend/src/scripts/sandbox-crawl-101.ts
Normal file
225
backend/src/scripts/sandbox-crawl-101.ts
Normal file
@@ -0,0 +1,225 @@
|
||||
/**
|
||||
* Sandbox Crawl Script for Dispensary 101 (Trulieve Scottsdale)
|
||||
*
|
||||
* Runs a full crawl and captures trace data for observability.
|
||||
* NO automatic promotion or status changes.
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { crawlDispensaryProducts } from '../dutchie-az/services/product-crawler';
|
||||
import { Dispensary } from '../dutchie-az/types';
|
||||
|
||||
// Module-level pg pool used by main(); the connection string is read from DATABASE_URL.
const pool = new Pool({ connectionString: process.env.DATABASE_URL });
|
||||
|
||||
/**
 * Run a sandbox crawl for dispensary 101 and record a trace row.
 *
 * Steps:
 *   1. Load the dispensary row and map it to the `Dispensary` shape.
 *   2. Load its crawler profile (optional) and print both.
 *   3. Call `crawlDispensaryProducts` in 'rec' mode with both modes enabled
 *      and image download disabled.
 *   4. Print the crawl result, up to ten recently updated products and
 *      per-field coverage percentages.
 *   5. Insert a row into `crawl_orchestration_traces` describing the run; a
 *      failing crawl is recorded as an error trace instead of aborting.
 *
 * The pool is closed in a `finally` block so it is released on every path,
 * including the early "not found" return and unexpected query failures.
 */
async function main() {
  // Single source of truth for the dispensary under test; passed to every
  // query as a bind parameter.
  const dispensaryId = 101;

  console.log('=== SANDBOX CRAWL: Dispensary 101 (Trulieve Scottsdale) ===\n');
  const startTime = Date.now();

  try {
    // Load dispensary from database (only columns that exist in local schema)
    const dispResult = await pool.query(
      `
      SELECT id, name, city, state, menu_type, platform_dispensary_id, menu_url
      FROM dispensaries
      WHERE id = $1
    `,
      [dispensaryId]
    );

    const row = dispResult.rows[0];
    if (!row) {
      console.log('ERROR: Dispensary 101 not found');
      return;
    }

    // Map to Dispensary interface (snake_case -> camelCase)
    const dispensary: Dispensary = {
      id: row.id,
      platform: 'dutchie',
      name: row.name,
      slug: row.name.toLowerCase().replace(/\s+/g, '-'),
      city: row.city,
      state: row.state,
      platformDispensaryId: row.platform_dispensary_id,
      menuType: row.menu_type,
      menuUrl: row.menu_url,
      createdAt: new Date(),
      updatedAt: new Date(),
    };

    console.log('=== DISPENSARY INFO ===');
    console.log(`Name: ${dispensary.name}`);
    console.log(`Location: ${dispensary.city}, ${dispensary.state}`);
    console.log(`Menu Type: ${dispensary.menuType}`);
    console.log(`Platform ID: ${dispensary.platformDispensaryId}`);
    console.log(`Menu URL: ${dispensary.menuUrl}`);
    console.log('');

    // Get profile info
    const profileResult = await pool.query(
      `
      SELECT id, profile_key, status, config FROM dispensary_crawler_profiles
      WHERE dispensary_id = $1
    `,
      [dispensaryId]
    );

    const profile = profileResult.rows[0];
    console.log('=== PROFILE ===');
    if (profile) {
      console.log(`Profile Key: ${profile.profile_key}`);
      console.log(`Profile Status: ${profile.status}`);
      console.log(`Config: ${JSON.stringify(profile.config, null, 2)}`);
    } else {
      console.log('No profile found - will use defaults');
    }
    console.log('');

    // Run the crawl
    console.log('=== STARTING CRAWL ===');
    console.log('Options: useBothModes=true, downloadImages=false (sandbox)');
    console.log('');

    try {
      const result = await crawlDispensaryProducts(dispensary, 'rec', {
        useBothModes: true,
        downloadImages: false, // Skip images in sandbox mode for speed
      });

      console.log('');
      console.log('=== CRAWL RESULT ===');
      console.log(`Success: ${result.success}`);
      console.log(`Products Found: ${result.productsFound}`);
      console.log(`Products Fetched: ${result.productsFetched}`);
      console.log(`Products Upserted: ${result.productsUpserted}`);
      console.log(`Snapshots Created: ${result.snapshotsCreated}`);
      if (result.errorMessage) {
        console.log(`Error: ${result.errorMessage}`);
      }
      console.log(`Duration: ${result.durationMs}ms`);
      console.log('');

      // Show sample products from database
      if (result.productsUpserted > 0) {
        const sampleProducts = await pool.query(
          `
          SELECT
            id, name, brand_name, type, subcategory, strain_type,
            price_rec, price_rec_original, stock_status, external_product_id
          FROM dutchie_products
          WHERE dispensary_id = $1
          ORDER BY updated_at DESC
          LIMIT 10
        `,
          [dispensaryId]
        );

        console.log('=== SAMPLE PRODUCTS (10) ===');
        sampleProducts.rows.forEach((p: any, i: number) => {
          console.log(`${i + 1}. ${p.name}`);
          console.log(` Brand: ${p.brand_name || 'N/A'}`);
          console.log(` Type: ${p.type} / ${p.subcategory || 'N/A'}`);
          console.log(` Strain: ${p.strain_type || 'N/A'}`);
          console.log(` Price: $${p.price_rec || 'N/A'} (orig: $${p.price_rec_original || 'N/A'})`);
          console.log(` Stock: ${p.stock_status}`);
          console.log(` External ID: ${p.external_product_id}`);
          console.log('');
        });

        // Show field coverage stats
        const fieldStats = await pool.query(
          `
          SELECT
            COUNT(*) as total,
            COUNT(brand_name) as with_brand,
            COUNT(type) as with_type,
            COUNT(strain_type) as with_strain,
            COUNT(price_rec) as with_price,
            COUNT(image_url) as with_image,
            COUNT(description) as with_description,
            COUNT(thc_content) as with_thc,
            COUNT(cbd_content) as with_cbd
          FROM dutchie_products
          WHERE dispensary_id = $1
        `,
          [dispensaryId]
        );

        const stats = fieldStats.rows[0];
        const total = Number(stats.total);
        // Whole-number percentage of `total`; 0 when there are no rows, so
        // the report never prints NaN.
        const pct = (count: string | number): number =>
          total > 0 ? Math.round((Number(count) / total) * 100) : 0;

        console.log('=== FIELD COVERAGE ===');
        console.log(`Total products: ${stats.total}`);
        console.log(`With brand: ${stats.with_brand} (${pct(stats.with_brand)}%)`);
        console.log(`With type: ${stats.with_type} (${pct(stats.with_type)}%)`);
        console.log(`With strain_type: ${stats.with_strain} (${pct(stats.with_strain)}%)`);
        console.log(`With price_rec: ${stats.with_price} (${pct(stats.with_price)}%)`);
        console.log(`With image_url: ${stats.with_image} (${pct(stats.with_image)}%)`);
        console.log(`With description: ${stats.with_description} (${pct(stats.with_description)}%)`);
        console.log(`With THC: ${stats.with_thc} (${pct(stats.with_thc)}%)`);
        console.log(`With CBD: ${stats.with_cbd} (${pct(stats.with_cbd)}%)`);
        console.log('');
      }

      // Insert trace record for observability
      const traceData = {
        crawlResult: result,
        dispensaryInfo: {
          id: dispensary.id,
          name: dispensary.name,
          platformDispensaryId: dispensary.platformDispensaryId,
          menuUrl: dispensary.menuUrl,
        },
        profile: profile || null,
        timestamp: new Date().toISOString(),
      };

      await pool.query(
        `
        INSERT INTO crawl_orchestration_traces
          (dispensary_id, profile_id, profile_key, crawler_module, mode,
           state_at_start, state_at_end, trace, success, products_found,
           duration_ms, started_at, completed_at)
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW())
      `,
        [
          dispensaryId,
          profile?.id || null,
          profile?.profile_key || null,
          'product-crawler',
          'sandbox',
          profile?.status || 'no_profile',
          profile?.status || 'no_profile', // No status change in sandbox
          JSON.stringify(traceData),
          result.success,
          result.productsFound,
          result.durationMs,
          new Date(startTime),
        ]
      );

      console.log('=== TRACE RECORDED ===');
      console.log('Trace saved to crawl_orchestration_traces table');
    } catch (error: unknown) {
      // Anything can be thrown; normalise before reading message/stack.
      const message = error instanceof Error ? error.message : String(error);
      const stack = error instanceof Error ? error.stack : undefined;

      console.error('=== CRAWL ERROR ===');
      console.error('Error:', message);
      console.error('Stack:', stack);

      // Record error trace
      await pool.query(
        `
        INSERT INTO crawl_orchestration_traces
          (dispensary_id, profile_id, profile_key, crawler_module, mode,
           state_at_start, state_at_end, trace, success, error_message,
           duration_ms, started_at, completed_at)
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW())
      `,
        [
          dispensaryId,
          profile?.id || null,
          profile?.profile_key || null,
          'product-crawler',
          'sandbox',
          profile?.status || 'no_profile',
          profile?.status || 'no_profile',
          JSON.stringify({ error: message, stack }),
          false,
          message,
          Date.now() - startTime,
          new Date(startTime),
        ]
      );
    }
  } finally {
    await pool.end();
  }

  console.log('=== SANDBOX CRAWL COMPLETE ===');
}
|
||||
|
||||
// Kick off the script; any rejection that escapes main() is reported and
// turned into a non-zero exit status.
main().catch(function onFatal(err) {
  console.error('Fatal error:', err.message);
  process.exit(1);
});
|
||||
181
backend/src/scripts/sandbox-test.ts
Normal file
181
backend/src/scripts/sandbox-test.ts
Normal file
@@ -0,0 +1,181 @@
|
||||
/**
|
||||
* LEGACY SCRIPT - Sandbox Crawl Test
|
||||
*
|
||||
* DEPRECATED: This script uses direct database connections.
|
||||
* Future implementations should use the CannaiQ API endpoints instead.
|
||||
*
|
||||
* This script runs sandbox crawl for a dispensary and captures the full trace.
|
||||
* It is kept for historical reference and manual testing only.
|
||||
*
|
||||
* DO NOT:
|
||||
* - Add this to package.json scripts
|
||||
* - Run this in automated jobs
|
||||
* - Use DATABASE_URL directly
|
||||
*
|
||||
* Usage (manual only):
|
||||
* STORAGE_DRIVER=local npx tsx src/scripts/sandbox-test.ts <dispensary_id>
|
||||
*
|
||||
* LOCAL MODE REQUIREMENTS:
|
||||
* - STORAGE_DRIVER=local
|
||||
* - STORAGE_BASE_PATH=./storage
|
||||
* - Local cannaiq-postgres on port 54320
|
||||
* - NO MinIO, NO Kubernetes
|
||||
*/
|
||||
|
||||
import { query, getClient, closePool } from '../dutchie-az/db/connection';
|
||||
import { runDispensaryOrchestrator } from '../services/dispensary-orchestrator';
|
||||
|
||||
// Verify local mode
|
||||
/**
 * Print the storage-related environment settings and abort the process
 * (exit code 1) unless the script is running in local mode: STORAGE_DRIVER
 * must resolve to "local" (the default when unset) and MINIO_ENDPOINT must
 * not be set.
 */
function verifyLocalMode(): void {
  const { STORAGE_DRIVER, MINIO_ENDPOINT, STORAGE_BASE_PATH } = process.env;
  const driver = STORAGE_DRIVER || 'local';

  const report = [
    '=== LOCAL MODE VERIFICATION ===',
    `STORAGE_DRIVER: ${driver}`,
    `MINIO_ENDPOINT: ${MINIO_ENDPOINT || 'NOT SET (good)'}`,
    `STORAGE_BASE_PATH: ${STORAGE_BASE_PATH || './storage'}`,
    'DB Connection: Using canonical CannaiQ pool',
  ];
  for (const line of report) {
    console.log(line);
  }

  // Report the violated requirement and terminate.
  const abort = (message: string) => {
    console.error(message);
    process.exit(1);
  };

  if (driver !== 'local') {
    abort('ERROR: STORAGE_DRIVER must be "local"');
  }
  if (MINIO_ENDPOINT) {
    abort('ERROR: MINIO_ENDPOINT should NOT be set in local mode');
  }

  console.log('✅ Local mode verified\n');
}
|
||||
|
||||
/**
 * Fetch one dispensary row left-joined with its crawler profile
 * (profile key, profile status and config).
 *
 * @param dispensaryId - Value matched against `dispensaries.id`.
 * @returns The first matching row, or `undefined` when there is none.
 */
async function getDispensaryInfo(dispensaryId: number) {
  const sql = `
    SELECT d.id, d.name, d.city, d.menu_type, d.platform_dispensary_id, d.menu_url,
           p.profile_key, p.status as profile_status, p.config
    FROM dispensaries d
    LEFT JOIN dispensary_crawler_profiles p ON p.dispensary_id = d.id
    WHERE d.id = $1
  `;
  const { rows } = await query(sql, [dispensaryId]);
  const [first] = rows;
  return first;
}
|
||||
|
||||
/**
 * Fetch the most recently created `crawl_orchestration_traces` row for a
 * dispensary (ordered by `created_at`, newest first).
 *
 * @param dispensaryId - Value matched against `dispensary_id`.
 * @returns The newest trace row, or `undefined` when there is none.
 */
async function getLatestTrace(dispensaryId: number) {
  const sql = `
    SELECT *
    FROM crawl_orchestration_traces
    WHERE dispensary_id = $1
    ORDER BY created_at DESC
    LIMIT 1
  `;
  const { rows } = await query(sql, [dispensaryId]);
  const [latest] = rows;
  return latest;
}
|
||||
|
||||
/**
 * Run the dispensary orchestrator for the dispensary ID given as the first
 * CLI argument and print the result together with the latest stored trace.
 *
 * Flow: validate the argument, verify local mode, print the dispensary and
 * profile info, call `runDispensaryOrchestrator`, print its result, then
 * print the newest `crawl_orchestration_traces` row (steps, result, error).
 *
 * Once the database has been touched, failures set `process.exitCode` and
 * return instead of calling `process.exit()`, so the `finally` block always
 * gets to close the pool.
 */
async function main() {
  console.warn('\n⚠️ LEGACY SCRIPT: This script should be replaced with CannaiQ API calls.\n');

  const dispensaryId = parseInt(process.argv[2], 10);

  // Rejects a missing argument, NaN, zero and negative IDs.
  if (!(dispensaryId > 0)) {
    console.error('Usage: npx tsx src/scripts/sandbox-test.ts <dispensary_id>');
    console.error('Example: npx tsx src/scripts/sandbox-test.ts 101');
    process.exit(1);
  }

  // Verify local mode first
  verifyLocalMode();

  try {
    // Get dispensary info
    console.log(`=== DISPENSARY INFO (ID: ${dispensaryId}) ===`);
    const dispensary = await getDispensaryInfo(dispensaryId);

    if (!dispensary) {
      console.error(`Dispensary ${dispensaryId} not found`);
      process.exitCode = 1;
      return;
    }

    console.log(`Name: ${dispensary.name}`);
    console.log(`City: ${dispensary.city}`);
    console.log(`Menu Type: ${dispensary.menu_type}`);
    console.log(`Platform Dispensary ID: ${dispensary.platform_dispensary_id || 'NULL'}`);
    console.log(`Menu URL: ${dispensary.menu_url || 'NULL'}`);
    console.log(`Profile Key: ${dispensary.profile_key || 'NONE'}`);
    console.log(`Profile Status: ${dispensary.profile_status || 'N/A'}`);
    console.log(`Profile Config: ${JSON.stringify(dispensary.config, null, 2)}`);
    console.log('');

    // Run sandbox crawl
    console.log('=== RUNNING SANDBOX CRAWL ===');
    console.log(`Starting sandbox crawl for ${dispensary.name}...`);
    const startTime = Date.now();

    const result = await runDispensaryOrchestrator(dispensaryId);

    const duration = Date.now() - startTime;

    console.log('\n=== CRAWL RESULT ===');
    console.log(`Status: ${result.status}`);
    console.log(`Summary: ${result.summary}`);
    console.log(`Run ID: ${result.runId}`);
    console.log(`Duration: ${duration}ms`);
    console.log(`Detection Ran: ${result.detectionRan}`);
    console.log(`Crawl Ran: ${result.crawlRan}`);
    console.log(`Crawl Type: ${result.crawlType || 'N/A'}`);
    console.log(`Products Found: ${result.productsFound || 0}`);
    console.log(`Products New: ${result.productsNew || 0}`);
    console.log(`Products Updated: ${result.productsUpdated || 0}`);

    if (result.error) {
      console.log(`Error: ${result.error}`);
    }

    // Get the trace
    console.log('\n=== ORCHESTRATOR TRACE ===');
    const trace = await getLatestTrace(dispensaryId);

    if (trace) {
      console.log(`Trace ID: ${trace.id}`);
      console.log(`Profile Key: ${trace.profile_key || 'N/A'}`);
      console.log(`Mode: ${trace.mode}`);
      console.log(`Status: ${trace.status}`);
      console.log(`Started At: ${trace.started_at}`);
      console.log(`Completed At: ${trace.completed_at || 'In Progress'}`);

      if (trace.steps && Array.isArray(trace.steps)) {
        console.log(`\nSteps (${trace.steps.length} total):`);
        trace.steps.forEach((step: any, i: number) => {
          const status = step.status === 'completed' ? '✅' : step.status === 'failed' ? '❌' : '⏳';
          console.log(` ${i + 1}. ${status} ${step.action}: ${step.description}`);
          if (step.output && Object.keys(step.output).length > 0) {
            console.log(` Output: ${JSON.stringify(step.output)}`);
          }
          if (step.error) {
            console.log(` Error: ${step.error}`);
          }
        });
      }

      if (trace.result) {
        console.log(`\nResult: ${JSON.stringify(trace.result, null, 2)}`);
      }

      if (trace.error_message) {
        console.log(`\nError Message: ${trace.error_message}`);
      }
    } else {
      console.log('No trace found for this dispensary');
    }
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances carry message/stack.
    if (error instanceof Error) {
      console.error('Error running sandbox test:', error.message);
      console.error(error.stack);
    } else {
      console.error('Error running sandbox test:', error);
    }
    process.exitCode = 1;
  } finally {
    await closePool();
  }
}
|
||||
|
||||
// Script entry point: start the sandbox test.
main();
|
||||
88
backend/src/scripts/sandbox-validate-101.ts
Normal file
88
backend/src/scripts/sandbox-validate-101.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
/**
|
||||
* Sandbox Validation Script for Dispensary 101 (Trulieve Scottsdale)
|
||||
*
|
||||
* This script runs a sandbox crawl and captures the trace for observability.
|
||||
* NO automatic promotion or state changes.
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
|
||||
// Module-level pg pool used by main(); the connection string is read from DATABASE_URL.
const pool = new Pool({ connectionString: process.env.DATABASE_URL });
|
||||
|
||||
async function main() {
|
||||
console.log('=== SANDBOX VALIDATION: Dispensary 101 (Trulieve Scottsdale) ===');
|
||||
console.log('');
|
||||
|
||||
// Get dispensary info
|
||||
const dispResult = await pool.query(`
|
||||
SELECT d.id, d.name, d.city, d.state, d.menu_type, d.platform_dispensary_id, d.menu_url,
|
||||
dcp.id as profile_id, dcp.profile_key, dcp.status as profile_status, dcp.config
|
||||
FROM dispensaries d
|
||||
LEFT JOIN dispensary_crawler_profiles dcp ON dcp.dispensary_id = d.id
|
||||
WHERE d.id = 101
|
||||
`);
|
||||
|
||||
if (!dispResult.rows[0]) {
|
||||
console.log('ERROR: Dispensary 101 not found');
|
||||
await pool.end();
|
||||
return;
|
||||
}
|
||||
|
||||
const disp = dispResult.rows[0];
|
||||
console.log('=== DISPENSARY INFO ===');
|
||||
console.log('Name:', disp.name);
|
||||
console.log('Location:', disp.city + ', ' + disp.state);
|
||||
console.log('Menu Type:', disp.menu_type);
|
||||
console.log('Platform ID:', disp.platform_dispensary_id);
|
||||
console.log('Menu URL:', disp.menu_url);
|
||||
console.log('');
|
||||
|
||||
console.log('=== PROFILE ===');
|
||||
console.log('Profile ID:', disp.profile_id);
|
||||
console.log('Profile Key:', disp.profile_key);
|
||||
console.log('Profile Status:', disp.profile_status);
|
||||
console.log('Config:', JSON.stringify(disp.config, null, 2));
|
||||
console.log('');
|
||||
|
||||
// Get product count
|
||||
const products = await pool.query('SELECT COUNT(*) FROM dutchie_products WHERE dispensary_id = 101');
|
||||
console.log('Current product count:', products.rows[0].count);
|
||||
console.log('');
|
||||
|
||||
// Check for traces (local DB uses state_at_start/state_at_end column names)
|
||||
const traces = await pool.query(`
|
||||
SELECT id, run_id, state_at_start, state_at_end,
|
||||
products_found, success, error_message, created_at, trace
|
||||
FROM crawl_orchestration_traces
|
||||
WHERE dispensary_id = 101
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 3
|
||||
`);
|
||||
|
||||
console.log('=== RECENT TRACES ===');
|
||||
if (traces.rows.length === 0) {
|
||||
console.log('No traces found');
|
||||
} else {
|
||||
traces.rows.forEach((t: any, i: number) => {
|
||||
console.log(`${i+1}. [id:${t.id}] ${t.state_at_start} -> ${t.state_at_end}`);
|
||||
console.log(` Products: ${t.products_found} | Success: ${t.success}`);
|
||||
if (t.error_message) console.log(` Error: ${t.error_message}`);
|
||||
if (t.trace && Array.isArray(t.trace)) {
|
||||
console.log(' Trace steps:');
|
||||
t.trace.slice(0, 5).forEach((s: any, j: number) => {
|
||||
console.log(` ${j+1}. [${s.status || s.type}] ${s.step_name || s.message || JSON.stringify(s).slice(0, 60)}`);
|
||||
});
|
||||
if (t.trace.length > 5) console.log(` ... and ${t.trace.length - 5} more steps`);
|
||||
}
|
||||
console.log('');
|
||||
});
|
||||
}
|
||||
|
||||
await pool.end();
|
||||
console.log('=== DATABASE CHECK COMPLETE ===');
|
||||
}
|
||||
|
||||
main().catch(e => {
|
||||
console.error('Error:', e.message);
|
||||
process.exit(1);
|
||||
});
|
||||
@@ -1,6 +1,16 @@
|
||||
/**
|
||||
* Scrape ALL active products via direct GraphQL pagination
|
||||
* This is more reliable than category navigation
|
||||
* LEGACY SCRIPT - Scrape All Active Products
|
||||
*
|
||||
* DEPRECATED: This script creates its own database pool.
|
||||
* Future implementations should use the CannaiQ API endpoints instead.
|
||||
*
|
||||
* Scrapes ALL active products via direct GraphQL pagination.
|
||||
* This is more reliable than category navigation.
|
||||
*
|
||||
* DO NOT:
|
||||
* - Add this to package.json scripts
|
||||
* - Run this in automated jobs
|
||||
* - Use DATABASE_URL directly
|
||||
*/
|
||||
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
@@ -10,8 +20,11 @@ import { normalizeDutchieProduct, DutchieProduct } from '../scrapers/dutchie-gra
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
const DATABASE_URL =
|
||||
process.env.DATABASE_URL || 'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';
|
||||
console.warn('\n⚠️ LEGACY SCRIPT: This script should be replaced with CannaiQ API calls.\n');
|
||||
|
||||
// Single database connection (cannaiq in cannaiq-postgres container)
|
||||
const DATABASE_URL = process.env.CANNAIQ_DB_URL ||
|
||||
`postgresql://${process.env.CANNAIQ_DB_USER || 'dutchie'}:${process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass'}@${process.env.CANNAIQ_DB_HOST || 'localhost'}:${process.env.CANNAIQ_DB_PORT || '54320'}/${process.env.CANNAIQ_DB_NAME || 'cannaiq'}`;
|
||||
const GRAPHQL_HASH = 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0';
|
||||
|
||||
async function scrapeAllProducts(menuUrl: string, storeId: number) {
|
||||
|
||||
42
backend/src/scripts/search-dispensaries.ts
Normal file
42
backend/src/scripts/search-dispensaries.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
import pg from 'pg';
|
||||
const pool = new pg.Pool({ connectionString: process.env.DATABASE_URL });

/**
 * Ad-hoc lookup script: prints dispensaries whose name/address matches a few
 * hard-coded patterns, any "nirvana" dispensaries, and every AZ dispensary.
 * Each row is written to stdout as one JSON line.
 *
 * The pool is closed in `finally` so it is released even when a query throws.
 */
async function main() {
  // Column list shared by the two pattern searches.
  const detailColumns =
    'id, name, address, city, state, zip, menu_url, menu_type, platform_dispensary_id, website';

  try {
    // Search broadly for flower power
    const result = await pool.query(
      `
      SELECT ${detailColumns}
      FROM dispensaries
      WHERE LOWER(name) LIKE $1 OR LOWER(name) LIKE $2 OR LOWER(address) LIKE $3
      ORDER BY name
    `,
      ['%flower%', '%az %', '%union hills%']
    );

    console.log('=== SEARCHING FOR FLOWER/AZ/UNION HILLS ===');
    result.rows.forEach((r: any) => console.log(JSON.stringify(r)));

    // Also search for any existing Nirvana dispensaries
    const nirvana = await pool.query(
      `
      SELECT ${detailColumns}
      FROM dispensaries
      WHERE LOWER(name) LIKE $1
      ORDER BY name
    `,
      ['%nirvana%']
    );

    console.log('');
    console.log('=== EXISTING NIRVANA DISPENSARIES ===');
    nirvana.rows.forEach((r: any) => console.log(JSON.stringify(r)));

    // Get all AZ dispensaries for comparison
    const allAZ = await pool.query(`
      SELECT id, name, address, city, state, zip
      FROM dispensaries
      WHERE state = 'AZ'
      ORDER BY name
    `);

    console.log('');
    console.log('=== ALL AZ DISPENSARIES (' + allAZ.rows.length + ' total) ===');
    allAZ.rows.forEach((r: any) =>
      console.log(JSON.stringify({id: r.id, name: r.name, address: r.address, city: r.city}))
    );
  } finally {
    // Release the pool on both the success and the failure path.
    await pool.end();
  }
}

main().catch((e: unknown) => {
  console.error(e instanceof Error ? e.message : e);
  process.exit(1);
});
|
||||
307
backend/src/scripts/seed-dt-cities-bulk.ts
Normal file
307
backend/src/scripts/seed-dt-cities-bulk.ts
Normal file
@@ -0,0 +1,307 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Seed Dutchie Discovery Cities - Bulk
|
||||
*
|
||||
* Seeds dutchie_discovery_cities with a static list of major US metros.
|
||||
* Uses UPSERT to avoid duplicates on re-runs.
|
||||
*
|
||||
* Usage:
|
||||
* npm run seed:dt:cities:bulk
|
||||
* DATABASE_URL="..." npx tsx src/scripts/seed-dt-cities-bulk.ts
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
|
||||
// Connection string: first non-empty of DATABASE_URL, CANNAIQ_DB_URL, then the
// hard-coded local fallback.
const DB_URL =
  [process.env.DATABASE_URL, process.env.CANNAIQ_DB_URL].find(Boolean) ??
  'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';
|
||||
|
||||
// ============================================================================
|
||||
// Static list of major US metros
|
||||
// Format: { city_slug, city_name, state_code, country_code }
|
||||
// ============================================================================
|
||||
|
||||
/** One row to upsert into dutchie_discovery_cities. */
interface CityEntry {
  city_slug: string;
  city_name: string;
  state_code: string;
  country_code: string;
}

/**
 * City display names grouped by state code, in seeding order.
 * Every slug in this list has the form `<state>-<kebab-cased name>` with
 * periods dropped (e.g. "St. Louis" in MO -> "mo-st-louis"), so slugs are
 * derived below instead of being spelled out per entry.
 */
const US_CITY_NAMES_BY_STATE: ReadonlyArray<readonly [string, readonly string[]]> = [
  // Arizona (priority state)
  ['AZ', ['Phoenix', 'Tucson', 'Mesa', 'Scottsdale', 'Tempe', 'Chandler', 'Glendale', 'Peoria', 'Flagstaff', 'Sedona']],
  // California
  ['CA', [
    'Los Angeles', 'San Francisco', 'San Diego', 'San Jose', 'Oakland',
    'Sacramento', 'Fresno', 'Long Beach', 'Bakersfield', 'Anaheim',
    'Santa Ana', 'Riverside', 'Stockton', 'Irvine', 'Santa Barbara',
  ]],
  // Colorado
  ['CO', ['Denver', 'Colorado Springs', 'Aurora', 'Boulder', 'Fort Collins', 'Pueblo']],
  // Florida
  ['FL', ['Miami', 'Orlando', 'Tampa', 'Jacksonville', 'Fort Lauderdale', 'West Palm Beach', 'St. Petersburg']],
  // Illinois
  ['IL', ['Chicago', 'Springfield', 'Peoria', 'Rockford']],
  // Massachusetts
  ['MA', ['Boston', 'Worcester', 'Springfield', 'Cambridge']],
  // Michigan
  ['MI', ['Detroit', 'Grand Rapids', 'Ann Arbor', 'Lansing', 'Flint']],
  // Nevada
  ['NV', ['Las Vegas', 'Reno', 'Henderson', 'North Las Vegas']],
  // New Jersey
  ['NJ', ['Newark', 'Jersey City', 'Paterson', 'Trenton']],
  // New Mexico
  ['NM', ['Albuquerque', 'Santa Fe', 'Las Cruces']],
  // New York
  ['NY', ['New York', 'Buffalo', 'Rochester', 'Albany', 'Syracuse']],
  // Ohio
  ['OH', ['Columbus', 'Cleveland', 'Cincinnati', 'Toledo', 'Akron']],
  // Oklahoma
  ['OK', ['Oklahoma City', 'Tulsa', 'Norman']],
  // Oregon
  ['OR', ['Portland', 'Eugene', 'Salem', 'Bend', 'Medford']],
  // Pennsylvania
  ['PA', ['Philadelphia', 'Pittsburgh', 'Allentown']],
  // Texas (limited cannabis, but for completeness)
  ['TX', ['Houston', 'San Antonio', 'Dallas', 'Austin', 'Fort Worth', 'El Paso']],
  // Virginia
  ['VA', ['Virginia Beach', 'Norfolk', 'Richmond', 'Arlington']],
  // Washington
  ['WA', ['Seattle', 'Spokane', 'Tacoma', 'Vancouver', 'Bellevue']],
  // Washington DC
  ['DC', ['Washington']],
  // Maryland
  ['MD', ['Baltimore', 'Rockville', 'Silver Spring']],
  // Connecticut
  ['CT', ['Hartford', 'New Haven', 'Stamford']],
  // Maine
  ['ME', ['Portland', 'Bangor']],
  // Missouri
  ['MO', ['Kansas City', 'St. Louis', 'Springfield']],
  // Minnesota
  ['MN', ['Minneapolis', 'St. Paul', 'Duluth']],
  // Alaska
  ['AK', ['Anchorage', 'Fairbanks', 'Juneau']],
  // Hawaii
  ['HI', ['Honolulu', 'Maui']],
  // Vermont
  ['VT', ['Burlington']],
  // Rhode Island
  ['RI', ['Providence']],
  // Delaware
  ['DE', ['Wilmington']],
  // Montana
  ['MT', ['Billings', 'Missoula']],
];

/** Builds the `<state>-<city>` slug: lower-case, periods removed, whitespace runs -> "-". */
function toCitySlug(stateCode: string, cityName: string): string {
  return `${stateCode}-${cityName}`
    .toLowerCase()
    .replace(/\./g, '')
    .replace(/\s+/g, '-');
}

// Flattened seed list; order matches the table above (state by state).
const CITIES: CityEntry[] = [];
for (const [stateCode, cityNames] of US_CITY_NAMES_BY_STATE) {
  for (const cityName of cityNames) {
    CITIES.push({
      city_slug: toCitySlug(stateCode, cityName),
      city_name: cityName,
      state_code: stateCode,
      country_code: 'US',
    });
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// Main
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Upserts every entry of CITIES into dutchie_discovery_cities and prints a
 * summary (inserted / updated / errors / total rows for the platform).
 *
 * A failure on a single city is logged and counted; the remaining cities are
 * still processed. The outcome is reported through `process.exitCode`
 * (1 if any city failed or the run aborted) rather than `process.exit()`, so
 * the `finally` block actually runs and the pool is closed before the process
 * ends.
 */
async function main() {
  console.log('=========================================================');
  console.log(' Seed Dutchie Discovery Cities - Bulk');
  console.log('=========================================================');
  // Mask the password portion of the connection string before logging it.
  console.log(`\nDatabase: ${DB_URL.replace(/:[^:@]+@/, ':****@')}`);
  console.log(`Cities to seed: ${CITIES.length}`);

  const pool = new Pool({ connectionString: DB_URL });

  // Loop-invariant statement. `xmax = 0` holds for a freshly inserted row, so
  // the returned flag distinguishes an insert from an ON CONFLICT update.
  const upsertSql = `
    INSERT INTO dutchie_discovery_cities (
      platform,
      city_slug,
      city_name,
      state_code,
      country_code,
      crawl_enabled,
      created_at,
      updated_at
    ) VALUES (
      'dutchie',
      $1,
      $2,
      $3,
      $4,
      TRUE,
      NOW(),
      NOW()
    )
    ON CONFLICT (platform, country_code, state_code, city_slug)
    DO UPDATE SET
      city_name = EXCLUDED.city_name,
      crawl_enabled = TRUE,
      updated_at = NOW()
    RETURNING (xmax = 0) AS inserted
  `;

  try {
    // Test connection
    const { rows } = await pool.query('SELECT NOW() as time');
    console.log(`Connected at: ${rows[0].time}\n`);

    let inserted = 0;
    let updated = 0;
    let errors = 0;

    for (const city of CITIES) {
      try {
        const result = await pool.query(upsertSql, [
          city.city_slug,
          city.city_name,
          city.state_code,
          city.country_code,
        ]);

        if (result.rows[0].inserted) {
          inserted++;
        } else {
          updated++;
        }
      } catch (err: unknown) {
        const reason = err instanceof Error ? err.message : String(err);
        console.error(` Error seeding ${city.city_slug}: ${reason}`);
        errors++;
      }
    }

    // Get total count
    const { rows: countRows } = await pool.query(`
      SELECT COUNT(*) as total FROM dutchie_discovery_cities WHERE platform = 'dutchie'
    `);

    console.log('=========================================================');
    console.log(' SUMMARY');
    console.log('=========================================================');
    console.log(` Cities in static list: ${CITIES.length}`);
    console.log(` Inserted: ${inserted}`);
    console.log(` Updated: ${updated}`);
    console.log(` Errors: ${errors}`);
    console.log(` Total in DB: ${countRows[0].total}`);

    if (errors > 0) {
      console.log('\n Completed with errors');
      process.exitCode = 1;
    } else {
      console.log('\n Seed completed successfully');
    }
  } catch (error: unknown) {
    console.error('\n Seed failed:', error instanceof Error ? error.message : error);
    process.exitCode = 1;
  } finally {
    // Reached on every path now that nothing above calls process.exit().
    await pool.end();
  }
}

main();
|
||||
166
backend/src/scripts/seed-dt-city.ts
Normal file
166
backend/src/scripts/seed-dt-city.ts
Normal file
@@ -0,0 +1,166 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Seed Dutchie City for Discovery
|
||||
*
|
||||
* Manually seeds a city into dutchie_discovery_cities for location discovery.
|
||||
* Use this when /cities scraping is blocked (403) and you need to manually add cities.
|
||||
*
|
||||
* Usage:
|
||||
* npm run seed:platforms:dt:city -- --city-slug=ny-hudson --city-name=Hudson --state-code=NY
|
||||
* npm run seed:platforms:dt:city -- --city-slug=ma-boston --city-name=Boston --state-code=MA --country-code=US
|
||||
*
|
||||
* Options:
|
||||
* --city-slug Required. URL slug for the city (e.g., "ny-hudson")
|
||||
* --city-name Required. Display name (e.g., "Hudson")
|
||||
* --state-code Required. State/province code (e.g., "NY", "CA", "ON")
|
||||
* --country-code Optional. Country code (default: "US")
|
||||
*
|
||||
* After seeding, run location discovery:
|
||||
* npm run discovery:platforms:dt:locations
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
|
||||
// Connection string: first non-empty of DATABASE_URL, CANNAIQ_DB_URL, then the
// hard-coded local fallback.
const DB_URL =
  [process.env.DATABASE_URL, process.env.CANNAIQ_DB_URL].find(Boolean) ??
  'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';
|
||||
|
||||
/** Command-line options; only `countryCode` has a default. */
interface Args {
  citySlug?: string;
  cityName?: string;
  stateCode?: string;
  countryCode: string;
}

/**
 * Reads `--city-slug=`, `--city-name=`, `--state-code=` and `--country-code=`
 * from `process.argv` (everything after the script path).
 *
 * State and country codes are upper-cased. A flag given more than once keeps
 * its last value. Flags with an empty value are ignored because the patterns
 * require at least one character after `=`.
 *
 * @returns the parsed options, with `countryCode` defaulting to "US".
 */
function parseArgs(): Args {
  const parsed: Args = { countryCode: 'US' };

  // Each pattern is paired with the assignment to perform on a match.
  const flagHandlers: Array<[RegExp, (value: string) => void]> = [
    [/--city-slug=(.+)/, (value) => { parsed.citySlug = value; }],
    [/--city-name=(.+)/, (value) => { parsed.cityName = value; }],
    [/--state-code=(.+)/, (value) => { parsed.stateCode = value.toUpperCase(); }],
    [/--country-code=(.+)/, (value) => { parsed.countryCode = value.toUpperCase(); }],
  ];

  process.argv.slice(2).forEach((token) => {
    // Every pattern is tried against every token, in the order listed above.
    for (const [pattern, assign] of flagHandlers) {
      const hit = token.match(pattern);
      if (hit) assign(hit[1]);
    }
  });

  return parsed;
}
|
||||
|
||||
/** Writes the command-line help text to stdout in a single console.log call. */
function printUsage() {
  const usageLines = [
    '',
    'Usage:',
    'npm run seed:platforms:dt:city -- --city-slug=<slug> --city-name=<name> --state-code=<state>',
    '',
    'Required arguments:',
    '--city-slug URL slug for the city (e.g., "ny-hudson", "ma-boston")',
    '--city-name Display name (e.g., "Hudson", "Boston")',
    '--state-code State/province code (e.g., "NY", "CA", "ON")',
    '',
    'Optional arguments:',
    '--country-code Country code (default: "US")',
    '',
    'Examples:',
    'npm run seed:platforms:dt:city -- --city-slug=ny-hudson --city-name=Hudson --state-code=NY',
    'npm run seed:platforms:dt:city -- --city-slug=ca-los-angeles --city-name="Los Angeles" --state-code=CA',
    'npm run seed:platforms:dt:city -- --city-slug=on-toronto --city-name=Toronto --state-code=ON --country-code=CA',
    '',
  ];

  // The leading and trailing empty entries reproduce the newline right after
  // the opening quote and right before the closing quote of the help text.
  console.log(usageLines.join('\n'));
}
|
||||
|
||||
/**
 * Upserts one city (taken from the command-line flags) into
 * dutchie_discovery_cities, then prints the stored row and the current
 * total/enabled city counts for the dutchie platform.
 *
 * Missing required flags print the usage text and yield exit code 1. The
 * outcome is reported through `process.exitCode` rather than `process.exit()`,
 * so the `finally` block actually runs and the pool is closed before the
 * process ends.
 */
async function main() {
  const args = parseArgs();

  console.log('╔══════════════════════════════════════════════════╗');
  console.log('║ Seed Dutchie City for Discovery ║');
  console.log('╚══════════════════════════════════════════════════╝');

  // Validate required args
  if (!args.citySlug || !args.cityName || !args.stateCode) {
    console.error('\n❌ Error: Missing required arguments\n');
    printUsage();
    process.exitCode = 1;
    return;
  }

  console.log(`\nCity Slug: ${args.citySlug}`);
  console.log(`City Name: ${args.cityName}`);
  console.log(`State Code: ${args.stateCode}`);
  console.log(`Country Code: ${args.countryCode}`);
  // Mask the password portion of the connection string before logging it.
  console.log(`Database: ${DB_URL.replace(/:[^:@]+@/, ':****@')}`);

  const pool = new Pool({ connectionString: DB_URL });

  try {
    // Test DB connection
    const { rows: connTest } = await pool.query('SELECT NOW() as time');
    console.log(`\nConnected at: ${connTest[0].time}`);

    // Upsert the city. `xmax = 0` holds for a freshly inserted row, so
    // was_inserted distinguishes an insert from an ON CONFLICT update.
    const { rows } = await pool.query(`
      INSERT INTO dutchie_discovery_cities (
        platform,
        city_slug,
        city_name,
        state_code,
        country_code,
        crawl_enabled,
        created_at,
        updated_at
      ) VALUES (
        'dutchie',
        $1,
        $2,
        $3,
        $4,
        TRUE,
        NOW(),
        NOW()
      )
      ON CONFLICT (platform, country_code, state_code, city_slug)
      DO UPDATE SET
        city_name = EXCLUDED.city_name,
        crawl_enabled = TRUE,
        updated_at = NOW()
      RETURNING id, city_slug, city_name, state_code, country_code, crawl_enabled,
        (xmax = 0) AS was_inserted
    `, [args.citySlug, args.cityName, args.stateCode, args.countryCode]);

    if (rows.length > 0) {
      const row = rows[0];
      const action = row.was_inserted ? 'INSERTED' : 'UPDATED';
      console.log(`\n✅ City ${action}:`);
      console.log(` ID: ${row.id}`);
      console.log(` City Slug: ${row.city_slug}`);
      console.log(` City Name: ${row.city_name}`);
      console.log(` State Code: ${row.state_code}`);
      console.log(` Country Code: ${row.country_code}`);
      console.log(` Crawl Enabled: ${row.crawl_enabled}`);
    }

    // Show current city count
    const { rows: countRows } = await pool.query(`
      SELECT
        COUNT(*) as total,
        COUNT(*) FILTER (WHERE crawl_enabled = TRUE) as enabled
      FROM dutchie_discovery_cities
      WHERE platform = 'dutchie'
    `);

    console.log(`\nTotal Dutchie cities: ${countRows[0].total} (${countRows[0].enabled} enabled)`);

    console.log('\n📍 Next step: Run location discovery');
    console.log(' npm run discovery:platforms:dt:locations');
  } catch (error: unknown) {
    console.error('\n❌ Failed to seed city:', error instanceof Error ? error.message : error);
    process.exitCode = 1;
  } finally {
    // Reached on every path now that nothing above calls process.exit().
    await pool.end();
  }
}

main();
|
||||
325
backend/src/scripts/system-smoke-test.ts
Normal file
325
backend/src/scripts/system-smoke-test.ts
Normal file
@@ -0,0 +1,325 @@
|
||||
/**
|
||||
* System Smoke Test
|
||||
*
|
||||
* Validates core CannaiQ system components:
|
||||
* - Database connectivity
|
||||
* - Required tables and row counts
|
||||
* - Discovery data (via direct DB query)
|
||||
* - Analytics V2 services (via direct service calls)
|
||||
* - Orchestrator route (via HTTP)
|
||||
*
|
||||
* Usage: npm run system:smoke-test
|
||||
* Exit codes: 0 = success, 1 = failure
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import axios from 'axios';
|
||||
|
||||
// Configuration
|
||||
// Base URL of the backend API; falls back to the local default when
// API_BASE_URL is unset or empty.
const API_BASE = process.env.API_BASE_URL
  ? process.env.API_BASE_URL
  : 'http://localhost:3010';

// Connection string: first non-empty of DATABASE_URL, CANNAIQ_DB_URL, then the
// hard-coded local fallback.
const DB_URL =
  [process.env.DATABASE_URL, process.env.CANNAIQ_DB_URL].find(Boolean) ??
  'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';
|
||||
|
||||
// Test results tracking
|
||||
/** Outcome of a single smoke-test check. */
interface TestResult {
  name: string;
  passed: boolean;
  message: string;
  details?: any;
}

// Every recorded check, in execution order.
const results: TestResult[] = [];
// Flips to true as soon as any check fails; never reset.
let hasFailure = false;

/**
 * Records a successful check and prints a PASS line.
 *
 * @param name    short label of the check
 * @param message human-readable outcome
 * @param details optional extra data, stored with the result but not printed
 */
function pass(name: string, message: string, details?: any) {
  const entry: TestResult = { name, passed: true, message, details };
  results.push(entry);
  console.log(' ✓ PASS: ' + name + ' - ' + message);
}

/**
 * Records a failed check, prints a FAIL line and marks the run as failed.
 *
 * @param name    short label of the check
 * @param message human-readable reason
 * @param details optional extra data, stored with the result but not printed
 */
function fail(name: string, message: string, details?: any) {
  const entry: TestResult = { name, passed: false, message, details };
  results.push(entry);
  console.log(' ✗ FAIL: ' + name + ' - ' + message);
  hasFailure = true;
}
|
||||
|
||||
// ============================================================
|
||||
// DATABASE TESTS
|
||||
// ============================================================
|
||||
|
||||
/**
 * Check 1/4: runs a trivial query to confirm the database is reachable and
 * records the database name and server time on success.
 *
 * @param pool connection pool to probe
 * @returns true if the query succeeded, false if it threw (the failure is
 *          recorded via `fail`, not rethrown)
 */
async function testDatabaseConnection(pool: Pool): Promise<boolean> {
  console.log('\n[1/4] DATABASE CONNECTION');
  console.log('─'.repeat(50));

  let connected = false;
  try {
    const { rows } = await pool.query('SELECT NOW() as time, current_database() as db');
    const [{ time, db }] = rows;
    pass('DB Connection', `Connected to ${db} at ${time}`);
    connected = true;
  } catch (error: any) {
    fail('DB Connection', `Failed: ${error.message}`);
  }
  return connected;
}
|
||||
|
||||
/**
 * Step 2: check that each required table exists and report its row count.
 *
 * Every table is counted independently, so one missing table does not stop
 * the remaining checks.
 *
 * @param pool Connection pool used for the COUNT queries.
 */
async function testRequiredTables(pool: Pool): Promise<void> {
  console.log('\n[2/4] REQUIRED TABLES');
  console.log('─'.repeat(50));

  // Fixed list of identifiers; interpolated into SQL below, never user input.
  const requiredTables = [
    'states',
    'dispensaries',
    'store_products',
    'store_product_snapshots',
    'crawl_runs',
    'dutchie_discovery_cities',
    'dutchie_discovery_locations',
  ];

  for (const tableName of requiredTables) {
    const label = `Table: ${tableName}`;
    try {
      const { rows } = await pool.query(`SELECT COUNT(*) as count FROM ${tableName}`);
      // The count is parsed from rows[0].count before formatting.
      const rowCount = parseInt(rows[0].count, 10);
      pass(label, `${rowCount.toLocaleString()} rows`);
    } catch (error: any) {
      // 42P01 is PostgreSQL's "undefined_table" error code.
      const reason =
        error.code === '42P01' ? 'Table does not exist' : `Query failed: ${error.message}`;
      fail(label, reason);
    }
  }
}
|
||||
|
||||
// ============================================================
// DISCOVERY DATA TESTS (Direct DB)
// ============================================================
|
||||
|
||||
/**
 * Step 3: query the discovery tables directly.
 *
 * Two independent checks are recorded:
 *  - "Discovery Summary": counts of active dutchie locations grouped by status.
 *  - "Discovery Locations": fetches the active dutchie location with the
 *    highest id; an empty result still counts as a pass.
 *
 * @param pool Connection pool used for both queries.
 */
async function testDiscoveryData(pool: Pool): Promise<void> {
  console.log('\n[3/4] DISCOVERY DATA (Direct DB Query)');
  console.log('─'.repeat(50));

  // Shared failure reporting: 42P01 (undefined_table) gets a dedicated message.
  const reportFailure = (label: string, error: any): void => {
    const reason =
      error.code === '42P01'
        ? 'Table dutchie_discovery_locations does not exist'
        : `Query failed: ${error.message}`;
    fail(label, reason);
  };

  // Test discovery summary via direct query
  try {
    const summary = await pool.query(`
      SELECT status, COUNT(*) as cnt
      FROM dutchie_discovery_locations
      WHERE platform = 'dutchie' AND active = TRUE
      GROUP BY status
    `);

    const statusCounts: Record<string, number> = {};
    let totalLocations = 0;
    for (const { status, cnt } of summary.rows) {
      const parsed = parseInt(cnt, 10);
      statusCounts[status] = parsed;
      totalLocations += parsed;
    }

    pass('Discovery Summary', `${totalLocations} total locations`, {
      discovered: statusCounts['discovered'] || 0,
      verified: statusCounts['verified'] || 0,
      merged: statusCounts['merged'] || 0,
      rejected: statusCounts['rejected'] || 0,
    });
  } catch (error: any) {
    reportFailure('Discovery Summary', error);
  }

  // Test discovery locations query
  try {
    const { rows: newest } = await pool.query(`
      SELECT id, name, state_code, status
      FROM dutchie_discovery_locations
      WHERE platform = 'dutchie' AND active = TRUE
      ORDER BY id DESC
      LIMIT 1
    `);

    if (newest.length === 0) {
      pass('Discovery Locations', 'Query succeeded, 0 locations found');
    } else {
      pass('Discovery Locations', `Found location: ${newest[0].name} (${newest[0].state_code})`);
    }
  } catch (error: any) {
    reportFailure('Discovery Locations', error);
  }
}
|
||||
|
||||
// ============================================================
// ANALYTICS V2 SERVICE TESTS (Direct Service Calls)
// ============================================================
|
||||
|
||||
/**
 * Step 4: state-legality queries against the `states` table, followed by an
 * HTTP probe of the orchestrator promote route.
 *
 * Four independent checks are recorded via pass()/fail():
 *  - "State Legal Breakdown": counts of recreational, medical-only and
 *    no-program states.
 *  - "Recreational States" / "Medical-Only States": the state codes, with the
 *    first five shown in the message.
 *  - "Orchestrator Promote (dry)": POSTs to the promote route with ID 0 and
 *    accepts a 400/404 or any 2xx answer as proof that the route exists.
 *
 * @param pool Connection pool used for the `states` queries.
 */
async function testAnalyticsV2Services(pool: Pool): Promise<void> {
  console.log('\n[4/4] ANALYTICS V2 (Direct Service Calls)');
  console.log('─'.repeat(50));

  // Formats a list of state codes as "<n> states: A, B, C, D, E...".
  const summarize = (codes: string[]): string =>
    `${codes.length} states: ${codes.slice(0, 5).join(', ')}${codes.length > 5 ? '...' : ''}`;

  // Test: State Legal Breakdown
  try {
    // Recreational states
    const { rows: recRows } = await pool.query(`
      SELECT code FROM states
      WHERE recreational_legal = TRUE
      ORDER BY code
    `);

    // Medical-only states
    const { rows: medRows } = await pool.query(`
      SELECT code FROM states
      WHERE medical_legal = TRUE
      AND (recreational_legal = FALSE OR recreational_legal IS NULL)
      ORDER BY code
    `);

    // No-program states
    const { rows: noProgramRows } = await pool.query(`
      SELECT code FROM states
      WHERE (recreational_legal = FALSE OR recreational_legal IS NULL)
      AND (medical_legal = FALSE OR medical_legal IS NULL)
      ORDER BY code
    `);

    const breakdown = {
      recreational: recRows.length,
      medical_only: medRows.length,
      no_program: noProgramRows.length,
    };

    pass(
      'State Legal Breakdown',
      `rec=${breakdown.recreational}, med=${breakdown.medical_only}, none=${breakdown.no_program}`
    );
  } catch (error: any) {
    fail('State Legal Breakdown', `Query failed: ${error.message}`);
  }

  // Test: Recreational States
  try {
    const { rows } = await pool.query(`
      SELECT code FROM states
      WHERE recreational_legal = TRUE
      ORDER BY code
    `);
    const states = rows.map((r: any) => r.code);
    pass('Recreational States', summarize(states));
  } catch (error: any) {
    fail('Recreational States', `Query failed: ${error.message}`);
  }

  // Test: Medical-Only States
  try {
    const { rows } = await pool.query(`
      SELECT code FROM states
      WHERE medical_legal = TRUE
      AND (recreational_legal = FALSE OR recreational_legal IS NULL)
      ORDER BY code
    `);
    const states = rows.map((r: any) => r.code);
    pass('Medical-Only States', summarize(states));
  } catch (error: any) {
    fail('Medical-Only States', `Query failed: ${error.message}`);
  }

  // Test orchestrator route via HTTP (dry run)
  console.log('\n[4b/4] ORCHESTRATOR ROUTE (HTTP)');
  console.log('─'.repeat(50));

  try {
    const response = await axios.post(
      `${API_BASE}/api/orchestrator/platforms/dt/promote/0`,
      {},
      { timeout: 10000 }
    );
    // With axios' default validateStatus only 2xx responses resolve here;
    // 4xx/5xx answers reject and are handled in the catch block below.
    // `data?.` guards against a null/empty body, which would otherwise throw
    // and be misreported as a failed route.
    if (response.status === 200 && response.data?.success === false) {
      pass('Orchestrator Promote (dry)', 'Route exists, gracefully rejected ID 0');
    } else {
      pass('Orchestrator Promote (dry)', `Route exists, status ${response.status}`);
    }
  } catch (error: any) {
    // ID 0 should fail gracefully: a 400/404 still proves the route is mounted.
    if (error.response?.status === 400 || error.response?.status === 404) {
      pass('Orchestrator Promote (dry)', `Route exists, returned ${error.response.status} for invalid ID`);
    } else {
      // No `response` means the request never got an HTTP answer
      // (connection refused, timeout, ...), so only error.message is available.
      const msg = error.response?.status
        ? `HTTP ${error.response.status}: ${error.response.data?.error || error.message}`
        : error.message;
      fail('Orchestrator Promote (dry)', msg);
    }
  }
}
|
||||
|
||||
// ============================================================
// MAIN
// ============================================================
|
||||
|
||||
/**
 * Run the whole smoke test: print the banner, execute the four test groups,
 * print a summary, and terminate the process.
 *
 * Groups 2-4 need the database, so they are skipped (with a notice each) when
 * the connection check fails. The pool is always closed before the summary.
 *
 * Exits with code 0 when no check failed and 1 otherwise.
 */
async function main() {
  console.log('╔══════════════════════════════════════════════════╗');
  console.log('║ CannaiQ System Smoke Test ║');
  console.log('╚══════════════════════════════════════════════════╝');
  console.log(`\nAPI Base: ${API_BASE}`);
  // Mask the password before logging. The match is anchored to the userinfo
  // part ("//user:password@") so a password containing ':' is hidden in full
  // and a port such as ":5432" is never mistaken for a password.
  console.log(`Database: ${DB_URL.replace(/(\/\/[^:@/]*):[^@/]*@/, '$1:****@')}`);

  const pool = new Pool({ connectionString: DB_URL });

  try {
    // 1. Database connection
    const dbConnected = await testDatabaseConnection(pool);

    if (dbConnected) {
      // 2. Required tables
      await testRequiredTables(pool);
      // 3. Discovery data (direct DB)
      await testDiscoveryData(pool);
      // 4. Analytics V2 services (direct DB + orchestrator HTTP)
      await testAnalyticsV2Services(pool);
    } else {
      // Everything below depends on the database, so report each skip.
      console.log('\n[2/4] REQUIRED TABLES - SKIPPED (no DB connection)');
      console.log('\n[3/4] DISCOVERY DATA - SKIPPED (no DB connection)');
      console.log('\n[4/4] ANALYTICS V2 - SKIPPED (no DB connection)');
    }
  } finally {
    // Close the pool even if a test group throws unexpectedly.
    await pool.end();
  }

  // Summary
  console.log('\n' + '═'.repeat(50));
  console.log('SUMMARY');
  console.log('═'.repeat(50));

  const failures = results.filter(r => !r.passed);
  const total = results.length;
  const passed = total - failures.length;

  console.log(`\nTotal: ${total} | Passed: ${passed} | Failed: ${failures.length}`);

  if (hasFailure) {
    console.log('\n❌ SMOKE TEST FAILED\n');
    console.log('Failed tests:');
    for (const r of failures) {
      console.log(` - ${r.name}: ${r.message}`);
    }
    process.exit(1);
  } else {
    console.log('\n✅ SMOKE TEST PASSED\n');
    process.exit(0);
  }
}
|
||||
|
||||
// Script entry point. main() ends the process itself on a normal run; this
// handler only fires when it rejects unexpectedly, and maps that to exit code 1.
main().catch((crash) => {
  console.error('\n❌ SMOKE TEST CRASHED:', crash.message);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user