#!/usr/bin/env npx tsx
/**
 * Test Script: Crawl a single dispensary and write to canonical tables
 *
 * This script:
 * 1. Fetches products from Dutchie GraphQL
 * 2. Normalizes via DutchieNormalizer
 * 3. Writes to store_products, product_variants, snapshots via hydrateToCanonical
 *
 * Usage:
 *   npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>
 *   npx tsx src/scripts/test-crawl-to-canonical.ts 235
 */

import { Pool } from 'pg';
import dotenv from 'dotenv';
import {
  executeGraphQL,
  GRAPHQL_HASHES,
  DUTCHIE_CONFIG,
} from '../platforms/dutchie';
import {
  DutchieNormalizer,
  hydrateToCanonical,
} from '../hydration';
import { initializeImageStorage } from '../utils/image-storage';

dotenv.config();

/** Horizontal rule used to frame the start, success and error sections of the output. */
const BANNER = '============================================================';

/**
 * Extracts a printable message from a caught value.
 *
 * Anything can be thrown in JavaScript, so non-Error values are stringified
 * instead of being assumed to carry a `message` property.
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

// ============================================================
// DATABASE CONNECTION
// ============================================================

/**
 * Resolves the PostgreSQL connection string from the environment.
 *
 * Precedence: `CANNAIQ_DB_URL`, then `DATABASE_URL`, then a URI assembled from
 * the individual `CANNAIQ_DB_*` variables (each with a local-development
 * default).
 *
 * @returns A `postgresql://` connection URI.
 */
function getConnectionString(): string {
  if (process.env.CANNAIQ_DB_URL) {
    return process.env.CANNAIQ_DB_URL;
  }
  if (process.env.DATABASE_URL) {
    return process.env.DATABASE_URL;
  }

  // `||` (not `??`) is deliberate: an empty env var falls back to the default.
  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = process.env.CANNAIQ_DB_USER || 'dutchie';
  const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';

  // Credentials are percent-encoded so that reserved characters such as
  // '@', ':', '/' or '#' in a password cannot corrupt the URI structure.
  const credentials = `${encodeURIComponent(user)}:${encodeURIComponent(pass)}`;
  return `postgresql://${credentials}@${host}:${port}/${name}`;
}

const pool = new Pool({ connectionString: getConnectionString() });

// ============================================================
// FETCH PRODUCTS FROM DUTCHIE
// ============================================================

/** Accumulated outcome of a paginated product fetch. */
interface FetchResult {
  /** Raw product objects collected across all fetched pages. */
  products: any[];
  /** Page count derived from the most recent `queryInfo.totalCount`. */
  totalPages: number;
  /** Most recent `queryInfo.totalCount` reported by the API (0 if absent). */
  totalProducts: number;
}

/**
 * Pages through the `FilteredProducts` GraphQL query for one dispensary.
 *
 * Pagination is capped by `DUTCHIE_CONFIG.maxPages`, and consecutive requests
 * are separated by `DUTCHIE_CONFIG.pageDelayMs`. The loop stops at the first
 * page that returns no data or throws; whatever was collected up to that point
 * is still returned, so the result may be partial.
 *
 * @param platformDispensaryId - Dispensary id sent as `productsFilter.dispensaryId`.
 * @param cName - Value forwarded to `executeGraphQL` in its options object.
 * @returns The collected products together with the last reported totals.
 */
async function fetchAllProducts(
  platformDispensaryId: string,
  cName: string
): Promise<FetchResult> {
  const allProducts: any[] = [];
  let page = 0;
  let totalPages = 1;
  let totalProducts = 0;

  console.log(`[Fetch] Starting fetch for ${platformDispensaryId} (cName: ${cName})`);

  while (page < totalPages && page < DUTCHIE_CONFIG.maxPages) {
    const variables = {
      includeEnterpriseSpecials: false,
      productsFilter: {
        dispensaryId: platformDispensaryId,
        pricingType: 'rec',
        Status: 'Active', // 'Active' = in-stock products with pricing
        types: [],
        useCache: true,
        isDefaultSort: true,
        sortBy: 'popularSortIdx',
        sortDirection: 1,
        bypassOnlineThresholds: true,
        isKioskMenu: false,
        removeProductsBelowOptionThresholds: false,
      },
      page,
      perPage: DUTCHIE_CONFIG.perPage,
    };

    try {
      const result = await executeGraphQL(
        'FilteredProducts',
        variables,
        GRAPHQL_HASHES.FilteredProducts,
        { cName, maxRetries: 3 }
      );

      const data = result?.data?.filteredProducts;
      if (!data) {
        console.error(`[Fetch] No data returned for page ${page}`);
        break;
      }

      const products = data.products || [];
      // Totals are refreshed on every page from the server-reported count.
      totalProducts = data.queryInfo?.totalCount || 0;
      totalPages = Math.ceil(totalProducts / DUTCHIE_CONFIG.perPage);

      allProducts.push(...products);
      console.log(`[Fetch] Page ${page + 1}/${totalPages}: ${products.length} products (total so far: ${allProducts.length})`);

      page++;

      // Pause only when another request will follow.
      if (page < totalPages) {
        await new Promise(r => setTimeout(r, DUTCHIE_CONFIG.pageDelayMs));
      }
    } catch (error: unknown) {
      console.error(`[Fetch] Error on page ${page}: ${describeError(error)}`);
      break;
    }
  }

  return { products: allProducts, totalPages, totalProducts };
}

// ============================================================
// MAIN
// ============================================================

/**
 * Script entry point: looks up the dispensary, fetches its products,
 * normalizes them, writes them through `hydrateToCanonical`, and prints row
 * counts from the canonical tables.
 *
 * The process exit code is 1 when the dispensary id argument is missing or
 * invalid, when payload validation fails, or when any step throws; otherwise
 * it is left at its default. The connection pool is closed on every path
 * that reaches the `try` block.
 */
async function main(): Promise<void> {
  const dispensaryId = parseInt(process.argv[2] ?? '', 10);

  if (!dispensaryId) {
    console.error('Usage: npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>');
    console.error('Example: npx tsx src/scripts/test-crawl-to-canonical.ts 235');
    // Nothing has been opened yet (no query has been issued), so exiting
    // immediately is safe here.
    process.exit(1);
  }

  console.log(BANNER);
  console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
  console.log(`${BANNER}\n`);

  try {
    // Initialize image storage. Done inside the try so that a failure is
    // reported through the same error path and the pool is still closed.
    console.log('[Init] Initializing image storage...');
    await initializeImageStorage();
    console.log(' Image storage ready\n');

    // Step 1: Get dispensary info
    console.log('[Step 1] Getting dispensary info...');
    const dispResult = await pool.query(`
      SELECT id, name, platform_dispensary_id, menu_url
      FROM dispensaries
      WHERE id = $1
    `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(` Name: ${disp.name}`);
    console.log(` Platform ID: ${disp.platform_dispensary_id}`);
    console.log(` Menu URL: ${disp.menu_url}`);

    if (!disp.platform_dispensary_id) {
      throw new Error('Dispensary does not have a platform_dispensary_id');
    }

    // Extract cName from menu_url; falls back to the literal 'dispensary'
    // when the URL is missing or does not match either path shape.
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
    console.log(` cName: ${cName}\n`);

    // Step 2: Fetch products from Dutchie
    // NOTE(review): fetchAllProducts returns whatever it collected before a
    // failed page, so a partial catalog can reach the write step below.
    console.log('[Step 2] Fetching products from Dutchie GraphQL...');
    const fetchResult = await fetchAllProducts(disp.platform_dispensary_id, cName);
    console.log(` Total products fetched: ${fetchResult.products.length}\n`);

    if (fetchResult.products.length === 0) {
      console.log('No products fetched. Exiting.');
      // Return instead of process.exit(0) so the finally block closes the pool.
      return;
    }

    // Step 3: Normalize
    console.log('[Step 3] Normalizing products...');
    const normalizer = new DutchieNormalizer();

    // Construct a RawPayload structure that the normalizer expects
    // The normalizer.normalize() expects: { raw_json, dispensary_id, ... }
    const rawPayloadForValidation = {
      products: fetchResult.products,
      queryInfo: {
        totalCount: fetchResult.totalProducts,
      },
    };

    const validation = normalizer.validatePayload(rawPayloadForValidation);
    if (!validation.valid) {
      console.error(` Validation failed: ${validation.errors?.join(', ')}`);
      // Set the exit code and return so the finally block still runs.
      process.exitCode = 1;
      return;
    }
    console.log(` Validation: PASS`);

    // Build proper RawPayload for normalize()
    const rawPayload = {
      id: `test-${Date.now()}`,
      dispensary_id: dispensaryId,
      crawl_run_id: null,
      platform: 'dutchie',
      payload_version: 1,
      raw_json: rawPayloadForValidation,
      product_count: fetchResult.totalProducts,
      pricing_type: 'rec',
      crawl_mode: 'active',
      fetched_at: new Date(),
      processed: false,
      normalized_at: null,
      hydration_error: null,
      hydration_attempts: 0,
      created_at: new Date(),
    };

    const normResult = normalizer.normalize(rawPayload);
    console.log(` Normalized products: ${normResult.products.length}`);
    console.log(` Brands extracted: ${normResult.brands.length}`);
    console.log(` Sample product: ${normResult.products[0]?.name}\n`);

    // Step 4: Write to canonical tables
    console.log('[Step 4] Writing to canonical tables via hydrateToCanonical...');
    const hydrateResult = await hydrateToCanonical(
      pool,
      dispensaryId,
      normResult,
      null // no crawl_run_id for this test
    );

    console.log(` Products upserted: ${hydrateResult.productsUpserted}`);
    console.log(` Products new: ${hydrateResult.productsNew}`);
    console.log(` Snapshots created: ${hydrateResult.snapshotsCreated}`);
    console.log(` Variants upserted: ${hydrateResult.variantsUpserted}`);
    console.log(` Brands created: ${hydrateResult.brandsCreated}\n`);

    // Step 5: Verify
    console.log('[Step 5] Verifying data in canonical tables...');

    const productCount = await pool.query(`
      SELECT COUNT(*) as count
      FROM store_products
      WHERE dispensary_id = $1
    `, [dispensaryId]);
    console.log(` store_products count: ${productCount.rows[0].count}`);

    const variantCount = await pool.query(`
      SELECT COUNT(*) as count
      FROM product_variants
      WHERE dispensary_id = $1
    `, [dispensaryId]);
    console.log(` product_variants count: ${variantCount.rows[0].count}`);

    const snapshotCount = await pool.query(`
      SELECT COUNT(*) as count
      FROM store_product_snapshots
      WHERE dispensary_id = $1
    `, [dispensaryId]);
    console.log(` store_product_snapshots count: ${snapshotCount.rows[0].count}`);

    console.log(`\n${BANNER}`);
    console.log('SUCCESS - Crawl and hydration complete!');
    console.log(BANNER);
  } catch (error: unknown) {
    console.error(`\n${BANNER}`);
    console.error('ERROR:', describeError(error));
    console.error(BANNER);
    if (error instanceof Error && error.stack) {
      console.error(error.stack);
    }
    // process.exit(1) here would terminate before the finally block; setting
    // exitCode reports the failure while still letting the pool close.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}

// Safety net for anything thrown outside main's try block (for example a
// failure while closing the pool), so it never becomes an unhandled rejection.
main().catch((error: unknown) => {
  console.error('ERROR:', describeError(error));
  process.exitCode = 1;
});