#!/usr/bin/env npx tsx
/**
 * Crawl Single Store - Verbose test showing each step
 *
 * Usage:
 *   DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
 *     npx tsx src/scripts/crawl-single-store.ts <dispensaryId>
 *
 * Example:
 *   DATABASE_URL="..." npx tsx src/scripts/crawl-single-store.ts 112
 */

import { Pool } from 'pg';
import dotenv from 'dotenv';
import {
  executeGraphQL,
  startSession,
  endSession,
  setCrawlRotator,
  GRAPHQL_HASHES,
  DUTCHIE_CONFIG,
} from '../platforms/dutchie';
import { CrawlRotator } from '../services/crawl-rotator';

dotenv.config();

// ============================================================
// DATABASE CONNECTION
// ============================================================

/**
 * Resolves the PostgreSQL connection string.
 *
 * Precedence: `DATABASE_URL`, then `CANNAIQ_DB_URL`, then a URL assembled from
 * the individual `CANNAIQ_DB_*` variables (each with a local-dev default).
 *
 * @returns A `postgresql://` connection URL.
 */
function getConnectionString(): string {
  if (process.env.DATABASE_URL) {
    return process.env.DATABASE_URL;
  }
  if (process.env.CANNAIQ_DB_URL) {
    return process.env.CANNAIQ_DB_URL;
  }

  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = process.env.CANNAIQ_DB_USER || 'dutchie';
  const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';

  // Percent-encode the credentials so characters such as '@', ':' or '/'
  // in a password cannot corrupt the URL structure.
  return `postgresql://${encodeURIComponent(user)}:${encodeURIComponent(pass)}@${host}:${port}/${name}`;
}

const pool = new Pool({ connectionString: getConnectionString() });

// ============================================================
// OUTPUT HELPERS
// ============================================================

/** Page size requested from the API; also used to derive the page count. */
const PER_PAGE = 100;

/** Inner width (between the corner characters) of the double-line banner. */
const BANNER_WIDTH = 60;
const BANNER_RULE = '═'.repeat(BANNER_WIDTH);

/** Inner width (between the corner characters) of the single-line step box. */
const STEP_WIDTH = 61;
const STEP_RULE = '─'.repeat(STEP_WIDTH);

type LogFn = (message: string) => void;

/**
 * Prints a double-line banner with a title and optional body rows.
 * Every row is padded to the border width so the right edge lines up.
 *
 * @param title - Text shown in the banner's first row.
 * @param rows - Extra rows printed below a divider; omitted when empty.
 * @param log - Output function (`console.log` by default).
 */
function printBanner(title: string, rows: readonly string[] = [], log: LogFn = console.log): void {
  const formatRow = (text: string): string => `║  ${text.padEnd(BANNER_WIDTH - 4)}  ║`;

  log(`╔${BANNER_RULE}╗`);
  log(formatRow(title));
  if (rows.length > 0) {
    log(`╠${BANNER_RULE}╣`);
    for (const row of rows) {
      log(formatRow(row));
    }
  }
  log(`╚${BANNER_RULE}╝`);
}

/** Prints the single-line box that introduces a step. */
function printStepHeader(title: string): void {
  console.log(`┌${STEP_RULE}┐`);
  console.log(`│  ${title.padEnd(STEP_WIDTH - 4)}  │`);
  console.log(`└${STEP_RULE}┘`);
}

/** Formats one "label: value" summary row; the value is capped at 38 characters. */
function summaryRow(label: string, value: string): string {
  return `${label.padEnd(18)}${value.slice(0, 38)}`;
}

// ============================================================
// MAIN
// ============================================================

/**
 * Crawls the first page of products for one dispensary and prints each step.
 *
 * Reads the dispensary id from `process.argv[2]`. On any failure (including an
 * empty GraphQL response) the process exit code is set to 1. The session, if
 * one was started, and the database pool are always released before returning.
 */
async function main(): Promise<void> {
  const dispensaryId = parseInt(process.argv[2] ?? '', 10);
  if (!dispensaryId) {
    console.error('Usage: npx tsx src/scripts/crawl-single-store.ts <dispensaryId>');
    console.error('Example: npx tsx src/scripts/crawl-single-store.ts 112');
    process.exit(1);
  }

  console.log('');
  printBanner('SINGLE STORE CRAWL - VERBOSE OUTPUT');
  console.log('');

  // Tracks whether a session is open so the finally block can close it
  // when a later step throws or returns early.
  let sessionActive = false;

  try {
    // ============================================================
    // STEP 1: Get dispensary info from database
    // ============================================================
    printStepHeader('STEP 1: Load Dispensary Info from Database');

    const dispResult = await pool.query(`
      SELECT id, name, platform_dispensary_id, menu_url, menu_type, city, state
      FROM dispensaries
      WHERE id = $1
    `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(` Dispensary ID: ${disp.id}`);
    console.log(` Name: ${disp.name}`);
    console.log(` City, State: ${disp.city}, ${disp.state}`);
    console.log(` Menu Type: ${disp.menu_type}`);
    console.log(` Platform ID: ${disp.platform_dispensary_id}`);
    console.log(` Menu URL: ${disp.menu_url}`);

    if (!disp.platform_dispensary_id) {
      throw new Error('Dispensary does not have a platform_dispensary_id - cannot crawl');
    }

    // Extract cName from menu_url; fall back to 'dispensary' when the URL
    // is missing or does not match either known path shape.
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
    console.log(` cName (derived): ${cName}`);
    console.log('');

    // ============================================================
    // STEP 2: Start stealth session
    // Per workflow-12102025.md: Initialize CrawlRotator and start session with menuUrl
    // ============================================================
    printStepHeader('STEP 2: Start Stealth Session');

    // Per workflow-12102025.md: Initialize CrawlRotator (required for sessions)
    const rotator = new CrawlRotator();
    setCrawlRotator(rotator);

    // Per workflow-12102025.md: startSession takes menuUrl for dynamic Referer
    const session = startSession(disp.menu_url);
    sessionActive = true;

    const fp = session.fingerprint;
    console.log(` Session ID: ${session.sessionId}`);
    console.log(` Browser: ${fp.browserName} (${fp.deviceCategory})`);
    console.log(` User-Agent: ${fp.userAgent.slice(0, 60)}...`);
    console.log(` Accept-Language: ${fp.acceptLanguage}`);
    console.log(` Referer: ${session.referer}`);
    console.log(` DNT: ${fp.httpFingerprint.hasDNT ? 'enabled' : 'disabled'}`);
    console.log(` TLS: ${fp.httpFingerprint.curlImpersonateBinary}`);
    console.log('');

    // ============================================================
    // STEP 3: Execute GraphQL query
    // ============================================================
    printStepHeader('STEP 3: Execute GraphQL Query (FilteredProducts)');

    const variables = {
      includeEnterpriseSpecials: false,
      productsFilter: {
        dispensaryId: disp.platform_dispensary_id,
        pricingType: 'rec',
        Status: 'Active',
        types: [],
        useCache: true,
        isDefaultSort: true,
        sortBy: 'popularSortIdx',
        sortDirection: 1,
        bypassOnlineThresholds: true,
        isKioskMenu: false,
        removeProductsBelowOptionThresholds: false,
      },
      page: 0,
      perPage: PER_PAGE,
    };

    console.log(` Endpoint: ${DUTCHIE_CONFIG.graphqlEndpoint}`);
    console.log(` Operation: FilteredProducts`);
    console.log(` Hash: ${GRAPHQL_HASHES.FilteredProducts.slice(0, 20)}...`);
    console.log(` dispensaryId: ${variables.productsFilter.dispensaryId}`);
    console.log(` pricingType: ${variables.productsFilter.pricingType}`);
    console.log(` Status: ${variables.productsFilter.Status}`);
    console.log(` perPage: ${variables.perPage}`);
    console.log('');
    console.log(' Sending request...');

    const startTime = Date.now();
    const result = await executeGraphQL(
      'FilteredProducts',
      variables,
      GRAPHQL_HASHES.FilteredProducts,
      { cName, maxRetries: 3 }
    );
    const elapsed = Date.now() - startTime;

    console.log(` Response time: ${elapsed}ms`);
    console.log('');

    // ============================================================
    // STEP 4: Process response
    // ============================================================
    printStepHeader('STEP 4: Process Response');

    const data = result?.data?.filteredProducts;
    if (!data) {
      // JSON.stringify yields undefined for an undefined input, so guard
      // before slicing to avoid a TypeError while reporting the failure.
      const rawResult: string | undefined = JSON.stringify(result, null, 2);
      console.log(' ERROR: No data returned from GraphQL');
      console.log(' Raw result:', (rawResult ?? String(result)).slice(0, 500));
      // An empty response is a failed crawl; report it through the exit code.
      process.exitCode = 1;
      return;
    }

    const products = data.products || [];
    const totalCount = data.queryInfo?.totalCount ?? 0;
    const totalPages = Math.ceil(totalCount / PER_PAGE);

    console.log(` Total products: ${totalCount}`);
    console.log(` Products in page: ${products.length}`);
    console.log(` Total pages: ${totalPages}`);
    console.log('');

    // Show first few products
    console.log(' First 5 products:');
    console.log(` ${'─'.repeat(57)}`);
    for (let i = 0; i < Math.min(5, products.length); i++) {
      const p = products[i];
      const name = (p.name || 'Unknown').slice(0, 40);
      const brand = (p.brand?.name || 'Unknown').slice(0, 15);
      // NOTE(review): medPrice is preferred over recPrice even though the query
      // uses pricingType 'rec' - confirm the intended fallback order.
      const rawPrice = p.Prices?.[0]?.price || p.medPrice || p.recPrice;
      const priceLabel = rawPrice ? `$${rawPrice}` : 'N/A';
      console.log(` ${i + 1}. ${name.padEnd(42)} | ${brand.padEnd(17)} | ${priceLabel}`);
    }
    console.log('');

    // ============================================================
    // STEP 5: End session
    // ============================================================
    printStepHeader('STEP 5: End Session');

    endSession();
    sessionActive = false;
    console.log('');

    // ============================================================
    // SUMMARY
    // ============================================================
    printBanner('SUMMARY', [
      summaryRow('Store:', String(disp.name ?? 'Unknown')),
      summaryRow('Products Found:', String(totalCount)),
      summaryRow('Response Time:', `${elapsed}ms`),
      summaryRow('Status:', 'SUCCESS'),
    ]);
  } catch (error: unknown) {
    console.error('');
    printBanner('ERROR', [], console.error);
    console.error(` ${error instanceof Error ? error.message : String(error)}`);
    if (error instanceof Error && error.stack) {
      console.error('');
      console.error('Stack trace:');
      console.error(error.stack.split('\n').slice(0, 5).join('\n'));
    }
    // Set the exit code instead of calling process.exit() so the finally
    // block below still runs and releases the session and the pool.
    process.exitCode = 1;
  } finally {
    try {
      if (sessionActive) {
        endSession();
      }
    } finally {
      await pool.end();
    }
  }
}

main().catch((error: unknown) => {
  // Only reachable when cleanup itself fails (e.g. pool.end() rejects).
  console.error(error);
  process.exitCode = 1;
});