- Store product images locally with hierarchy: /images/products/<state>/<store>/<brand>/<product>/
- Add /img/* proxy endpoint for on-demand resizing via Sharp
- Implement per-product image checking to skip existing downloads
- Fix pathToUrl() to correctly generate /images/... URLs
- Add frontend getImageUrl() helper with preset sizes (thumb, medium, large)
- Update all product pages to use optimized image URLs
- Add stealth session support for Dutchie GraphQL crawls
- Include test scripts for crawl and image verification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
278 lines
9.1 KiB
TypeScript
278 lines
9.1 KiB
TypeScript
#!/usr/bin/env npx tsx
|
|
/**
|
|
* Test Script: Crawl a single dispensary and write to canonical tables
|
|
*
|
|
* This script:
|
|
* 1. Fetches products from Dutchie GraphQL
|
|
* 2. Normalizes via DutchieNormalizer
|
|
* 3. Writes to store_products, product_variants, snapshots via hydrateToCanonical
|
|
*
|
|
* Usage:
|
|
* npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>
|
|
* npx tsx src/scripts/test-crawl-to-canonical.ts 235
|
|
*/
|
|
|
|
import { Pool } from 'pg';
|
|
import dotenv from 'dotenv';
|
|
import {
|
|
executeGraphQL,
|
|
GRAPHQL_HASHES,
|
|
DUTCHIE_CONFIG,
|
|
} from '../platforms/dutchie';
|
|
import {
|
|
DutchieNormalizer,
|
|
hydrateToCanonical,
|
|
} from '../hydration';
|
|
import { initializeImageStorage } from '../utils/image-storage';
|
|
|
|
// Load environment configuration up front: the database settings below are
// read from process.env when the connection pool is created.
dotenv.config();
|
|
|
|
// ============================================================
|
|
// DATABASE CONNECTION
|
|
// ============================================================
|
|
|
|
/**
 * Resolve the PostgreSQL connection string used by this script.
 *
 * Precedence:
 *   1. `CANNAIQ_DB_URL`, used verbatim when set and non-empty.
 *   2. `DATABASE_URL`, used verbatim when set and non-empty.
 *   3. A URI assembled from `CANNAIQ_DB_HOST`, `CANNAIQ_DB_PORT`,
 *      `CANNAIQ_DB_NAME`, `CANNAIQ_DB_USER` and `CANNAIQ_DB_PASS`, each
 *      falling back to a local-development default when unset or empty.
 *
 * @returns A `postgresql://` connection URI.
 */
function getConnectionString(): string {
  const explicitUrl = process.env.CANNAIQ_DB_URL || process.env.DATABASE_URL;
  if (explicitUrl) {
    return explicitUrl;
  }

  // `||` (not `??`) is deliberate: an empty variable also falls back to the default.
  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = process.env.CANNAIQ_DB_USER || 'dutchie';
  const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';

  // Percent-encode the credentials so characters such as '@', ':', '/' or '#'
  // cannot be mistaken for URI delimiters when the string is parsed.
  const encodedUser = encodeURIComponent(user);
  const encodedPass = encodeURIComponent(pass);

  return `postgresql://${encodedUser}:${encodedPass}@${host}:${port}/${name}`;
}
|
|
|
|
// Shared pg connection pool for the whole script; the connection string is
// resolved once, at module load, by getConnectionString().
const pool = new Pool({ connectionString: getConnectionString() });
|
|
|
|
// ============================================================
|
|
// FETCH PRODUCTS FROM DUTCHIE
|
|
// ============================================================
|
|
|
|
/** Aggregate outcome of paging through the Dutchie product listing. */
interface FetchResult {
  /** Raw product objects accumulated across every page that was fetched. */
  products: any[];
  /** Page count derived from the reported total and the configured page size. */
  totalPages: number;
  /** Total product count as reported by the API (`queryInfo.totalCount`). */
  totalProducts: number;
}
|
|
|
|
/**
 * Page through the Dutchie `FilteredProducts` GraphQL query and collect every
 * product returned for one dispensary.
 *
 * Paging stops when the last page has been fetched, when
 * `DUTCHIE_CONFIG.maxPages` is reached, when a response carries no
 * `filteredProducts` data, or when a request throws. In the last two cases the
 * products gathered so far are still returned, so the result may be partial.
 *
 * @param platformDispensaryId - Dispensary identifier sent as `productsFilter.dispensaryId`.
 * @param cName - Value forwarded to `executeGraphQL` in its options object.
 * @returns The accumulated products plus the page/total counts from the last successful response.
 */
async function fetchAllProducts(platformDispensaryId: string, cName: string): Promise<FetchResult> {
  const allProducts: any[] = [];
  let page = 0;
  let totalPages = 1; // Assume one page until the first response reports the real total.
  let totalProducts = 0;

  console.log(`[Fetch] Starting fetch for ${platformDispensaryId} (cName: ${cName})`);

  while (page < totalPages && page < DUTCHIE_CONFIG.maxPages) {
    const variables = {
      includeEnterpriseSpecials: false,
      productsFilter: {
        dispensaryId: platformDispensaryId,
        pricingType: 'rec',
        Status: 'Active', // 'Active' = in-stock products with pricing
        types: [],
        useCache: true,
        isDefaultSort: true,
        sortBy: 'popularSortIdx',
        sortDirection: 1,
        bypassOnlineThresholds: true,
        isKioskMenu: false,
        removeProductsBelowOptionThresholds: false,
      },
      page,
      perPage: DUTCHIE_CONFIG.perPage,
    };

    try {
      const result = await executeGraphQL(
        'FilteredProducts',
        variables,
        GRAPHQL_HASHES.FilteredProducts,
        { cName, maxRetries: 3 }
      );

      const data = result?.data?.filteredProducts;
      if (!data) {
        console.error(`[Fetch] No data returned for page ${page}`);
        break;
      }

      const products = data.products || [];
      // Totals are refreshed from every response; a missing count is treated as 0,
      // which also ends the loop.
      totalProducts = data.queryInfo?.totalCount || 0;
      totalPages = Math.ceil(totalProducts / DUTCHIE_CONFIG.perPage);

      allProducts.push(...products);
      console.log(`[Fetch] Page ${page + 1}/${totalPages}: ${products.length} products (total so far: ${allProducts.length})`);

      page++;

      // Pause between pages, but not after the final one.
      if (page < totalPages) {
        await new Promise(r => setTimeout(r, DUTCHIE_CONFIG.pageDelayMs));
      }
    } catch (error: unknown) {
      // Narrow before reading `.message`; anything may be thrown.
      const message = error instanceof Error ? error.message : String(error);
      console.error(`[Fetch] Error on page ${page}: ${message}`);
      break;
    }
  }

  return { products: allProducts, totalPages, totalProducts };
}
|
|
|
|
// ============================================================
|
|
// MAIN
|
|
// ============================================================
|
|
|
|
/**
 * Script entry point: crawl one dispensary and write the result to the
 * canonical tables.
 *
 * Steps:
 *   1. Look up the dispensary row by the id given in `process.argv[2]`.
 *   2. Fetch its products via `fetchAllProducts`.
 *   3. Validate and normalize them with `DutchieNormalizer`.
 *   4. Persist them with `hydrateToCanonical`.
 *   5. Print row counts from the canonical tables.
 *
 * Exit status: 1 for a missing/invalid argument, a validation failure or any
 * thrown error; 0 otherwise (including the "no products fetched" case).
 * Failures inside the `try` set `process.exitCode` instead of calling
 * `process.exit()`, so the `finally` block can still close the pool.
 */
async function main() {
  const dispensaryId = parseInt(process.argv[2], 10);

  // Rejects a missing argument, non-numeric input (NaN) and 0.
  if (!dispensaryId) {
    console.error('Usage: npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>');
    console.error('Example: npx tsx src/scripts/test-crawl-to-canonical.ts 235');
    process.exit(1);
  }

  console.log('============================================================');
  console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
  console.log('============================================================\n');

  try {
    // Initialize image storage (inside the try so a failure is reported and
    // the pool is still closed).
    console.log('[Init] Initializing image storage...');
    await initializeImageStorage();
    console.log(' Image storage ready\n');

    // Step 1: Get dispensary info
    console.log('[Step 1] Getting dispensary info...');
    const dispResult = await pool.query(
      `
      SELECT id, name, platform_dispensary_id, menu_url
      FROM dispensaries
      WHERE id = $1
      `,
      [dispensaryId]
    );

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(` Name: ${disp.name}`);
    console.log(` Platform ID: ${disp.platform_dispensary_id}`);
    console.log(` Menu URL: ${disp.menu_url}`);

    if (!disp.platform_dispensary_id) {
      throw new Error('Dispensary does not have a platform_dispensary_id');
    }

    // Extract cName from menu_url: the path segment following
    // "/embedded-menu/" or "/dispensary/"; falls back to 'dispensary'.
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
    console.log(` cName: ${cName}\n`);

    // Step 2: Fetch products from Dutchie
    console.log('[Step 2] Fetching products from Dutchie GraphQL...');
    const fetchResult = await fetchAllProducts(disp.platform_dispensary_id, cName);
    console.log(` Total products fetched: ${fetchResult.products.length}\n`);

    if (fetchResult.products.length === 0) {
      console.log('No products fetched. Exiting.');
      // Return rather than process.exit(0) so `finally` closes the pool.
      return;
    }

    // Step 3: Normalize
    console.log('[Step 3] Normalizing products...');
    const normalizer = new DutchieNormalizer();

    // Payload body in the shape handed to validatePayload() and embedded as
    // `raw_json` in the RawPayload below.
    const rawPayloadForValidation = {
      products: fetchResult.products,
      queryInfo: {
        totalCount: fetchResult.totalProducts,
      },
    };

    const validation = normalizer.validatePayload(rawPayloadForValidation);
    if (!validation.valid) {
      console.error(` Validation failed: ${validation.errors?.join(', ')}`);
      process.exitCode = 1;
      return;
    }
    console.log(` Validation: PASS`);

    // Build proper RawPayload for normalize()
    const rawPayload = {
      id: `test-${Date.now()}`,
      dispensary_id: dispensaryId,
      crawl_run_id: null,
      platform: 'dutchie',
      payload_version: 1,
      raw_json: rawPayloadForValidation,
      product_count: fetchResult.totalProducts,
      pricing_type: 'rec',
      crawl_mode: 'active',
      fetched_at: new Date(),
      processed: false,
      normalized_at: null,
      hydration_error: null,
      hydration_attempts: 0,
      created_at: new Date(),
    };

    const normResult = normalizer.normalize(rawPayload);
    console.log(` Normalized products: ${normResult.products.length}`);
    console.log(` Brands extracted: ${normResult.brands.length}`);
    console.log(` Sample product: ${normResult.products[0]?.name}\n`);

    // Step 4: Write to canonical tables
    console.log('[Step 4] Writing to canonical tables via hydrateToCanonical...');
    const hydrateResult = await hydrateToCanonical(
      pool,
      dispensaryId,
      normResult,
      null // no crawl_run_id for this test
    );

    console.log(` Products upserted: ${hydrateResult.productsUpserted}`);
    console.log(` Products new: ${hydrateResult.productsNew}`);
    console.log(` Snapshots created: ${hydrateResult.snapshotsCreated}`);
    console.log(` Variants upserted: ${hydrateResult.variantsUpserted}`);
    console.log(` Brands created: ${hydrateResult.brandsCreated}\n`);

    // Step 5: Verify
    console.log('[Step 5] Verifying data in canonical tables...');

    const productCount = await pool.query(
      `
      SELECT COUNT(*) as count FROM store_products WHERE dispensary_id = $1
      `,
      [dispensaryId]
    );
    console.log(` store_products count: ${productCount.rows[0].count}`);

    const variantCount = await pool.query(
      `
      SELECT COUNT(*) as count FROM product_variants WHERE dispensary_id = $1
      `,
      [dispensaryId]
    );
    console.log(` product_variants count: ${variantCount.rows[0].count}`);

    const snapshotCount = await pool.query(
      `
      SELECT COUNT(*) as count FROM store_product_snapshots WHERE dispensary_id = $1
      `,
      [dispensaryId]
    );
    console.log(` store_product_snapshots count: ${snapshotCount.rows[0].count}`);

    console.log('\n============================================================');
    console.log('SUCCESS - Crawl and hydration complete!');
    console.log('============================================================');
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('\n============================================================');
    console.error('ERROR:', message);
    console.error('============================================================');
    if (error instanceof Error && error.stack) {
      console.error(error.stack);
    }
    // Record the failure without terminating, so `finally` still runs.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
|
|
|
|
// Run the script. Any rejection that escapes main() is logged and turned into
// a non-zero exit status instead of being left as an unhandled rejection.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
|