feat: SEO template library, discovery pipeline, and orchestrator enhancements
## SEO Template Library
- Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration)
- Add Template Library tab in SEO Orchestrator with accordion-based editors
- Add template preview, validation, and variable injection engine
- Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate

## Discovery Pipeline
- Add promotion.ts for discovery location validation and promotion
- Add discover-all-states.ts script for multi-state discovery
- Add promotion log migration (067)
- Enhance discovery routes and types

## Orchestrator & Admin
- Add crawl_enabled filter to stores page
- Add API permissions page
- Add job queue management
- Add price analytics routes
- Add markets and intelligence routes
- Enhance dashboard and worker monitoring

## Infrastructure
- Add migrations for worker definitions, SEO settings, field alignment
- Add canonical pipeline for scraper v2
- Update hydration and sync orchestrator
- Enhance multi-state query service

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
271
backend/src/scripts/test-crawl-to-canonical.ts
Normal file
271
backend/src/scripts/test-crawl-to-canonical.ts
Normal file
@@ -0,0 +1,271 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Test Script: Crawl a single dispensary and write to canonical tables
|
||||
*
|
||||
* This script:
|
||||
* 1. Fetches products from Dutchie GraphQL
|
||||
* 2. Normalizes via DutchieNormalizer
|
||||
* 3. Writes to store_products, product_variants, snapshots via hydrateToCanonical
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>
|
||||
* npx tsx src/scripts/test-crawl-to-canonical.ts 235
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import dotenv from 'dotenv';
|
||||
import {
|
||||
executeGraphQL,
|
||||
GRAPHQL_HASHES,
|
||||
DUTCHIE_CONFIG,
|
||||
} from '../platforms/dutchie';
|
||||
import {
|
||||
DutchieNormalizer,
|
||||
hydrateToCanonical,
|
||||
} from '../hydration';
|
||||
|
||||
// Load variables from a local .env file into process.env before any connection settings are read.
dotenv.config();
|
||||
|
||||
// ============================================================
|
||||
// DATABASE CONNECTION
|
||||
// ============================================================
|
||||
|
||||
/**
 * Resolve the PostgreSQL connection string used by this script.
 *
 * Resolution order:
 *   1. `CANNAIQ_DB_URL`, returned verbatim when set and non-empty.
 *   2. `DATABASE_URL`, returned verbatim when set and non-empty.
 *   3. A URL assembled from `CANNAIQ_DB_HOST`, `CANNAIQ_DB_PORT`,
 *      `CANNAIQ_DB_NAME`, `CANNAIQ_DB_USER` and `CANNAIQ_DB_PASS`, each
 *      falling back to a hard-coded default when unset or empty.
 *
 * @returns A `postgresql://` connection URL.
 */
function getConnectionString(): string {
  const explicitUrl = process.env.CANNAIQ_DB_URL || process.env.DATABASE_URL;
  if (explicitUrl) {
    return explicitUrl;
  }

  // `||` (not `??`) is deliberate: an empty env var also falls back to the default.
  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = process.env.CANNAIQ_DB_USER || 'dutchie';
  const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';

  // Credentials are percent-encoded so characters that are structural in a
  // URL (`@`, `:`, `/`, `#`, ...) cannot corrupt the authority section.
  const credentials = `${encodeURIComponent(user)}:${encodeURIComponent(pass)}`;

  return `postgresql://${credentials}@${host}:${port}/${name}`;
}
|
||||
|
||||
// Single pg connection pool shared by the whole script; main() closes it in its `finally` block.
const pool = new Pool({ connectionString: getConnectionString() });
|
||||
|
||||
// ============================================================
|
||||
// FETCH PRODUCTS FROM DUTCHIE
|
||||
// ============================================================
|
||||
|
||||
/**
 * Aggregate outcome of paging through a dispensary's product listing.
 */
interface FetchResult {
  /** Raw product objects accumulated across every page that was fetched. */
  products: any[];
  /** Page count derived from the reported total and the configured page size. */
  totalPages: number;
  /** Total product count as reported by the API's `queryInfo.totalCount`. */
  totalProducts: number;
}
|
||||
|
||||
/**
 * Page through the `FilteredProducts` GraphQL operation and collect every
 * product returned for one dispensary.
 *
 * Paging stops when any of the following happens:
 *   - all pages implied by the reported total count have been fetched,
 *   - `DUTCHIE_CONFIG.maxPages` is reached,
 *   - a response carries no `data.filteredProducts`,
 *   - a page comes back with zero products (the reported total is stale, so
 *     further requests would make no progress),
 *   - a request throws.
 * In every case the products collected so far are returned; this function
 * never rejects because of a failed page.
 *
 * Between pages it waits `DUTCHIE_CONFIG.pageDelayMs` milliseconds.
 *
 * @param platformDispensaryId - Sent as `productsFilter.dispensaryId`.
 * @param cName - Forwarded to `executeGraphQL` in its options object.
 * @returns The accumulated products plus the last computed page/product totals.
 */
async function fetchAllProducts(platformDispensaryId: string, cName: string): Promise<FetchResult> {
  const allProducts: any[] = [];
  let page = 0;
  let totalPages = 1; // assume one page until the first response reports the real total
  let totalProducts = 0;

  console.log(`[Fetch] Starting fetch for ${platformDispensaryId} (cName: ${cName})`);

  while (page < totalPages && page < DUTCHIE_CONFIG.maxPages) {
    const variables = {
      includeEnterpriseSpecials: false,
      productsFilter: {
        dispensaryId: platformDispensaryId,
        pricingType: 'rec',
        Status: 'Active', // 'Active' = in-stock products with pricing
        types: [],
        useCache: true,
        isDefaultSort: true,
        sortBy: 'popularSortIdx',
        sortDirection: 1,
        bypassOnlineThresholds: true,
        isKioskMenu: false,
        removeProductsBelowOptionThresholds: false,
      },
      page,
      perPage: DUTCHIE_CONFIG.perPage,
    };

    try {
      const result = await executeGraphQL(
        'FilteredProducts',
        variables,
        GRAPHQL_HASHES.FilteredProducts,
        { cName, maxRetries: 3 }
      );

      const data = result?.data?.filteredProducts;
      if (!data) {
        console.error(`[Fetch] No data returned for page ${page}`);
        break;
      }

      const products = data.products || [];
      totalProducts = data.queryInfo?.totalCount || 0;
      totalPages = Math.ceil(totalProducts / DUTCHIE_CONFIG.perPage);

      allProducts.push(...products);
      console.log(`[Fetch] Page ${page + 1}/${totalPages}: ${products.length} products (total so far: ${allProducts.length})`);

      // An empty page means the reported total overstates what is available;
      // requesting more pages would only repeat empty responses up to maxPages.
      if (products.length === 0) {
        console.log(`[Fetch] Page ${page + 1} returned no products; stopping early`);
        break;
      }

      page++;

      // Throttle only when another request will actually follow.
      if (page < totalPages) {
        await new Promise(r => setTimeout(r, DUTCHIE_CONFIG.pageDelayMs));
      }
    } catch (error: unknown) {
      // Thrown values are not guaranteed to be Error instances.
      const message = error instanceof Error ? error.message : String(error);
      console.error(`[Fetch] Error on page ${page}: ${message}`);
      break;
    }
  }

  return { products: allProducts, totalPages, totalProducts };
}
|
||||
|
||||
// ============================================================
|
||||
// MAIN
|
||||
// ============================================================
|
||||
|
||||
/**
 * Script entry point: crawl one dispensary and write the result to the
 * canonical tables.
 *
 * Steps:
 *   1. Look up the dispensary row by the id given as the first CLI argument.
 *   2. Fetch all products through `fetchAllProducts`.
 *   3. Validate and normalize them with `DutchieNormalizer`.
 *   4. Persist via `hydrateToCanonical`.
 *   5. Print row counts from the canonical tables for that dispensary.
 *
 * Failures are reported on stderr and signalled through `process.exitCode`
 * rather than `process.exit()`, so the `finally` block always runs and the
 * shared pool is closed on every path. The returned promise never rejects.
 */
async function main(): Promise<void> {
  try {
    // Accept only a plain positive integer; parseInt alone would also accept
    // inputs such as "12abc" or "-5".
    const rawId = process.argv[2];
    const dispensaryId =
      rawId !== undefined && /^\d+$/.test(rawId) ? Number.parseInt(rawId, 10) : Number.NaN;

    if (!Number.isSafeInteger(dispensaryId) || dispensaryId <= 0) {
      console.error('Usage: npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>');
      console.error('Example: npx tsx src/scripts/test-crawl-to-canonical.ts 235');
      process.exitCode = 1;
      return;
    }

    console.log('============================================================');
    console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
    console.log('============================================================\n');

    // Step 1: Get dispensary info
    console.log('[Step 1] Getting dispensary info...');
    const dispResult = await pool.query(`
      SELECT id, name, platform_dispensary_id, menu_url
      FROM dispensaries
      WHERE id = $1
    `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(`  Name: ${disp.name}`);
    console.log(`  Platform ID: ${disp.platform_dispensary_id}`);
    console.log(`  Menu URL: ${disp.menu_url}`);

    if (!disp.platform_dispensary_id) {
      throw new Error('Dispensary does not have a platform_dispensary_id');
    }

    // Extract cName from menu_url: the path segment following
    // "/embedded-menu/" or "/dispensary/"; falls back to 'dispensary'.
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
    console.log(`  cName: ${cName}\n`);

    // Step 2: Fetch products from Dutchie
    console.log('[Step 2] Fetching products from Dutchie GraphQL...');
    const fetchResult = await fetchAllProducts(disp.platform_dispensary_id, cName);
    console.log(`  Total products fetched: ${fetchResult.products.length}\n`);

    if (fetchResult.products.length === 0) {
      // Not an error: leave the exit code at its default and let `finally` clean up.
      console.log('No products fetched. Exiting.');
      return;
    }

    // Step 3: Normalize
    console.log('[Step 3] Normalizing products...');
    const normalizer = new DutchieNormalizer();

    // Construct a RawPayload structure that the normalizer expects
    // The normalizer.normalize() expects: { raw_json, dispensary_id, ... }
    const rawPayloadForValidation = {
      products: fetchResult.products,
      queryInfo: {
        totalCount: fetchResult.totalProducts,
      },
    };

    const validation = normalizer.validatePayload(rawPayloadForValidation);
    if (!validation.valid) {
      console.error(`  Validation failed: ${validation.errors?.join(', ')}`);
      process.exitCode = 1;
      return;
    }
    console.log(`  Validation: PASS`);

    // Build proper RawPayload for normalize()
    const rawPayload = {
      id: `test-${Date.now()}`,
      dispensary_id: dispensaryId,
      crawl_run_id: null,
      platform: 'dutchie',
      payload_version: 1,
      raw_json: rawPayloadForValidation,
      product_count: fetchResult.totalProducts,
      pricing_type: 'rec',
      crawl_mode: 'active',
      fetched_at: new Date(),
      processed: false,
      normalized_at: null,
      hydration_error: null,
      hydration_attempts: 0,
      created_at: new Date(),
    };

    const normResult = normalizer.normalize(rawPayload);
    console.log(`  Normalized products: ${normResult.products.length}`);
    console.log(`  Brands extracted: ${normResult.brands.length}`);
    console.log(`  Sample product: ${normResult.products[0]?.name}\n`);

    // Step 4: Write to canonical tables
    console.log('[Step 4] Writing to canonical tables via hydrateToCanonical...');
    const hydrateResult = await hydrateToCanonical(
      pool,
      dispensaryId,
      normResult,
      null // no crawl_run_id for this test
    );

    console.log(`  Products upserted: ${hydrateResult.productsUpserted}`);
    console.log(`  Products new: ${hydrateResult.productsNew}`);
    console.log(`  Snapshots created: ${hydrateResult.snapshotsCreated}`);
    console.log(`  Variants upserted: ${hydrateResult.variantsUpserted}`);
    console.log(`  Brands created: ${hydrateResult.brandsCreated}\n`);

    // Step 5: Verify
    console.log('[Step 5] Verifying data in canonical tables...');

    const productCount = await pool.query(`
      SELECT COUNT(*) as count FROM store_products WHERE dispensary_id = $1
    `, [dispensaryId]);
    console.log(`  store_products count: ${productCount.rows[0].count}`);

    const variantCount = await pool.query(`
      SELECT COUNT(*) as count FROM product_variants WHERE dispensary_id = $1
    `, [dispensaryId]);
    console.log(`  product_variants count: ${variantCount.rows[0].count}`);

    const snapshotCount = await pool.query(`
      SELECT COUNT(*) as count FROM store_product_snapshots WHERE dispensary_id = $1
    `, [dispensaryId]);
    console.log(`  store_product_snapshots count: ${snapshotCount.rows[0].count}`);

    console.log('\n============================================================');
    console.log('SUCCESS - Crawl and hydration complete!');
    console.log('============================================================');
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error instances.
    const message = error instanceof Error ? error.message : String(error);
    console.error('\n============================================================');
    console.error('ERROR:', message);
    console.error('============================================================');
    if (error instanceof Error && error.stack) {
      console.error(error.stack);
    }
    process.exitCode = 1;
  } finally {
    // Guarded so a failure while closing the pool cannot surface as an
    // unhandled rejection from the un-awaited top-level main() call.
    try {
      await pool.end();
    } catch (closeError: unknown) {
      const closeMessage = closeError instanceof Error ? closeError.message : String(closeError);
      console.error('Failed to close database pool:', closeMessage);
    }
  }
}
|
||||
|
||||
// Kick off the script as soon as the module is evaluated; the returned promise is not awaited here.
main();
|
||||
Reference in New Issue
Block a user