Add CLAUDE guidelines for consolidated pipeline

This commit is contained in:
Kelly
2025-12-02 13:28:23 -07:00
parent 9219d8a77a
commit 04b5c3bd09
32 changed files with 4485 additions and 169 deletions

View File

@@ -0,0 +1,236 @@
/**
* Capture Dutchie GraphQL response structure via Puppeteer interception
* This script navigates to a Dutchie menu page and captures the GraphQL responses
* to understand the exact product data structure
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import * as fs from 'fs';
puppeteer.use(StealthPlugin());
/**
 * One GraphQL response recorded by the response interceptor.
 */
interface CapturedResponse {
  /** When the interceptor stored this entry. */
  timestamp: Date;
  /** Request URL (the interceptor stores at most the first 200 characters). */
  url: string;
  /** Value of the `operationName` query parameter, or 'Unknown' when absent. */
  operationName: string;
  /** Parsed JSON body of the response. */
  data: any;
}
/**
 * Open a menu page in headless Chrome, record every JSON GraphQL response it
 * triggers, and report the structure of the first product that shows up.
 *
 * Steps: launch the browser, install a `response` listener that stores every
 * JSON response whose URL contains "graphql", load the page, scroll to the
 * bottom, click the first `a[href*="/products/"]` link, then print a summary.
 * When a product was captured, its schema and a sample are also written to
 * /tmp/dutchie-schema-capture.json.
 *
 * Failures are logged (message only) and not rethrown; the browser is always
 * closed.
 *
 * @param menuUrl - URL of the menu page to load.
 */
async function captureSchema(menuUrl: string) {
  let browser;
  const capturedResponses: CapturedResponse[] = [];
  try {
    console.log('='.repeat(80));
    console.log('DUTCHIE GRAPHQL SCHEMA CAPTURE');
    console.log('='.repeat(80));
    console.log(`\nTarget URL: ${menuUrl}\n`);
    browser = await puppeteer.launch({
      headless: 'new',
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
      ]
    });
    const page = await browser.newPage();
    // Use a realistic user agent
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
    // Set viewport to desktop size
    await page.setViewport({ width: 1920, height: 1080 });
    // Hide webdriver flag
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
      (window as any).chrome = { runtime: {} };
    });
    // Intercept all GraphQL responses
    page.on('response', async (response) => {
      const url = response.url();
      // Only capture GraphQL responses
      if (!url.includes('graphql')) return;
      try {
        const contentType = response.headers()['content-type'] || '';
        if (!contentType.includes('application/json')) return;
        const data = await response.json();
        // Extract operation name from URL if possible
        const urlParams = new URLSearchParams(url.split('?')[1] || '');
        const operationName = urlParams.get('operationName') || 'Unknown';
        capturedResponses.push({
          operationName,
          url: url.substring(0, 200),
          data,
          timestamp: new Date()
        });
        console.log(`📡 Captured: ${operationName}`);
        // Check for product data
        if (data?.data?.filteredProducts?.products) {
          const products = data.data.filteredProducts.products;
          console.log(` Found ${products.length} products`);
        }
      } catch (e) {
        // Deliberately best-effort: a response whose body cannot be read or
        // parsed as JSON is skipped so one bad response does not stop the capture.
      }
    });
    console.log('Navigating to page...');
    await page.goto(menuUrl, {
      waitUntil: 'networkidle2',
      timeout: 90000
    });
    // Check if it's a Dutchie menu (the page exposes a global `reactEnv`)
    const isDutchie = await page.evaluate(() => {
      return typeof (window as any).reactEnv !== 'undefined';
    });
    if (isDutchie) {
      console.log('✅ Dutchie menu detected\n');
      // Get environment info
      const reactEnv = await page.evaluate(() => (window as any).reactEnv);
      console.log('Dutchie Environment:');
      console.log(` dispensaryId: ${reactEnv?.dispensaryId}`);
      console.log(` retailerId: ${reactEnv?.retailerId}`);
      console.log(` chainId: ${reactEnv?.chainId}`);
    }
    // Scroll to trigger lazy loading
    console.log('\nScrolling to load more products...');
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await new Promise(r => setTimeout(r, 3000));
    // Click on a category to trigger more loads
    const categoryLinks = await page.$$('a[href*="/products/"]');
    if (categoryLinks.length > 0) {
      console.log(`Found ${categoryLinks.length} category links, clicking first one...`);
      try {
        // Register the navigation wait BEFORE the click is issued. Awaiting the
        // click first leaves a window in which the navigation can finish
        // unobserved, after which waitForNavigation only ends at its timeout.
        await Promise.all([
          page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 30000 }),
          categoryLinks[0].click(),
        ]);
      } catch (e) {
        console.log('Category navigation failed, continuing...');
      }
    }
    // Wait a bit more for any final responses
    await new Promise(r => setTimeout(r, 2000));
    console.log(`\n${'='.repeat(80)}`);
    console.log(`CAPTURED ${capturedResponses.length} GRAPHQL RESPONSES`);
    console.log('='.repeat(80));
    // Find product data: the first product of the first non-empty product list wins
    let productSchema: any = null;
    let sampleProduct: any = null;
    for (const resp of capturedResponses) {
      console.log(`\n${resp.operationName}:`);
      console.log(` URL: ${resp.url.substring(0, 100)}...`);
      if (resp.data?.data?.filteredProducts?.products) {
        const products = resp.data.data.filteredProducts.products;
        console.log(` ✅ Contains ${products.length} products`);
        if (products.length > 0 && !sampleProduct) {
          sampleProduct = products[0];
          productSchema = extractSchema(products[0]);
        }
      }
      // Show top-level data keys
      if (resp.data?.data) {
        console.log(` Data keys: ${Object.keys(resp.data.data).join(', ')}`);
      }
    }
    // Output the product schema
    if (productSchema) {
      console.log('\n' + '='.repeat(80));
      console.log('PRODUCT SCHEMA (from first product):');
      console.log('='.repeat(80));
      console.log(JSON.stringify(productSchema, null, 2));
      console.log('\n' + '='.repeat(80));
      console.log('SAMPLE PRODUCT:');
      console.log('='.repeat(80));
      console.log(JSON.stringify(sampleProduct, null, 2));
      // Save to file (per-response entries keep only keys and product counts)
      const outputData = {
        capturedAt: new Date().toISOString(),
        menuUrl,
        schema: productSchema,
        sampleProduct,
        allResponses: capturedResponses.map(r => ({
          operationName: r.operationName,
          dataKeys: r.data?.data ? Object.keys(r.data.data) : [],
          productCount: r.data?.data?.filteredProducts?.products?.length || 0
        }))
      };
      const outputPath = '/tmp/dutchie-schema-capture.json';
      fs.writeFileSync(outputPath, JSON.stringify(outputData, null, 2));
      console.log(`\nSaved capture to: ${outputPath}`);
    } else {
      console.log('\n❌ No product data captured');
      // Debug: show the first 500 characters of every captured response
      console.log('\nAll captured responses:');
      for (const resp of capturedResponses) {
        console.log(`\n${resp.operationName}:`);
        console.log(JSON.stringify(resp.data, null, 2).substring(0, 500));
      }
    }
  } catch (error: any) {
    console.error('Error:', error.message);
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}
/**
 * Describe the shape of a value: field names and types, no full values.
 *
 * - null / undefined  -> `{ type: 'null' }` / `{ type: 'undefined' }`
 * - array             -> `{ type: 'array', items }`, where `items` describes the
 *                        first element only, or is 'unknown' for an empty array
 * - object            -> `{ type: 'object', properties }`, one entry per own key
 * - anything else     -> `{ type: typeof value, example }`, with `example` being
 *                        the first 100 characters of the stringified value
 *
 * @param obj - Value to describe.
 * @param prefix - Dotted path of `obj` within the root value; it is threaded
 *   through the recursion but does not appear in the result.
 * @returns The schema description.
 */
function extractSchema(obj: any, prefix = ''): any {
  if (obj === null || obj === undefined) {
    return { type: obj === null ? 'null' : 'undefined' };
  }

  if (Array.isArray(obj)) {
    return {
      type: 'array',
      items: obj.length > 0 ? extractSchema(obj[0], `${prefix}[]`) : 'unknown',
    };
  }

  if (typeof obj !== 'object') {
    return { type: typeof obj, example: String(obj).substring(0, 100) };
  }

  const properties: any = {};
  Object.keys(obj).forEach((name) => {
    const childPath = prefix ? `${prefix}.${name}` : name;
    properties[name] = extractSchema(obj[name], childPath);
  });
  return { type: 'object', properties };
}
// Entry point: the menu URL is the first CLI argument; when it is missing the
// AZ-Deeply-Rooted embedded menu is used.
const url = process.argv[2] || 'https://dutchie.com/embedded-menu/AZ-Deeply-Rooted';
captureSchema(url).catch((err) => console.error(err));

View File

@@ -0,0 +1,66 @@
/**
* Seed crawl: trigger dutchie crawls for all dispensaries with menu_type='dutchie'
* and a resolved platform_dispensary_id. This uses the AZ orchestrator endpoint logic.
*
* Usage (local):
* node dist/scripts/crawl-all-dutchie.js
*
* Requires:
* - DATABASE_URL/CRAWLSY_DATABASE_URL pointing to the consolidated DB
* - Dispensaries table populated with menu_type and platform_dispensary_id
*/
import { query } from '../dutchie-az/db/connection';
import { runDispensaryOrchestrator } from '../services/dispensary-orchestrator';
/**
 * Run the dispensary orchestrator, one dispensary at a time, for every row in
 * `dispensaries` that has menu_type 'dutchie' and a non-null
 * platform_dispensary_id, then print a success/failure tally.
 *
 * A run counts as successful when the orchestrator reports 'success',
 * 'sandbox_only' or 'detection_only'. Any other status, or a thrown error,
 * counts as failed and is logged without stopping the loop. When no
 * dispensary matches, the process exits with status 0.
 */
async function main() {
  const { rows: dispensaries } = await query<{
    id: number;
    name: string;
    slug: string;
    platform_dispensary_id: string | null;
  }>(`
SELECT id, name, slug, platform_dispensary_id
FROM dispensaries
WHERE menu_type = 'dutchie'
AND platform_dispensary_id IS NOT NULL
ORDER BY id
`);

  if (dispensaries.length === 0) {
    console.log('No dutchie dispensaries with resolved platform_dispensary_id found.');
    process.exit(0);
  }

  console.log(`Found ${dispensaries.length} dutchie dispensaries with resolved IDs. Triggering crawls...`);

  const acceptedStatuses = ['success', 'sandbox_only', 'detection_only'];
  const tally = { success: 0, failed: 0 };

  for (const dispensary of dispensaries) {
    const label = `${dispensary.id} (${dispensary.name})`;
    try {
      console.log(`Crawling ${label}...`);
      const outcome = await runDispensaryOrchestrator(dispensary.id);
      if (acceptedStatuses.includes(outcome.status)) {
        tally.success += 1;
        continue;
      }
      tally.failed += 1;
      console.warn(`Crawl returned status ${outcome.status} for ${label}`);
    } catch (err: any) {
      tally.failed += 1;
      console.error(`Failed crawl for ${label}: ${err.message}`);
    }
  }

  console.log(`Completed. Success: ${tally.success}, Failed: ${tally.failed}`);
}
// Anything that escapes main() (for example the initial query rejecting) is
// reported and ends the process with a non-zero status.
const exitOnFatal = (reason: unknown): void => {
  console.error('Fatal:', reason);
  process.exit(1);
};
main().catch(exitOnFatal);

View File

@@ -0,0 +1,139 @@
/**
* Run Dutchie GraphQL Scrape
*
* This script demonstrates the full pipeline:
* 1. Puppeteer navigates to Dutchie menu
* 2. GraphQL responses are intercepted
* 3. Products are normalized to our schema
* 4. Products are upserted to database
* 5. Derived views (brands, categories, specials) are automatically updated
*/
import { Pool } from 'pg';
import { scrapeDutchieMenu } from '../scrapers/dutchie-graphql';
// Postgres connection string. Read from the DATABASE_URL environment variable;
// when that is unset or empty, a database on localhost:54320 is used instead.
const DATABASE_URL =
  process.env.DATABASE_URL ||
  'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';
/**
 * Run one scrape for a hard-coded store (id 1, AZ-Deeply-Rooted menu) and
 * print what the database holds afterwards.
 *
 * After `scrapeDutchieMenu` returns, the result counters are printed. On
 * success the function also queries `derived_brands`, `current_specials`,
 * `derived_categories` and `products` for that store and prints a few rows
 * from each.
 *
 * The pool is always ended. Errors are logged (message only) and rethrown to
 * the caller.
 */
async function main() {
  const pool = new Pool({ connectionString: DATABASE_URL });
  try {
    console.log('='.repeat(80));
    console.log('DUTCHIE GRAPHQL SCRAPER - FULL PIPELINE TEST');
    console.log('='.repeat(80));
    // Mask the password part of the connection string before printing it
    console.log(`Database: ${DATABASE_URL.replace(/:[^:@]+@/, ':***@')}`);
    // Configuration
    const storeId = 1; // Deeply Rooted
    const menuUrl = 'https://dutchie.com/embedded-menu/AZ-Deeply-Rooted';
    console.log(`\nStore ID: ${storeId}`);
    console.log(`Menu URL: ${menuUrl}`);
    console.log('\n' + '-'.repeat(80));
    // Run the scrape
    console.log('\n🚀 Starting scrape...\n');
    const result = await scrapeDutchieMenu(pool, storeId, menuUrl);
    console.log('\n' + '-'.repeat(80));
    console.log('📊 SCRAPE RESULTS:');
    console.log('-'.repeat(80));
    console.log(` Success: ${result.success}`);
    console.log(` Products Found: ${result.productsFound}`);
    console.log(` Inserted: ${result.inserted}`);
    console.log(` Updated: ${result.updated}`);
    if (result.error) {
      console.log(` Error: ${result.error}`);
    }
    // Query derived views to show the result
    if (result.success) {
      console.log('\n' + '-'.repeat(80));
      console.log('📈 DERIVED DATA (from products table):');
      console.log('-'.repeat(80));
      // Brands
      const brandsResult = await pool.query(`
SELECT brand_name, product_count, min_price, max_price
FROM derived_brands
WHERE store_id = $1
ORDER BY product_count DESC
LIMIT 5
`, [storeId]);
      console.log('\nTop 5 Brands:');
      brandsResult.rows.forEach(row => {
        console.log(` - ${row.brand_name}: ${row.product_count} products ($${row.min_price} - $${row.max_price})`);
      });
      // Specials
      const specialsResult = await pool.query(`
SELECT name, brand, rec_price, rec_special_price, discount_percent
FROM current_specials
WHERE store_id = $1
LIMIT 5
`, [storeId]);
      console.log('\nTop 5 Specials:');
      if (specialsResult.rows.length === 0) {
        console.log(' (No specials found - is_on_special may not be populated yet)');
      } else {
        specialsResult.rows.forEach(row => {
          // Regular price → sale price. Without a separator the two amounts
          // ran together (e.g. "$45$30").
          console.log(` - ${row.name} (${row.brand}): $${row.rec_price} → $${row.rec_special_price} (${row.discount_percent}% off)`);
        });
      }
      // Categories
      const categoriesResult = await pool.query(`
SELECT category_name, product_count
FROM derived_categories
WHERE store_id = $1
ORDER BY product_count DESC
LIMIT 5
`, [storeId]);
      console.log('\nTop 5 Categories:');
      if (categoriesResult.rows.length === 0) {
        console.log(' (No categories found - subcategory may not be populated yet)');
      } else {
        categoriesResult.rows.forEach(row => {
          console.log(` - ${row.category_name}: ${row.product_count} products`);
        });
      }
      // Sample product: the most recently updated row that has a subcategory
      const sampleResult = await pool.query(`
SELECT name, brand, subcategory, rec_price, rec_special_price, is_on_special, thc_percentage, status
FROM products
WHERE store_id = $1 AND subcategory IS NOT NULL
ORDER BY updated_at DESC
LIMIT 1
`, [storeId]);
      if (sampleResult.rows.length > 0) {
        const sample = sampleResult.rows[0];
        console.log('\nSample Product (with new fields):');
        console.log(` Name: ${sample.name}`);
        console.log(` Brand: ${sample.brand}`);
        console.log(` Category: ${sample.subcategory}`);
        console.log(` Price: $${sample.rec_price}`);
        console.log(` Sale Price: ${sample.rec_special_price ? `$${sample.rec_special_price}` : 'N/A'}`);
        console.log(` On Special: ${sample.is_on_special}`);
        console.log(` THC: ${sample.thc_percentage}%`);
        console.log(` Status: ${sample.status}`);
      }
    }
    console.log('\n' + '='.repeat(80));
    console.log('✅ SCRAPE COMPLETE');
    console.log('='.repeat(80));
  } catch (error: any) {
    console.error('\n❌ Error:', error.message);
    throw error;
  } finally {
    await pool.end();
  }
}
// main() logs the error message and rethrows. Log the full error here and mark
// the run as failed so callers (shells, CI) see a non-zero exit status instead
// of 0. exitCode is used rather than exit() so pending output is still flushed.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,319 @@
/**
* Scrape ALL active products via direct GraphQL pagination
* This is more reliable than category navigation
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { Pool } from 'pg';
import { normalizeDutchieProduct, DutchieProduct } from '../scrapers/dutchie-graphql';
puppeteer.use(StealthPlugin());
// Postgres connection string. Read from the DATABASE_URL environment variable;
// when that is unset or empty, a database on localhost:54320 is used instead.
const DATABASE_URL =
  process.env.DATABASE_URL ||
  'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';

// sha256 hash sent as the persisted-query identifier with every
// FilteredProducts request made by this script.
const GRAPHQL_HASH =
  'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0';
/**
 * Fetch every active product of one menu through paginated GraphQL requests
 * issued from inside the loaded page, then upsert them into `products`.
 *
 * Flow:
 * 1. Launch headless Chrome, load `menuUrl`, wait 3 seconds, and read
 *    `window.reactEnv.dispensaryId` from the page.
 * 2. Request FilteredProducts pages of 100 items until a page comes back empty
 *    or the page counter exceeds 50 (so at most 51 pages are requested).
 * 3. Normalize each product with `normalizeDutchieProduct` and upsert all of
 *    them inside a single transaction, keyed on (store_id, slug).
 * 4. Print per-store summary counts.
 *
 * @param menuUrl - Menu page to load before issuing GraphQL requests.
 * @param storeId - Value written to `products.store_id` and used for the summary query.
 * @returns `{ success: true, totalProducts, inserted, updated }`.
 * @throws Rethrows any database error after rolling the transaction back;
 *   navigation and fetch errors propagate as well.
 *
 * NOTE(review): the pool and the browser are created before the outer `try`,
 * so if `puppeteer.launch` rejects the pool is never ended. Likewise, if
 * `browser.close()` rejects in the `finally`, `pool.end()` is skipped.
 */
async function scrapeAllProducts(menuUrl: string, storeId: number) {
const pool = new Pool({ connectionString: DATABASE_URL });
const browser = await puppeteer.launch({
headless: 'new',
args: ['--no-sandbox', '--disable-setuid-sandbox'],
});
try {
const page = await browser.newPage();
await page.setUserAgent(
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36'
);
console.log('Loading menu to establish session...');
await page.goto(menuUrl, {
waitUntil: 'networkidle2',
timeout: 60000,
});
// Fixed 3 second pause after the page reports network idle.
await new Promise((r) => setTimeout(r, 3000));
// NOTE(review): dispensaryId is undefined when the page exposes no reactEnv;
// it is passed on to the GraphQL request without a check.
const dispensaryId = await page.evaluate(() => (window as any).reactEnv?.dispensaryId);
console.log('Dispensary ID:', dispensaryId);
// Paginate through all products
const allProducts: DutchieProduct[] = [];
let pageNum = 0;
const perPage = 100;
console.log('\nFetching all products via paginated GraphQL...');
while (true) {
// The callback below runs inside the page (browser context), so the fetch
// is sent with the page's own cookies (`credentials: 'include'`). Its `page`
// parameter is the page index, not the Puppeteer Page object.
// NOTE(review): neither the HTTP status nor a GraphQL `errors` field is
// checked; a failed request yields an empty product list and ends the loop.
const result = await page.evaluate(
async (dispId: string, hash: string, page: number, perPage: number) => {
const variables = {
includeEnterpriseSpecials: false,
productsFilter: {
dispensaryId: dispId,
pricingType: 'rec',
Status: 'Active',
types: [],
useCache: false,
isDefaultSort: true,
sortBy: 'popularSortIdx',
sortDirection: 1,
bypassOnlineThresholds: true,
isKioskMenu: false,
removeProductsBelowOptionThresholds: false,
},
page,
perPage,
};
const qs = new URLSearchParams({
operationName: 'FilteredProducts',
variables: JSON.stringify(variables),
extensions: JSON.stringify({ persistedQuery: { version: 1, sha256Hash: hash } }),
});
const resp = await fetch(`https://dutchie.com/graphql?${qs.toString()}`, {
method: 'GET',
headers: {
'content-type': 'application/json',
'apollographql-client-name': 'Marketplace (production)',
},
credentials: 'include',
});
const json = await resp.json();
return {
products: json?.data?.filteredProducts?.products || [],
totalCount: json?.data?.filteredProducts?.queryInfo?.totalCount,
};
},
dispensaryId,
GRAPHQL_HASH,
pageNum,
perPage
);
// An empty page marks the end of the result set.
if (result.products.length === 0) {
break;
}
allProducts.push(...result.products);
console.log(
`Page ${pageNum}: ${result.products.length} products (total so far: ${allProducts.length}/${result.totalCount})`
);
pageNum++;
// Safety limit
if (pageNum > 50) {
console.log('Reached page limit');
break;
}
}
console.log(`\nTotal products fetched: ${allProducts.length}`);
// Normalize and upsert
console.log('\nNormalizing and upserting to database...');
const normalized = allProducts.map(normalizeDutchieProduct);
const client = await pool.connect();
let inserted = 0;
let updated = 0;
try {
// All rows are written in one transaction: either every product is stored or none.
await client.query('BEGIN');
for (const product of normalized) {
// The 42 placeholders map positionally onto the column list; `dutchie_url`
// is written as '' on insert and is not touched on conflict. The order of
// the values array further down must match the column list exactly.
const result = await client.query(
`
INSERT INTO products (
store_id, external_id, slug, name, enterprise_product_id,
brand, brand_external_id, brand_logo_url,
subcategory, strain_type, canonical_category,
price, rec_price, med_price, rec_special_price, med_special_price,
is_on_special, special_name, discount_percent, special_data,
sku, inventory_quantity, inventory_available, is_below_threshold, status,
thc_percentage, cbd_percentage, cannabinoids,
weight_mg, net_weight_value, net_weight_unit, options, raw_options,
image_url, additional_images,
is_featured, medical_only, rec_only,
source_created_at, source_updated_at,
description, raw_data,
dutchie_url, last_seen_at, updated_at
)
VALUES (
$1, $2, $3, $4, $5,
$6, $7, $8,
$9, $10, $11,
$12, $13, $14, $15, $16,
$17, $18, $19, $20,
$21, $22, $23, $24, $25,
$26, $27, $28,
$29, $30, $31, $32, $33,
$34, $35,
$36, $37, $38,
$39, $40,
$41, $42,
'', NOW(), NOW()
)
ON CONFLICT (store_id, slug) DO UPDATE SET
name = EXCLUDED.name,
enterprise_product_id = EXCLUDED.enterprise_product_id,
brand = EXCLUDED.brand,
brand_external_id = EXCLUDED.brand_external_id,
brand_logo_url = EXCLUDED.brand_logo_url,
subcategory = EXCLUDED.subcategory,
strain_type = EXCLUDED.strain_type,
canonical_category = EXCLUDED.canonical_category,
price = EXCLUDED.price,
rec_price = EXCLUDED.rec_price,
med_price = EXCLUDED.med_price,
rec_special_price = EXCLUDED.rec_special_price,
med_special_price = EXCLUDED.med_special_price,
is_on_special = EXCLUDED.is_on_special,
special_name = EXCLUDED.special_name,
discount_percent = EXCLUDED.discount_percent,
special_data = EXCLUDED.special_data,
sku = EXCLUDED.sku,
inventory_quantity = EXCLUDED.inventory_quantity,
inventory_available = EXCLUDED.inventory_available,
is_below_threshold = EXCLUDED.is_below_threshold,
status = EXCLUDED.status,
thc_percentage = EXCLUDED.thc_percentage,
cbd_percentage = EXCLUDED.cbd_percentage,
cannabinoids = EXCLUDED.cannabinoids,
weight_mg = EXCLUDED.weight_mg,
net_weight_value = EXCLUDED.net_weight_value,
net_weight_unit = EXCLUDED.net_weight_unit,
options = EXCLUDED.options,
raw_options = EXCLUDED.raw_options,
image_url = EXCLUDED.image_url,
additional_images = EXCLUDED.additional_images,
is_featured = EXCLUDED.is_featured,
medical_only = EXCLUDED.medical_only,
rec_only = EXCLUDED.rec_only,
source_created_at = EXCLUDED.source_created_at,
source_updated_at = EXCLUDED.source_updated_at,
description = EXCLUDED.description,
raw_data = EXCLUDED.raw_data,
last_seen_at = NOW(),
updated_at = NOW()
RETURNING (xmax = 0) AS was_inserted
`,
[
storeId,
product.external_id,
product.slug,
product.name,
product.enterprise_product_id,
product.brand,
product.brand_external_id,
product.brand_logo_url,
product.subcategory,
product.strain_type,
product.canonical_category,
product.price,
product.rec_price,
product.med_price,
product.rec_special_price,
product.med_special_price,
product.is_on_special,
product.special_name,
product.discount_percent,
product.special_data ? JSON.stringify(product.special_data) : null,
product.sku,
product.inventory_quantity,
product.inventory_available,
product.is_below_threshold,
product.status,
product.thc_percentage,
product.cbd_percentage,
product.cannabinoids ? JSON.stringify(product.cannabinoids) : null,
product.weight_mg,
product.net_weight_value,
product.net_weight_unit,
product.options,
product.raw_options,
product.image_url,
product.additional_images,
product.is_featured,
product.medical_only,
product.rec_only,
product.source_created_at,
product.source_updated_at,
product.description,
product.raw_data ? JSON.stringify(product.raw_data) : null,
]
);
// `was_inserted` is the `xmax = 0` expression returned by the statement;
// presumably true only for freshly inserted rows (Postgres xmax system
// column) — verify. Anything else is counted as an update.
if (result.rows[0]?.was_inserted) {
inserted++;
} else {
updated++;
}
}
await client.query('COMMIT');
} catch (error) {
// Undo every write from this run, then surface the original error.
await client.query('ROLLBACK');
throw error;
} finally {
client.release();
}
console.log(`\nDatabase: ${inserted} inserted, ${updated} updated`);
// Show summary stats
const stats = await pool.query(
`
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE is_on_special) as specials,
COUNT(DISTINCT brand) as brands,
COUNT(DISTINCT subcategory) as categories
FROM products WHERE store_id = $1
`,
[storeId]
);
console.log('\nStore summary:');
console.log(` Total products: ${stats.rows[0].total}`);
console.log(` On special: ${stats.rows[0].specials}`);
console.log(` Unique brands: ${stats.rows[0].brands}`);
console.log(` Categories: ${stats.rows[0].categories}`);
return {
success: true,
totalProducts: allProducts.length,
inserted,
updated,
};
} finally {
// Runs on success and on failure: shut down the browser, then the pool.
await browser.close();
await pool.end();
}
}
// CLI entry point.
//   argv[2]: menu URL (defaults to the AZ-Deeply-Rooted embedded menu)
//   argv[3]: store id, parsed as a base-10 integer (defaults to 1)
const menuUrl = process.argv[2] || 'https://dutchie.com/embedded-menu/AZ-Deeply-Rooted';
const storeId = parseInt(process.argv[3] || '1', 10);

const bannerRule = '='.repeat(60);
[
  bannerRule,
  'DUTCHIE GRAPHQL FULL SCRAPE',
  bannerRule,
  `Menu URL: ${menuUrl}`,
  `Store ID: ${storeId}`,
  '',
].forEach((line) => console.log(line));

// Run the scrape; print the summary on success, or the error message and a
// non-zero exit status on failure.
(async () => {
  try {
    const summary = await scrapeAllProducts(menuUrl, storeId);
    console.log('\n' + bannerRule);
    console.log('COMPLETE');
    console.log(JSON.stringify(summary, null, 2));
  } catch (error: any) {
    console.error('Error:', error.message);
    process.exit(1);
  }
})();

View File

@@ -0,0 +1,156 @@
/**
* Test script: End-to-end Dutchie GraphQL → DB → Dashboard flow
*
* This demonstrates the complete data pipeline:
* 1. Load one sample product previously captured from Dutchie GraphQL (read from /tmp/dutchie-schema-capture.json)
* 2. Normalize it to our schema
* 3. Show the mapping
*/
import { normalizeDutchieProduct, DutchieProduct, NormalizedProduct } from '../scrapers/dutchie-graphql';
import * as fs from 'fs';
// Load the sample product from the schema-capture output file.
// NOTE(review): the parsed JSON is used as-is. If the file is missing this
// throws, and if it has no `sampleProduct` the property reads below throw.
const capturedData = JSON.parse(
  fs.readFileSync('/tmp/dutchie-schema-capture.json', 'utf-8')
);
const sampleProduct: DutchieProduct = capturedData.sampleProduct;

console.log('='.repeat(80));
console.log('DUTCHIE GRAPHQL → DATABASE MAPPING DEMONSTRATION');
console.log('='.repeat(80));
console.log('\n📥 RAW DUTCHIE GRAPHQL PRODUCT:');
console.log('-'.repeat(80));

// Show key fields from raw product
const keyRawFields = {
  '_id': sampleProduct._id,
  'Name': sampleProduct.Name,
  'cName': sampleProduct.cName,
  'brandName': sampleProduct.brandName,
  'brand.id': sampleProduct.brand?.id,
  'type': sampleProduct.type,
  'subcategory': sampleProduct.subcategory,
  'strainType': sampleProduct.strainType,
  'Prices': sampleProduct.Prices,
  'recPrices': sampleProduct.recPrices,
  'recSpecialPrices': sampleProduct.recSpecialPrices,
  'special': sampleProduct.special,
  'specialData.saleSpecials[0].specialName': sampleProduct.specialData?.saleSpecials?.[0]?.specialName,
  'specialData.saleSpecials[0].discount': sampleProduct.specialData?.saleSpecials?.[0]?.discount,
  'THCContent.range[0]': sampleProduct.THCContent?.range?.[0],
  'CBDContent.range[0]': sampleProduct.CBDContent?.range?.[0],
  'Status': sampleProduct.Status,
  'Image': sampleProduct.Image,
  'POSMetaData.canonicalSKU': sampleProduct.POSMetaData?.canonicalSKU,
  'POSMetaData.children[0].quantity': sampleProduct.POSMetaData?.children?.[0]?.quantity,
  'POSMetaData.children[0].quantityAvailable': sampleProduct.POSMetaData?.children?.[0]?.quantityAvailable,
};
Object.entries(keyRawFields).forEach(([key, value]) => {
  console.log(` ${key}: ${JSON.stringify(value)}`);
});

console.log('\n📤 NORMALIZED DATABASE ROW:');
console.log('-'.repeat(80));

// Normalize the product
const normalized: NormalizedProduct = normalizeDutchieProduct(sampleProduct);

// Show the normalized result. raw_data, cannabinoids and special_data are
// destructured only to leave them out of the printed fields.
const { raw_data, cannabinoids, special_data, ...displayFields } = normalized;
Object.entries(displayFields).forEach(([key, value]) => {
  if (value !== undefined && value !== null) {
    console.log(` ${key}: ${JSON.stringify(value)}`);
  }
});

console.log('\n🔗 FIELD MAPPING:');
console.log('-'.repeat(80));

// Each row: [GraphQL field, DB column, raw value, normalized value].
// NOTE(review): the `?.substring(...) + '...'` entries render as
// "undefined..." when the source field is missing.
const fieldMappings = [
  ['_id / id', 'external_id', sampleProduct._id, normalized.external_id],
  ['Name', 'name', sampleProduct.Name, normalized.name],
  ['cName', 'slug', sampleProduct.cName, normalized.slug],
  ['brandName', 'brand', sampleProduct.brandName, normalized.brand],
  ['brand.id', 'brand_external_id', sampleProduct.brand?.id, normalized.brand_external_id],
  ['subcategory', 'subcategory', sampleProduct.subcategory, normalized.subcategory],
  ['strainType', 'strain_type', sampleProduct.strainType, normalized.strain_type],
  ['recPrices[0]', 'rec_price', sampleProduct.recPrices?.[0], normalized.rec_price],
  ['recSpecialPrices[0]', 'rec_special_price', sampleProduct.recSpecialPrices?.[0], normalized.rec_special_price],
  ['special', 'is_on_special', sampleProduct.special, normalized.is_on_special],
  ['specialData...specialName', 'special_name', sampleProduct.specialData?.saleSpecials?.[0]?.specialName?.substring(0, 40) + '...', normalized.special_name?.substring(0, 40) + '...'],
  ['THCContent.range[0]', 'thc_percentage', sampleProduct.THCContent?.range?.[0], normalized.thc_percentage],
  ['CBDContent.range[0]', 'cbd_percentage', sampleProduct.CBDContent?.range?.[0], normalized.cbd_percentage],
  ['Status', 'status', sampleProduct.Status, normalized.status],
  ['Image', 'image_url', sampleProduct.Image?.substring(0, 50) + '...', normalized.image_url?.substring(0, 50) + '...'],
  ['POSMetaData.canonicalSKU', 'sku', sampleProduct.POSMetaData?.canonicalSKU, normalized.sku],
];
console.log(' GraphQL Field → DB Column | Value');
console.log(' ' + '-'.repeat(75));
// Only the normalized value is printed, so the raw value (third entry) is skipped.
fieldMappings.forEach(([gqlField, dbCol, , dbVal]) => {
  const gqlStr = String(gqlField).padEnd(30);
  const dbStr = String(dbCol).padEnd(20);
  // The arrow matches the header row; without it the two padded columns ran together.
  console.log(` ${gqlStr} → ${dbStr} | ${JSON.stringify(dbVal)}`);
});

console.log('\n📊 SQL INSERT STATEMENT:');
console.log('-'.repeat(80));

// Generate example SQL. This text is only printed, never executed: the values
// are interpolated without escaping, so it must not be run as-is.
const sqlExample = `
INSERT INTO products (
store_id, external_id, slug, name,
brand, brand_external_id,
subcategory, strain_type,
rec_price, rec_special_price,
is_on_special, special_name, discount_percent,
thc_percentage, cbd_percentage,
status, image_url, sku
) VALUES (
1, -- store_id (Deeply Rooted)
'${normalized.external_id}', -- external_id
'${normalized.slug}', -- slug
'${normalized.name}', -- name
'${normalized.brand}', -- brand
'${normalized.brand_external_id}', -- brand_external_id
'${normalized.subcategory}', -- subcategory
'${normalized.strain_type}', -- strain_type
${normalized.rec_price}, -- rec_price
${normalized.rec_special_price}, -- rec_special_price
${normalized.is_on_special}, -- is_on_special
'${normalized.special_name?.substring(0, 50)}...', -- special_name
${normalized.discount_percent || 'NULL'}, -- discount_percent
${normalized.thc_percentage}, -- thc_percentage
${normalized.cbd_percentage}, -- cbd_percentage
'${normalized.status}', -- status
'${normalized.image_url}', -- image_url
'${normalized.sku}' -- sku
)
ON CONFLICT (store_id, slug) DO UPDATE SET ...;
`;
console.log(sqlExample);

console.log('\n✅ SUMMARY:');
console.log('-'.repeat(80));
console.log(` Product: ${normalized.name}`);
console.log(` Brand: ${normalized.brand}`);
console.log(` Category: ${normalized.subcategory}`);
// Regular price → sale price; the separator keeps the two amounts apart.
console.log(` Price: $${normalized.rec_price} → $${normalized.rec_special_price} (${normalized.discount_percent}% off)`);
console.log(` THC: ${normalized.thc_percentage}%`);
console.log(` Status: ${normalized.status}`);
console.log(` On Special: ${normalized.is_on_special}`);
console.log(` SKU: ${normalized.sku}`);

console.log('\n🎯 DERIVED VIEWS (computed from products table):');
console.log('-'.repeat(80));
console.log(' - current_specials: Products where is_on_special = true');
console.log(' - derived_brands: Aggregated by brand name with counts/prices');
console.log(' - derived_categories: Aggregated by subcategory');
console.log('\nAll views are computed from the single products table - no separate tables needed!');

View File

@@ -0,0 +1,233 @@
/**
* Test script to validate Dutchie GraphQL API access and capture response structure
*/
// @ts-ignore - node-fetch type declaration not installed
import fetch from 'node-fetch';
// sha256 hashes keyed by GraphQL operation name. Each request sends the hash
// for its operation as `extensions.persistedQuery.sha256Hash`.
// NOTE(review): presumably copied from the Dutchie web client; they would need
// refreshing if the server stops accepting them — confirm.
const GRAPHQL_HASHES = {
ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
FilteredProducts: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
MenuFiltersV2: '2f0b3233b8a2426b391649ca3f0f7a5d43b9aefd683f6286d7261a2517e3568e',
FilteredSpecials: '0dfb85a4fc138c55a076d4d11bf6d1a25f7cbd511428e1cf5a5b863b3eb23f25',
};
/**
 * Loose working shape for a product returned by the products query.
 *
 * Only `id` and `name` are required. Every other field is optional, and the
 * index signature admits any additional key.
 * NOTE(review): this type is not referenced in the visible part of this file
 * and nothing here validates responses against it — treat it as a guess.
 */
interface DutchieProduct {
  // Required fields
  id: string;
  name: string;

  // Optional descriptive fields
  slug?: string;
  description?: string;
  brand?: string;
  brandId?: string;
  type?: string;
  category?: string;
  subcategory?: string;
  strainType?: string;
  effects?: string[];
  weight?: string;

  // Optional media fields
  image?: string;
  images?: string[];

  // Optional fields whose structure is not pinned down yet
  THCContent?: any;
  CBDContent?: any;
  potencyThc?: any;
  potencyCbd?: any;
  terpenes?: any[];
  labResults?: any;
  options?: any[];
  pricing?: any;
  specialPricing?: any;

  [key: string]: any; // Catch-all for additional fields
}
/**
 * Request one page of products for a dispensary through the persisted
 * FilteredProducts query on https://dutchie.com/api-3/graphql.
 *
 * @param dispensaryId - Dispensary identifier placed in the products filter.
 * @param page - Zero-based page index (default 0).
 * @param perPage - Page size (default 25).
 * @returns The parsed JSON body of the response, unvalidated.
 * @throws Error `HTTP <status>: <first 200 chars of body>` when the response
 *   is not OK; the status and the first 500 characters are logged first.
 */
async function fetchProducts(dispensaryId: string, page = 0, perPage = 25): Promise<any> {
  // A fresh session identifier is generated for every request
  const sessionId = `crawlsy-session-${Date.now()}`;

  const productsFilter = {
    dispensaryId,
    pricingType: 'rec',
    Status: null, // null to include all (in-stock and out-of-stock)
    types: [],
    useCache: true,
    isDefaultSort: true,
    sortBy: 'popularSortIdx',
    sortDirection: 1,
    bypassOnlineThresholds: true,
    isKioskMenu: false,
    removeProductsBelowOptionThresholds: false
  };
  const payload = { includeEnterpriseSpecials: false, productsFilter, page, perPage };
  const persistedQuery = { version: 1, sha256Hash: GRAPHQL_HASHES.FilteredProducts };

  const params = new URLSearchParams();
  params.set('operationName', 'FilteredProducts');
  params.set('variables', JSON.stringify(payload));
  params.set('extensions', JSON.stringify({ persistedQuery }));

  const requestHeaders = {
    'x-dutchie-session': sessionId,
    'apollographql-client-name': 'Marketplace (production)',
    'content-type': 'application/json',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
  };
  const res = await fetch(`https://dutchie.com/api-3/graphql?${params.toString()}`, {
    headers: requestHeaders
  });

  if (res.ok) {
    return res.json();
  }

  const body = await res.text();
  console.error('HTTP Status:', res.status);
  console.error('Response:', body.substring(0, 500));
  throw new Error(`HTTP ${res.status}: ${body.substring(0, 200)}`);
}
/**
 * Look up a dispensary's id from its cName through the persisted
 * GetAddressBasedDispensaryData query on https://dutchie.com/graphql.
 *
 * @param cName - Dispensary cName (for example 'AZ-Deeply-Rooted'); it is sent
 *   as `input.dispensaryId`.
 * @returns The `dispensaryId` found in the response, or null when the request
 *   is not OK (the status is logged) or the field is missing or falsy.
 */
async function resolveDispensaryId(cName: string): Promise<string | null> {
  const sessionId = `crawlsy-session-${Date.now()}`;

  const params = new URLSearchParams();
  params.set('operationName', 'GetAddressBasedDispensaryData');
  params.set('variables', JSON.stringify({ input: { dispensaryId: cName } }));
  params.set(
    'extensions',
    JSON.stringify({
      persistedQuery: { version: 1, sha256Hash: GRAPHQL_HASHES.GetAddressBasedDispensaryData }
    })
  );

  const res = await fetch(`https://dutchie.com/graphql?${params.toString()}`, {
    headers: {
      'x-dutchie-session': sessionId,
      'apollographql-client-name': 'Marketplace (production)',
      'content-type': 'application/json',
      'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
  });

  if (res.ok) {
    const payload: any = await res.json();
    const resolved = payload?.data?.getAddressBasedDispensaryData?.dispensaryData?.dispensaryId;
    return resolved || null;
  }

  console.error('Failed to resolve dispensary ID:', res.status);
  return null;
}
/**
 * Flatten an object into a list of "path: description" lines, depth first.
 *
 * Per value:
 * - null       -> `path: null`
 * - array      -> `path: Array[n]`, followed by the fields of its first element
 *                 (under `path[0]`) when that element is a non-null object
 * - object     -> `path: Object`, followed by its own fields
 * - otherwise  -> `path: <typeof> = "<first 50 chars of the value>"`
 *
 * @param obj - Object to walk; null or undefined yields an empty list.
 * @param prefix - Path prepended to every key (empty at the root).
 * @returns The collected lines in traversal order.
 */
function enumerateFields(obj: any, prefix = ''): string[] {
  // Object.entries throws on null/undefined, so treat them as "no fields".
  if (obj === null || obj === undefined) {
    return [];
  }

  const fields: string[] = [];
  for (const [key, value] of Object.entries(obj)) {
    const path = prefix ? `${prefix}.${key}` : key;
    if (value === null) {
      fields.push(`${path}: null`);
    } else if (Array.isArray(value)) {
      fields.push(`${path}: Array[${value.length}]`);
      const first = value[0];
      // `typeof null` is 'object', so null must be excluded explicitly;
      // otherwise an array such as [null] made the recursive call throw.
      if (first !== null && typeof first === 'object') {
        fields.push(...enumerateFields(first, `${path}[0]`));
      }
    } else if (typeof value === 'object') {
      fields.push(`${path}: Object`);
      fields.push(...enumerateFields(value, path));
    } else {
      const preview = String(value).substring(0, 50);
      fields.push(`${path}: ${typeof value} = "${preview}"`);
    }
  }
  return fields;
}
/**
 * Exercise the Dutchie GraphQL API for one dispensary and print the product
 * structure to the console.
 *
 * Steps: resolve the dispensary ID (falling back to a known ID), fetch the
 * first page of products, then print the flattened field list, the full JSON
 * of the first product, a summary of key fields, and the first option/variant.
 * Returns early (after logging) on GraphQL errors or an empty product page.
 */
async function main(): Promise<void> {
  // Known ID used when the API lookup does not yield one.
  const fallbackDispensaryId = '6405ef617056e8014d79101b';
  const descriptionPreviewLength = 100;

  console.log('='.repeat(80));
  console.log('DUTCHIE GRAPHQL API TEST');
  console.log('='.repeat(80));
  const cName = 'AZ-Deeply-Rooted';

  // Step 1: Resolve dispensary ID
  console.log(`\n1. Resolving dispensary ID for "${cName}"...`);
  const dispensaryId = await resolveDispensaryId(cName);
  const finalDispensaryId = dispensaryId || fallbackDispensaryId;
  if (!dispensaryId) {
    console.log(` Failed to resolve via API, using hardcoded ID: ${fallbackDispensaryId}`);
  }
  console.log(` Final ID: ${finalDispensaryId}`);

  // Step 2: Fetch first page of products
  console.log('\n2. Fetching products (page 0, perPage 5)...');
  const result = await fetchProducts(finalDispensaryId, 0, 5);
  if (result?.errors) {
    console.error('\nGraphQL Errors:');
    console.error(JSON.stringify(result.errors, null, 2));
    return;
  }
  const products = result?.data?.filteredProducts?.products || [];
  console.log(` Found ${products.length} products in this page`);
  if (products.length === 0) {
    console.log('No products returned. Full response:');
    console.log(JSON.stringify(result, null, 2));
    return;
  }

  // Step 3: Enumerate all fields from first product
  console.log('\n3. PRODUCT FIELD STRUCTURE (from first product):');
  console.log('-'.repeat(80));
  const product = products[0];
  const fields = enumerateFields(product);
  fields.forEach((f) => console.log(` ${f}`));

  // Step 4: Show full sample product JSON
  console.log('\n4. FULL SAMPLE PRODUCT JSON:');
  console.log('-'.repeat(80));
  console.log(JSON.stringify(product, null, 2));

  // Step 5: Summary of key fields for schema design
  console.log('\n5. KEY FIELDS FOR SCHEMA DESIGN:');
  console.log('-'.repeat(80));
  // Only truncate (and add an ellipsis) when there is a string longer than the
  // preview length; a missing description stays undefined instead of "undefined...".
  const rawDescription = product.description;
  const descriptionPreview =
    typeof rawDescription === 'string' && rawDescription.length > descriptionPreviewLength
      ? rawDescription.substring(0, descriptionPreviewLength) + '...'
      : rawDescription;
  const keyFields = [
    { field: 'id', value: product.id },
    { field: 'name', value: product.name },
    { field: 'slug', value: product.slug },
    { field: 'brand', value: product.brand },
    { field: 'brandId', value: product.brandId },
    { field: 'type', value: product.type },
    { field: 'category', value: product.category },
    { field: 'subcategory', value: product.subcategory },
    { field: 'strainType', value: product.strainType },
    { field: 'THCContent', value: product.THCContent },
    { field: 'CBDContent', value: product.CBDContent },
    { field: 'description', value: descriptionPreview },
    { field: 'image', value: product.image },
    { field: 'options.length', value: product.options?.length },
    { field: 'pricing', value: product.pricing },
    { field: 'terpenes.length', value: product.terpenes?.length },
    { field: 'effects.length', value: product.effects?.length },
  ];
  keyFields.forEach(({ field, value }) => {
    console.log(` ${field}: ${JSON.stringify(value)}`);
  });

  // Step 6: Show an option (variant) if available
  if (product.options && product.options.length > 0) {
    console.log('\n6. SAMPLE OPTION/VARIANT:');
    console.log('-'.repeat(80));
    console.log(JSON.stringify(product.options[0], null, 2));
  }
}
// Script entry point: run main() and log any rejection via console.error.
main().catch(console.error);

View File

@@ -0,0 +1,106 @@
/**
* Test different Status filter values in Dutchie GraphQL
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
// Register the stealth plugin on the puppeteer-extra instance before any browser is launched.
puppeteer.use(StealthPlugin());
// Persisted-query hash sent as `sha256Hash` with the FilteredProducts operation below.
const GRAPHQL_HASH = 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0';
/**
 * Compare how the Dutchie `FilteredProducts` query responds to different
 * `Status` filter values.
 *
 * Loads the embedded menu page in a headless browser, reads the dispensary ID
 * from `window.reactEnv`, then issues the query from inside the page for each
 * test case ('Active', 'Inactive', explicit null, and Status omitted) and logs
 * the product count, reported total, and the distinct Status values returned.
 *
 * The browser is always closed, even when a step throws.
 *
 * @throws Error when the dispensary ID cannot be read from the loaded page.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setUserAgent(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36'
    );

    console.log('Loading menu...');
    await page.goto('https://dutchie.com/embedded-menu/AZ-Deeply-Rooted', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    // Extra settle time after network idle before reading page state.
    await new Promise((r) => setTimeout(r, 3000));

    const dispensaryId = await page.evaluate(() => (window as any).reactEnv?.dispensaryId);
    console.log('Dispensary ID:', dispensaryId);
    if (!dispensaryId) {
      // Without an ID every query below would be meaningless; fail fast instead.
      throw new Error('Could not read dispensaryId from window.reactEnv on the menu page');
    }

    // Test different status values
    const testCases = [
      { label: 'Active', status: 'Active', includeStatus: true },
      { label: 'Inactive', status: 'Inactive', includeStatus: true },
      { label: 'null', status: null, includeStatus: true },
      { label: 'omitted', status: null, includeStatus: false },
    ];

    for (const testCase of testCases) {
      // The callback runs inside the page, so it must be self-contained:
      // everything it needs is passed in as arguments.
      const result = await page.evaluate(
        async (dispId: string, hash: string, status: string | null, includeStatus: boolean) => {
          const filter: any = {
            dispensaryId: dispId,
            pricingType: 'rec',
            types: [],
            useCache: false,
            isDefaultSort: true,
            sortBy: 'popularSortIdx',
            sortDirection: 1,
            bypassOnlineThresholds: true,
            isKioskMenu: false,
            removeProductsBelowOptionThresholds: false,
          };
          // "omitted" leaves the key out entirely, as opposed to sending Status: null.
          if (includeStatus) {
            filter.Status = status;
          }
          const variables = {
            includeEnterpriseSpecials: false,
            productsFilter: filter,
            page: 0,
            perPage: 100,
          };
          const qs = new URLSearchParams({
            operationName: 'FilteredProducts',
            variables: JSON.stringify(variables),
            extensions: JSON.stringify({ persistedQuery: { version: 1, sha256Hash: hash } }),
          });
          const resp = await fetch(`https://dutchie.com/graphql?${qs.toString()}`, {
            method: 'GET',
            headers: {
              'content-type': 'application/json',
              'apollographql-client-name': 'Marketplace (production)',
            },
            credentials: 'include',
          });
          const json = await resp.json();
          const products = json?.data?.filteredProducts?.products || [];
          return {
            count: products.length,
            totalCount: json?.data?.filteredProducts?.queryInfo?.totalCount,
            sampleStatus: products[0]?.Status,
            statuses: [...new Set(products.map((p: any) => p.Status))],
          };
        },
        dispensaryId,
        GRAPHQL_HASH,
        testCase.status,
        testCase.includeStatus
      );
      console.log(
        `Status ${testCase.label}: Products=${result.count}, Total=${result.totalCount}, Statuses=${JSON.stringify(result.statuses)}`
      );
    }
  } finally {
    // Release the browser process even if navigation or a query failed.
    await browser.close();
  }
}
// Script entry point: run main() and log any rejection via console.error.
main().catch(console.error);