Files
cannaiq/backend/dist/scripts/capture-dutchie-schema.js
Kelly 66e07b2009 fix(monitor): remove non-existent worker columns from job_run_logs query
The job_run_logs table tracks scheduled job orchestration, not individual
worker jobs. Worker info (worker_id, worker_hostname) belongs on
dispensary_crawl_jobs, not job_run_logs.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-03 18:45:05 -07:00

237 lines
9.8 KiB
JavaScript

"use strict";
/**
* Capture Dutchie GraphQL response structure via Puppeteer interception
* This script navigates to a Dutchie menu page and captures the GraphQL responses
* to understand the exact product data structure
*/
// Re-export helper: exposes property `k` of module `m` on object `o` under the
// name `k2` (defaulting to `k`). A helper already present on `this` is reused.
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Engines without Object.create: copy the current value by assignment.
        return function (o, m, k, k2) {
            o[k2 === undefined ? k : k2] = m[k];
        };
    }
    return function (o, m, k, k2) {
        var targetKey = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        // Decide whether the source descriptor must be replaced by a forwarding getter.
        var useForwardingGetter;
        if (!descriptor) {
            useForwardingGetter = true;
        }
        else if ("get" in descriptor) {
            // An accessor is reused as-is only when the source is flagged __esModule.
            useForwardingGetter = !m.__esModule;
        }
        else {
            // A data property that can still change is read through a getter so
            // the binding always reflects the current value on `m`.
            useForwardingGetter = descriptor.writable || descriptor.configurable;
        }
        if (useForwardingGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, targetKey, descriptor);
    };
})();
// Attaches `v` to `o` as its "default" property. A helper already present on
// `this` is reused; otherwise the implementation is picked once, at load time.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            Object.defineProperty(o, "default", { enumerable: true, value: v });
        };
    }
    // Engines without Object.create: plain assignment.
    return function (o, v) {
        o["default"] = v;
    };
})();
// Namespace-import interop helper. A helper already present on `this` is reused.
// Given `mod`:
//   - if it is flagged `__esModule`, it is returned unchanged;
//   - otherwise a new object is built that exposes every own property name of
//     `mod` except "default" via __createBinding, with `default` set to `mod`
//     itself via __setModuleDefault.
var __importStar = (this && this.__importStar) || (function () {
// Own-key lister. On its first call it overwrites itself with
// Object.getOwnPropertyNames when that exists, or with a for-in/hasOwnProperty
// fallback otherwise, then delegates to the chosen implementation.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
// null/undefined modules contribute no named bindings; only `default` is set below.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
// Default-import interop: a module flagged `__esModule` is returned untouched;
// any other value is wrapped so it is reachable as `.default`.
// A helper already present on `this` is reused.
var __importDefault = (this && this.__importDefault) || function (mod) {
    var isEsModule = mod && mod.__esModule;
    if (isEsModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag this module's exports with `__esModule`, the marker the interop helpers above test for.
Object.defineProperty(exports, "__esModule", { value: true });
// Dependencies: puppeteer-extra and its stealth plugin (default imports), plus Node's fs as a namespace import.
const puppeteer_extra_1 = __importDefault(require("puppeteer-extra"));
const puppeteer_extra_plugin_stealth_1 = __importDefault(require("puppeteer-extra-plugin-stealth"));
const fs = __importStar(require("fs"));
// Register the stealth plugin at module load, before captureSchema launches a browser.
puppeteer_extra_1.default.use((0, puppeteer_extra_plugin_stealth_1.default)());
/**
 * Open a menu page in headless Chromium, record every JSON response whose URL
 * contains "graphql", and report the structure of the first product found in
 * a `filteredProducts` payload.
 *
 * The schema and sample product are printed to stdout and also written to
 * /tmp/dutchie-schema-capture.json. When no product payload is seen, the first
 * 500 characters of each captured response are printed instead.
 *
 * Errors are logged rather than rethrown, and the browser is always closed.
 *
 * @param {string} menuUrl - URL of the menu page to load.
 * @returns {Promise<void>} Resolves once the browser has been closed.
 */
async function captureSchema(menuUrl) {
    // Product list of a GraphQL payload, or undefined when it is not a filteredProducts response.
    const productsOf = (payload) => payload?.data?.filteredProducts?.products;
    const rule = '='.repeat(80);
    let browser;
    const capturedResponses = [];
    try {
        console.log(rule);
        console.log('DUTCHIE GRAPHQL SCHEMA CAPTURE');
        console.log(rule);
        console.log(`\nTarget URL: ${menuUrl}\n`);
        browser = await puppeteer_extra_1.default.launch({
            headless: 'new',
            args: [
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-blink-features=AutomationControlled',
            ]
        });
        const page = await browser.newPage();
        // Use a realistic user agent
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
        // Set viewport to desktop size
        await page.setViewport({ width: 1920, height: 1080 });
        // Hide webdriver flag
        await page.evaluateOnNewDocument(() => {
            Object.defineProperty(navigator, 'webdriver', { get: () => false });
            window.chrome = { runtime: {} };
        });
        // Intercept all GraphQL responses
        page.on('response', async (response) => {
            const url = response.url();
            // Only capture GraphQL responses
            if (!url.includes('graphql')) {
                return;
            }
            try {
                const contentType = response.headers()['content-type'] || '';
                if (!contentType.includes('application/json')) {
                    return;
                }
                const data = await response.json();
                // Extract operation name from the query string if present.
                // URL parsing (rather than splitting on '?') keeps fragments and
                // stray '?' characters out of the parameter values.
                const operationName = new URL(url).searchParams.get('operationName') || 'Unknown';
                capturedResponses.push({
                    operationName,
                    url: url.substring(0, 200),
                    data,
                    timestamp: new Date()
                });
                console.log(`📡 Captured: ${operationName}`);
                // Check for product data
                const products = productsOf(data);
                if (products) {
                    console.log(`   Found ${products.length} products`);
                }
            }
            catch (e) {
                // Best-effort capture: a response that cannot be read or parsed is
                // skipped, but reported so an empty capture can be diagnosed.
                console.warn(`Skipped unreadable GraphQL response: ${e?.message ?? e}`);
            }
        });
        console.log('Navigating to page...');
        await page.goto(menuUrl, {
            waitUntil: 'networkidle2',
            timeout: 90000
        });
        // Check if it's a Dutchie menu
        const isDutchie = await page.evaluate(() => {
            return typeof window.reactEnv !== 'undefined';
        });
        if (isDutchie) {
            console.log('✅ Dutchie menu detected\n');
            // Get environment info
            const reactEnv = await page.evaluate(() => window.reactEnv);
            console.log('Dutchie Environment:');
            console.log(`  dispensaryId: ${reactEnv?.dispensaryId}`);
            console.log(`  retailerId: ${reactEnv?.retailerId}`);
            console.log(`  chainId: ${reactEnv?.chainId}`);
        }
        // Scroll to trigger lazy loading
        console.log('\nScrolling to load more products...');
        await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
        await new Promise(r => setTimeout(r, 3000));
        // Click on a category to trigger more loads
        const categoryLinks = await page.$$('a[href*="/products/"]');
        if (categoryLinks.length > 0) {
            console.log(`Found ${categoryLinks.length} category links, clicking first one...`);
            try {
                // Arm the navigation wait BEFORE clicking. Awaiting click() first
                // can miss a navigation that starts immediately, leaving the wait
                // to time out.
                await Promise.all([
                    page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 30000 }),
                    categoryLinks[0].click(),
                ]);
            }
            catch (e) {
                console.log('Category navigation failed, continuing...');
            }
        }
        // Wait a bit more for any final responses
        await new Promise(r => setTimeout(r, 2000));
        console.log(`\n${rule}`);
        console.log(`CAPTURED ${capturedResponses.length} GRAPHQL RESPONSES`);
        console.log(rule);
        // Find product data
        let productSchema = null;
        let sampleProduct = null;
        for (const resp of capturedResponses) {
            console.log(`\n${resp.operationName}:`);
            console.log(`  URL: ${resp.url.substring(0, 100)}...`);
            const products = productsOf(resp.data);
            if (products) {
                console.log(`  ✅ Contains ${products.length} products`);
                // Only the first product of the first non-empty payload is sampled.
                if (products.length > 0 && !sampleProduct) {
                    sampleProduct = products[0];
                    productSchema = extractSchema(products[0]);
                }
            }
            // Show top-level data keys
            if (resp.data?.data) {
                console.log(`  Data keys: ${Object.keys(resp.data.data).join(', ')}`);
            }
        }
        // Output the product schema
        if (productSchema) {
            console.log('\n' + rule);
            console.log('PRODUCT SCHEMA (from first product):');
            console.log(rule);
            console.log(JSON.stringify(productSchema, null, 2));
            console.log('\n' + rule);
            console.log('SAMPLE PRODUCT:');
            console.log(rule);
            console.log(JSON.stringify(sampleProduct, null, 2));
            // Save to file
            const outputData = {
                capturedAt: new Date().toISOString(),
                menuUrl,
                schema: productSchema,
                sampleProduct,
                allResponses: capturedResponses.map(r => ({
                    operationName: r.operationName,
                    dataKeys: r.data?.data ? Object.keys(r.data.data) : [],
                    productCount: productsOf(r.data)?.length || 0
                }))
            };
            const outputPath = '/tmp/dutchie-schema-capture.json';
            fs.writeFileSync(outputPath, JSON.stringify(outputData, null, 2));
            console.log(`\nSaved capture to: ${outputPath}`);
        }
        else {
            console.log('\n❌ No product data captured');
            // Debug: show all responses
            console.log('\nAll captured responses:');
            for (const resp of capturedResponses) {
                console.log(`\n${resp.operationName}:`);
                console.log(JSON.stringify(resp.data, null, 2).substring(0, 500));
            }
        }
    }
    catch (error) {
        console.error('Error:', error.message);
    }
    finally {
        // Always release the browser, even when capture failed part-way.
        if (browser) {
            await browser.close();
        }
    }
}
/**
 * Describe the shape of a value as a nested { type, ... } tree.
 *
 * - null / undefined  -> { type: 'null' } / { type: 'undefined' }
 * - arrays            -> { type: 'array', items } where `items` is the schema of
 *                        the FIRST element only, or 'unknown' for an empty array
 * - objects           -> { type: 'object', properties } with one entry per own
 *                        enumerable key
 * - anything else     -> { type: typeof value, example } where `example` is the
 *                        stringified value cut to 100 characters
 *
 * @param {*} obj - Value to describe.
 * @param {string} [prefix=''] - Dotted path of `obj` within the root value. It is
 *   threaded through the recursion but does not appear in the returned schema.
 * @returns {object} Schema node for `obj`.
 */
function extractSchema(obj, prefix = '') {
    if (obj === null) {
        return { type: 'null' };
    }
    if (obj === undefined) {
        return { type: 'undefined' };
    }
    if (Array.isArray(obj)) {
        const items = obj.length === 0
            ? 'unknown'
            : extractSchema(obj[0], prefix + '[]');
        return { type: 'array', items };
    }
    if (typeof obj !== 'object') {
        // Primitives (and functions): record the type plus a short sample.
        const example = String(obj).substring(0, 100);
        return { type: typeof obj, example };
    }
    const properties = Object.entries(obj).reduce((collected, [key, value]) => {
        const childPath = prefix ? `${prefix}.${key}` : key;
        collected[key] = extractSchema(value, childPath);
        return collected;
    }, {});
    return { type: 'object', properties };
}
// Script entry point: capture the menu URL given as the first CLI argument,
// falling back to a built-in sample menu when none (or an empty one) is given.
const DEFAULT_MENU_URL = 'https://dutchie.com/embedded-menu/AZ-Deeply-Rooted';
const [, , cliMenuUrl] = process.argv;
captureSchema(cliMenuUrl || DEFAULT_MENU_URL).catch(console.error);