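/*
 * NOTE(review): the text below appears to be repository-viewer residue (a
 * commit note and file statistics), not part of the script itself. It is kept
 * here as a comment so the file parses; confirm and remove if unneeded.
 *
 * Commit note: The job_run_logs table tracks scheduled job orchestration, not
 * individual worker jobs. Worker info (worker_id, worker_hostname) belongs on
 * dispensary_crawl_jobs, not job_run_logs.
 * 🤖 Generated with [Claude Code](https://claude.com/claude-code)
 * Co-Authored-By: Claude <noreply@anthropic.com>
 *
 * File stats: 237 lines, 9.8 KiB, JavaScript
 */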
"use strict";
/**
 * Capture Dutchie GraphQL response structure via Puppeteer interception.
 *
 * This script navigates to a Dutchie menu page and captures the GraphQL
 * responses to understand the exact product data structure.
 */
/**
 * TypeScript-emitted interop helper: expose property `k` of module object `m`
 * on object `o` under the name `k2` (defaults to `k`).
 *
 * When `Object.create` exists the property is defined through a descriptor;
 * unless `m` already supplies a usable accessor, a getter is installed so the
 * value read from `o` always reflects the current `m[k]`. Without
 * `Object.create` the value is copied once by plain assignment.
 *
 * An already-defined `this.__createBinding` is reused when present.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Replace the descriptor with a forwarding getter when the property is
    // missing, is a plain data property, or is an accessor on a non-ES module.
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
/**
 * TypeScript-emitted interop helper: attach `v` to `o` as its `default`
 * property. With `Object.create` available the property is defined as an
 * enumerable data property via `Object.defineProperty`; otherwise it is
 * assigned directly.
 *
 * An already-defined `this.__setModuleDefault` is reused when present.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
/**
 * TypeScript-emitted interop helper backing `import * as ns from "..."`.
 *
 * A module flagged `__esModule` is returned untouched. Any other value is
 * wrapped in a fresh object: each own property except "default" is bound via
 * `__createBinding`, and the module itself is stored as `default` via
 * `__setModuleDefault`.
 *
 * An already-defined `this.__importStar` is reused when present.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolved lazily on first use: prefer Object.getOwnPropertyNames, else
    // fall back to a for...in scan filtered by hasOwnProperty. The variable
    // overwrites itself so the choice is made only once.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
/**
 * TypeScript-emitted interop helper backing `import x from "..."`.
 *
 * Returns `mod` unchanged when it is flagged `__esModule`; otherwise wraps it
 * as `{ default: mod }` so callers can always read `.default`.
 *
 * An already-defined `this.__importDefault` is reused when present.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
// Flag this CommonJS module as having been compiled from ES module syntax.
Object.defineProperty(exports, "__esModule", { value: true });
const puppeteer_extra_1 = __importDefault(require("puppeteer-extra"));
const puppeteer_extra_plugin_stealth_1 = __importDefault(require("puppeteer-extra-plugin-stealth"));
const fs = __importStar(require("fs"));
// Register the stealth plugin on the shared puppeteer-extra instance at load
// time, i.e. before captureSchema() launches a browser.
puppeteer_extra_1.default.use((0, puppeteer_extra_plugin_stealth_1.default)());
/**
 * Open a menu page in headless Chromium, record every JSON GraphQL response
 * the page receives, and report the structure of the first product found.
 *
 * Steps: launch the browser, install a response listener, navigate to
 * `menuUrl`, scroll to the bottom, click the first link whose href contains
 * "/products/" (if any), then summarise what was captured. When a response
 * contains `data.filteredProducts.products`, the schema of its first product
 * and the product itself are printed and written to
 * /tmp/dutchie-schema-capture.json; otherwise the start of every captured
 * payload is printed for debugging.
 *
 * @param {string} menuUrl - URL of the menu page to load.
 * @returns {Promise<void>} Resolves after the browser has been closed. Errors
 *   thrown inside the capture flow are logged with console.error and not
 *   rethrown.
 */
async function captureSchema(menuUrl) {
    let browser;
    const capturedResponses = [];
    try {
        console.log('='.repeat(80));
        console.log('DUTCHIE GRAPHQL SCHEMA CAPTURE');
        console.log('='.repeat(80));
        console.log(`\nTarget URL: ${menuUrl}\n`);
        browser = await puppeteer_extra_1.default.launch({
            headless: 'new',
            args: [
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-blink-features=AutomationControlled',
            ]
        });
        const page = await browser.newPage();
        // Use a realistic user agent
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
        // Set viewport to desktop size
        await page.setViewport({ width: 1920, height: 1080 });
        // Hide webdriver flag
        await page.evaluateOnNewDocument(() => {
            Object.defineProperty(navigator, 'webdriver', { get: () => false });
            window.chrome = { runtime: {} };
        });
        // Intercept all GraphQL responses. The listener is registered before
        // page.goto() so responses from the initial load are captured too.
        page.on('response', async (response) => {
            const url = response.url();
            // Only capture GraphQL responses
            if (!url.includes('graphql'))
                return;
            try {
                const contentType = response.headers()['content-type'] || '';
                if (!contentType.includes('application/json'))
                    return;
                const data = await response.json();
                // Extract operation name from URL if possible
                const urlParams = new URLSearchParams(url.split('?')[1] || '');
                const operationName = urlParams.get('operationName') || 'Unknown';
                capturedResponses.push({
                    operationName,
                    url: url.substring(0, 200),
                    data,
                    timestamp: new Date()
                });
                console.log(`📡 Captured: ${operationName}`);
                // Check for product data
                if (data?.data?.filteredProducts?.products) {
                    const products = data.data.filteredProducts.products;
                    console.log(` Found ${products.length} products`);
                }
            }
            catch (e) {
                // Capture stays best effort: a body that cannot be read or
                // parsed is skipped, but the skip is reported on stderr
                // instead of vanishing silently.
                console.warn(`Skipped unreadable GraphQL response (${url.substring(0, 100)}): ${e?.message ?? e}`);
            }
        });
        console.log('Navigating to page...');
        await page.goto(menuUrl, {
            waitUntil: 'networkidle2',
            timeout: 90000
        });
        // Check if it's a Dutchie menu
        const isDutchie = await page.evaluate(() => {
            return typeof window.reactEnv !== 'undefined';
        });
        if (isDutchie) {
            console.log('✅ Dutchie menu detected\n');
            // Get environment info
            const reactEnv = await page.evaluate(() => window.reactEnv);
            console.log('Dutchie Environment:');
            console.log(` dispensaryId: ${reactEnv?.dispensaryId}`);
            console.log(` retailerId: ${reactEnv?.retailerId}`);
            console.log(` chainId: ${reactEnv?.chainId}`);
        }
        // Scroll to trigger lazy loading
        console.log('\nScrolling to load more products...');
        await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
        await new Promise(r => setTimeout(r, 3000));
        // Click on a category to trigger more loads
        const categoryLinks = await page.$$('a[href*="/products/"]');
        if (categoryLinks.length > 0) {
            console.log(`Found ${categoryLinks.length} category links, clicking first one...`);
            try {
                // Begin waiting for the navigation BEFORE clicking. Awaiting
                // the click first can miss a navigation that completes
                // quickly, leaving the wait to run into its 30 s timeout.
                await Promise.all([
                    page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 30000 }),
                    categoryLinks[0].click(),
                ]);
            }
            catch (e) {
                console.log('Category navigation failed, continuing...');
            }
        }
        // Wait a bit more for any final responses
        await new Promise(r => setTimeout(r, 2000));
        console.log(`\n${'='.repeat(80)}`);
        console.log(`CAPTURED ${capturedResponses.length} GRAPHQL RESPONSES`);
        console.log('='.repeat(80));
        // Find product data: the first non-empty product list supplies both
        // the sample product and the derived schema.
        let productSchema = null;
        let sampleProduct = null;
        for (const resp of capturedResponses) {
            console.log(`\n${resp.operationName}:`);
            console.log(` URL: ${resp.url.substring(0, 100)}...`);
            if (resp.data?.data?.filteredProducts?.products) {
                const products = resp.data.data.filteredProducts.products;
                console.log(` ✅ Contains ${products.length} products`);
                if (products.length > 0 && !sampleProduct) {
                    sampleProduct = products[0];
                    productSchema = extractSchema(products[0]);
                }
            }
            // Show top-level data keys
            if (resp.data?.data) {
                console.log(` Data keys: ${Object.keys(resp.data.data).join(', ')}`);
            }
        }
        // Output the product schema
        if (productSchema) {
            console.log('\n' + '='.repeat(80));
            console.log('PRODUCT SCHEMA (from first product):');
            console.log('='.repeat(80));
            console.log(JSON.stringify(productSchema, null, 2));
            console.log('\n' + '='.repeat(80));
            console.log('SAMPLE PRODUCT:');
            console.log('='.repeat(80));
            console.log(JSON.stringify(sampleProduct, null, 2));
            // Save to file
            const outputData = {
                capturedAt: new Date().toISOString(),
                menuUrl,
                schema: productSchema,
                sampleProduct,
                allResponses: capturedResponses.map(r => ({
                    operationName: r.operationName,
                    dataKeys: r.data?.data ? Object.keys(r.data.data) : [],
                    productCount: r.data?.data?.filteredProducts?.products?.length || 0
                }))
            };
            const outputPath = '/tmp/dutchie-schema-capture.json';
            fs.writeFileSync(outputPath, JSON.stringify(outputData, null, 2));
            console.log(`\nSaved capture to: ${outputPath}`);
        }
        else {
            console.log('\n❌ No product data captured');
            // Debug: show all responses (first 500 characters of each payload)
            console.log('\nAll captured responses:');
            for (const resp of capturedResponses) {
                console.log(`\n${resp.operationName}:`);
                console.log(JSON.stringify(resp.data, null, 2).substring(0, 500));
            }
        }
    }
    catch (error) {
        // Fall back to the thrown value itself when it is not an Error object.
        console.error('Error:', error?.message ?? error);
    }
    finally {
        // Always release the browser, including after a failure above.
        if (browser) {
            await browser.close();
        }
    }
}
/**
 * Extract schema from a value (field names + types).
 *
 * Result shapes:
 *   - null              -> { type: 'null' }
 *   - undefined         -> { type: 'undefined' }
 *   - empty array       -> { type: 'array', items: 'unknown' }
 *   - non-empty array   -> { type: 'array', items: <schema of element 0> }
 *   - any other object  -> { type: 'object', properties: { key: <schema> } }
 *   - anything else     -> { type: typeof value, example: <first 100 chars> }
 *
 * Arrays are described by their first element only; later elements are not
 * inspected.
 *
 * @param {*} obj - Value to describe.
 * @param {string} [prefix=''] - Dotted path of `obj` within the root value.
 *   It is threaded through the recursion but does not appear in the result.
 * @returns {object} Schema description of `obj`.
 */
function extractSchema(obj, prefix = '') {
    if (obj === null)
        return { type: 'null' };
    if (obj === undefined)
        return { type: 'undefined' };
    if (Array.isArray(obj)) {
        if (obj.length === 0)
            return { type: 'array', items: 'unknown' };
        return {
            type: 'array',
            items: extractSchema(obj[0], prefix + '[]')
        };
    }
    if (typeof obj === 'object') {
        const schema = { type: 'object', properties: {} };
        for (const [key, value] of Object.entries(obj)) {
            schema.properties[key] = extractSchema(value, prefix ? `${prefix}.${key}` : key);
        }
        return schema;
    }
    // Primitive: keep a truncated string form as an example value.
    return { type: typeof obj, example: String(obj).substring(0, 100) };
}
// Run: the menu URL is the first command-line argument; a sample embedded
// menu is used when none is given.
const url = process.argv[2] || 'https://dutchie.com/embedded-menu/AZ-Deeply-Rooted';
captureSchema(url).catch(console.error);