Files
cannaiq/backend/test-intercept.js
Kelly a35976b9e9 chore: Clean up deprecated code and docs
- Move deprecated directories to src/_deprecated/:
  - hydration/ (old pipeline approach)
  - scraper-v2/ (old Puppeteer scraper)
  - canonical-hydration/ (merged into tasks)
  - Unused services: availability, crawler-logger, geolocation, etc
  - Unused utils: age-gate-playwright, HomepageValidator, stealthBrowser

- Archive outdated docs to docs/_archive/:
  - ANALYTICS_RUNBOOK.md
  - ANALYTICS_V2_EXAMPLES.md
  - BRAND_INTELLIGENCE_API.md
  - CRAWL_PIPELINE.md
  - TASK_WORKFLOW_2024-12-10.md
  - WORKER_TASK_ARCHITECTURE.md
  - ORGANIC_SCRAPING_GUIDE.md

- Add docs/CODEBASE_MAP.md as single source of truth
- Add warning files to deprecated/archived directories
- Slim down CLAUDE.md to essential rules only

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-11 22:17:40 -07:00

181 lines
5.2 KiB
JavaScript

/**
* Stealth Browser Payload Capture - Direct GraphQL Injection
*
* Uses the browser session to make GraphQL requests that look organic.
* Adds proper headers matching what Dutchie's frontend sends.
*/
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const fs = require('fs');
puppeteer.use(StealthPlugin());
/**
 * Capture the product list for one Dutchie menu and write it to a JSON file.
 *
 * Launches a stealth-enabled headless browser, loads the embedded menu page so
 * that subsequent requests share that page's session, then pages through the
 * `FilteredProducts` persisted GraphQL query from inside the page context.
 *
 * Failures inside the paging loop (HTTP errors, GraphQL errors, thrown
 * exceptions) do not reject: they are recorded in the browser-side log, the
 * loop stops, and whatever was collected so far is still written out.
 *
 * @param {object} config
 * @param {*} [config.dispensaryId=null] - Copied verbatim into the payload.
 * @param {string} config.platformId - Sent as `productsFilter.dispensaryId`.
 * @param {string} config.cName - Menu slug used in the embedded-menu URL.
 * @param {string} [config.outputPath] - Destination file; defaults to
 *   `/tmp/payload_<cName>_<timestamp>.json`.
 * @param {number} [config.perPage=100] - Products requested per page.
 * @param {number} [config.maxPages=30] - Upper bound on pages requested.
 * @param {number} [config.pageDelayMs=200] - Pause between page requests.
 * @returns {Promise<{dispensaryId: *, platformId: string, cName: string,
 *   fetchedAt: string, productCount: number, products: Array}>} The payload
 *   that was written to `outputPath`.
 * @throws {TypeError} If `cName` or `platformId` is missing.
 */
async function capturePayload(config = {}) {
  const {
    dispensaryId = null,
    platformId,
    cName,
    outputPath = `/tmp/payload_${cName}_${Date.now()}.json`,
    perPage = 100,
    maxPages = 30,
    pageDelayMs = 200,
  } = config;

  // Fail before launching a browser: a missing value would otherwise be
  // interpolated as the literal string "undefined" into the URL / query.
  if (!cName) {
    throw new TypeError('capturePayload: config.cName is required');
  }
  if (!platformId) {
    throw new TypeError('capturePayload: config.platformId is required');
  }

  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  let result;
  try {
    const page = await browser.newPage();

    // Establish session by visiting the embedded menu
    const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
    console.log(`[Capture] Establishing session at ${embedUrl}...`);
    await page.goto(embedUrl, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    console.log('[Capture] Session established, fetching ALL products...');

    // The callback below executes in the page, not in Node, so it must be
    // self-contained: everything it needs is passed in through `opts`.
    result = await page.evaluate(async (opts) => {
      const allProducts = [];
      const logs = [];
      let totalCount = 0;
      const sessionId = `browser-session-${Date.now()}`;

      try {
        for (let pageNum = 0; pageNum < opts.maxPages; pageNum += 1) {
          const variables = {
            includeEnterpriseSpecials: false,
            productsFilter: {
              dispensaryId: opts.platformId,
              pricingType: 'rec',
              Status: 'Active', // 'Active' for in-stock products per CLAUDE.md
              types: [],
              useCache: true,
              isDefaultSort: true,
              sortBy: 'popularSortIdx',
              sortDirection: 1,
              bypassOnlineThresholds: true,
              isKioskMenu: false,
              removeProductsBelowOptionThresholds: false,
            },
            page: pageNum,
            perPage: opts.perPage,
          };
          // Persisted-query reference: only the hash is sent, not the query
          // text. NOTE(review): presumably this must match the hash the site's
          // own frontend uses - re-check it if requests start failing.
          const extensions = {
            persistedQuery: {
              version: 1,
              sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
            },
          };

          // Build GET URL like the browser does
          const qs = new URLSearchParams({
            operationName: 'FilteredProducts',
            variables: JSON.stringify(variables),
            extensions: JSON.stringify(extensions),
          });
          const url = `https://dutchie.com/api-3/graphql?${qs.toString()}`;

          const response = await fetch(url, {
            method: 'GET',
            headers: {
              'Accept': 'application/json',
              'content-type': 'application/json',
              'x-dutchie-session': sessionId,
              'apollographql-client-name': 'Marketplace (production)',
            },
            credentials: 'include',
          });
          logs.push(`Page ${pageNum}: HTTP ${response.status}`);

          if (!response.ok) {
            const text = await response.text();
            logs.push(`HTTP error: ${response.status} - ${text.slice(0, 200)}`);
            break;
          }

          const json = await response.json();
          if (json.errors) {
            logs.push(`GraphQL error: ${JSON.stringify(json.errors).slice(0, 200)}`);
            break;
          }

          const data = json?.data?.filteredProducts;
          if (!data || !data.products) {
            logs.push('No products in response');
            break;
          }

          const products = data.products;
          allProducts.push(...products);

          if (pageNum === 0) {
            totalCount = data.queryInfo?.totalCount || 0;
            logs.push(`Total reported: ${totalCount}`);
          }
          logs.push(`Got ${products.length} products (total: ${allProducts.length}/${totalCount})`);

          // Only trust the reported total when there is one. With an unknown
          // total (0) the comparison would always be true and paging would
          // stop after the first page even though that page was full.
          const reachedReportedTotal = totalCount > 0 && allProducts.length >= totalCount;
          if (reachedReportedTotal || products.length < opts.perPage) {
            break;
          }

          // Small delay between pages to be polite
          await new Promise((resolve) => setTimeout(resolve, opts.pageDelayMs));
        }
      } catch (err) {
        // Keep what was collected so far; the message is printed by the caller.
        logs.push(`Error: ${err.message}`);
      }

      return { products: allProducts, totalCount, logs };
    }, { platformId, perPage, maxPages, pageDelayMs });
  } finally {
    // Always release the browser process, including when navigation or the
    // in-page script throws.
    await browser.close();
  }

  // Print logs from browser context
  result.logs.forEach((log) => console.log(`[Browser] ${log}`));
  console.log(`[Capture] Got ${result.products.length} products (API reported ${result.totalCount})`);
  if (result.totalCount > result.products.length) {
    console.warn(
      `[Capture] WARNING: capture is incomplete (${result.products.length} of ${result.totalCount} products)`
    );
  }

  const payload = {
    dispensaryId,
    platformId,
    cName,
    fetchedAt: new Date().toISOString(),
    productCount: result.products.length,
    products: result.products,
  };

  fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
  console.log(`\n=== Capture Complete ===`);
  console.log(`Total products: ${result.products.length}`);
  console.log(`Saved to: ${outputPath}`);
  console.log(`File size: ${(fs.statSync(outputPath).size / 1024).toFixed(1)} KB`);

  return payload;
}
// Run: capture one hard-coded dispensary menu and print a sample product.
(async () => {
  const payload = await capturePayload({
    cName: 'AZ-Deeply-Rooted',
    platformId: '6405ef617056e8014d79101b',
  });

  if (payload.products.length > 0) {
    const sample = payload.products[0];
    // Field casing differs between product shapes, so try both spellings.
    console.log(`\nSample: ${sample.Name || sample.name} - ${sample.brand?.name || sample.brandName}`);
  }
})().catch((err) => {
  console.error(err);
  // Report the failure through the exit status too; logging alone leaves the
  // process exiting 0, which hides a failed capture from shells and CI.
  process.exitCode = 1;
});