chore: Clean up deprecated code and docs
- Move deprecated directories to src/_deprecated/:
  - hydration/ (old pipeline approach)
  - scraper-v2/ (old Puppeteer scraper)
  - canonical-hydration/ (merged into tasks)
  - Unused services: availability, crawler-logger, geolocation, etc.
  - Unused utils: age-gate-playwright, HomepageValidator, stealthBrowser
- Archive outdated docs to docs/_archive/:
  - ANALYTICS_RUNBOOK.md
  - ANALYTICS_V2_EXAMPLES.md
  - BRAND_INTELLIGENCE_API.md
  - CRAWL_PIPELINE.md
  - TASK_WORKFLOW_2024-12-10.md
  - WORKER_TASK_ARCHITECTURE.md
  - ORGANIC_SCRAPING_GUIDE.md
- Add docs/CODEBASE_MAP.md as the single source of truth
- Add warning files to deprecated/archived directories
- Slim down CLAUDE.md to essential rules only

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
180
backend/test-intercept.js
Normal file
180
backend/test-intercept.js
Normal file
@@ -0,0 +1,180 @@
|
||||
/**
|
||||
* Stealth Browser Payload Capture - Direct GraphQL Injection
|
||||
*
|
||||
* Uses the browser session to make GraphQL requests that look organic.
|
||||
* Adds proper headers matching what Dutchie's frontend sends.
|
||||
*/
|
||||
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
||||
const fs = require('fs');
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
/**
 * Pages through the `FilteredProducts` persisted GraphQL query and collects
 * every product returned for one dispensary.
 *
 * This function is handed to `page.evaluate`, so it executes in the page
 * context rather than in Node. It must stay fully self-contained: everything
 * it needs arrives through its single (JSON-serializable) argument, and it
 * must not reference any name declared outside its own body.
 *
 * Errors are not thrown; they are recorded in `logs` and whatever was
 * collected up to that point is returned.
 *
 * @param {object} options
 * @param {string} options.platformId - Sent as `productsFilter.dispensaryId`.
 * @param {number} options.perPage - Page size requested from the API.
 * @param {number} options.maxPages - Hard cap on the number of pages requested.
 * @param {number} options.pageDelayMs - Pause between consecutive page requests.
 * @returns {Promise<{products: object[], totalCount: number, logs: string[]}>}
 */
async function fetchAllProductsInBrowser({ platformId, perPage, maxPages, pageDelayMs }) {
  const allProducts = [];
  const logs = [];
  let totalCount = 0;
  const sessionId = `browser-session-${Date.now()}`;

  try {
    let pageNum = 0;

    while (pageNum < maxPages) {
      const variables = {
        includeEnterpriseSpecials: false,
        productsFilter: {
          dispensaryId: platformId,
          pricingType: 'rec',
          Status: 'Active', // 'Active' for in-stock products per CLAUDE.md
          types: [],
          useCache: true,
          isDefaultSort: true,
          sortBy: 'popularSortIdx',
          sortDirection: 1,
          bypassOnlineThresholds: true,
          isKioskMenu: false,
          removeProductsBelowOptionThresholds: false,
        },
        page: pageNum,
        perPage,
      };

      const extensions = {
        persistedQuery: {
          version: 1,
          sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
        },
      };

      // Build GET URL like the browser does
      const qs = new URLSearchParams({
        operationName: 'FilteredProducts',
        variables: JSON.stringify(variables),
        extensions: JSON.stringify(extensions),
      });
      const url = `https://dutchie.com/api-3/graphql?${qs.toString()}`;

      const response = await fetch(url, {
        method: 'GET',
        headers: {
          'Accept': 'application/json',
          'content-type': 'application/json',
          'x-dutchie-session': sessionId,
          'apollographql-client-name': 'Marketplace (production)',
        },
        credentials: 'include',
      });

      logs.push(`Page ${pageNum}: HTTP ${response.status}`);

      if (!response.ok) {
        const text = await response.text();
        logs.push(`HTTP error: ${response.status} - ${text.slice(0, 200)}`);
        break;
      }

      const json = await response.json();

      if (json.errors) {
        logs.push(`GraphQL error: ${JSON.stringify(json.errors).slice(0, 200)}`);
        break;
      }

      const data = json?.data?.filteredProducts;
      if (!data || !data.products) {
        logs.push('No products in response');
        break;
      }

      const products = data.products;
      allProducts.push(...products);

      if (pageNum === 0) {
        totalCount = data.queryInfo?.totalCount || 0;
        logs.push(`Total reported: ${totalCount}`);
      }

      logs.push(`Got ${products.length} products (total: ${allProducts.length}/${totalCount})`);

      // Only trust the reported total when the API actually supplied one.
      // A missing total is stored as 0, and comparing against 0 would end
      // pagination after the first page even if that page was full.
      const reachedReportedTotal = totalCount > 0 && allProducts.length >= totalCount;
      if (reachedReportedTotal || products.length < perPage) {
        break;
      }

      pageNum++;

      if (pageNum >= maxPages) {
        // Make truncation visible instead of silently returning a partial set.
        logs.push(`Stopped at page cap (${maxPages} pages); results may be incomplete`);
        break;
      }

      // Small delay between pages to be polite
      await new Promise((resolve) => setTimeout(resolve, pageDelayMs));
    }
  } catch (err) {
    logs.push(`Error: ${err.message}`);
  }

  return { products: allProducts, totalCount, logs };
}

/**
 * Captures the full product list for one Dutchie dispensary and writes it to
 * disk as pretty-printed JSON.
 *
 * A headless browser first loads the store's embedded menu so that later
 * requests run inside an established browser session, then the product query
 * is issued from within that page via `fetch`.
 *
 * @param {object} config
 * @param {*} [config.dispensaryId=null] - Copied verbatim into the payload.
 * @param {string} config.platformId - Dispensary id used in the GraphQL filter.
 * @param {string} config.cName - Store slug used in the embedded-menu URL.
 * @param {string} [config.outputPath] - Destination file; defaults to
 *   `/tmp/payload_<cName>_<timestamp>.json`.
 * @param {number} [config.perPage=100] - Products requested per page.
 * @param {number} [config.maxPages=30] - Maximum number of pages to request.
 * @param {number} [config.pageDelayMs=200] - Delay between page requests.
 * @returns {Promise<{dispensaryId: *, platformId: string, cName: string,
 *   fetchedAt: string, productCount: number, products: object[]}>}
 *   The payload that was written to `outputPath`.
 * @throws {TypeError} If `cName` or `platformId` is missing.
 * @throws {Error} If launching the browser, navigating, or writing the file fails.
 */
async function capturePayload(config) {
  const {
    dispensaryId = null,
    platformId,
    cName,
    outputPath = `/tmp/payload_${cName}_${Date.now()}.json`,
    perPage = 100,
    maxPages = 30,
    pageDelayMs = 200,
  } = config ?? {};

  // Fail fast, before a browser is launched, rather than scraping
  // ".../embedded-menu/undefined" or filtering on an undefined dispensary.
  if (!cName) {
    throw new TypeError('capturePayload: config.cName is required');
  }
  if (!platformId) {
    throw new TypeError('capturePayload: config.platformId is required');
  }

  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  let result;
  try {
    const page = await browser.newPage();

    // Establish session by visiting the embedded menu
    const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
    console.log(`[Capture] Establishing session at ${embedUrl}...`);

    await page.goto(embedUrl, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    console.log('[Capture] Session established, fetching ALL products...');

    // Fetch all products using GET requests with proper headers
    result = await page.evaluate(fetchAllProductsInBrowser, {
      platformId,
      perPage,
      maxPages,
      pageDelayMs,
    });
  } finally {
    // Always release the browser process, including when navigation or the
    // in-page fetch throws.
    await browser.close();
  }

  // Print logs from browser context
  result.logs.forEach((log) => console.log(`[Browser] ${log}`));

  console.log(`[Capture] Got ${result.products.length} products (API reported ${result.totalCount})`);

  const payload = {
    dispensaryId,
    platformId,
    cName,
    fetchedAt: new Date().toISOString(),
    productCount: result.products.length,
    products: result.products,
  };

  fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));

  console.log(`\n=== Capture Complete ===`);
  console.log(`Total products: ${result.products.length}`);
  console.log(`Saved to: ${outputPath}`);
  console.log(`File size: ${(fs.statSync(outputPath).size / 1024).toFixed(1)} KB`);

  return payload;
}
|
||||
|
||||
// Run
|
||||
// Script entry point: capture one hard-coded store and, when anything came
// back, print the first product as a quick sanity check. Any rejection is
// reported through console.error.
(async function runSampleCapture() {
  const captureConfig = {
    cName: 'AZ-Deeply-Rooted',
    platformId: '6405ef617056e8014d79101b',
  };
  const { products } = await capturePayload(captureConfig);

  // Nothing captured, nothing to preview.
  if (!(products.length > 0)) {
    return;
  }

  const sample = products[0];
  // Fall back to the alternative field names when the primary ones are empty.
  const productName = sample.Name || sample.name;
  const brandName = sample.brand?.name || sample.brandName;
  console.log(`\nSample: ${productName} - ${brandName}`);
})().catch(console.error);
|
||||
Reference in New Issue
Block a user