Force new git SHA to avoid CI scientific notation bug. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
118 lines
3.4 KiB
TypeScript
118 lines
3.4 KiB
TypeScript
import puppeteer from 'puppeteer';
|
|
import fs from 'fs';
|
|
|
|
/**
 * Suspends the awaiting caller for approximately `ms` milliseconds.
 *
 * @param ms - Delay passed straight to `setTimeout`, in milliseconds.
 * @returns A promise that settles (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
|
|
/**
 * Loads the shop page in headless Chromium, records the product-search API
 * responses via a CDP session, paginates with the "Load More" button, and
 * writes the de-duplicated products to `/tmp/treez-products.json`.
 *
 * Flow:
 *  1. Launch the browser and open a page with pass-through request interception.
 *  2. Track responses whose URL contains `gapcommerceapi.com/product/search`
 *     and read their bodies once loading has finished.
 *  3. Dismiss the age gate if present, then click "Load More" up to 30 times.
 *  4. De-duplicate captured products by `id` / `productId`, print a summary,
 *     and save the result.
 *
 * The browser is always closed, even when a step throws.
 *
 * @returns A promise that resolves once the browser has been closed.
 * @throws Re-throws any navigation / Puppeteer error after cleanup.
 */
async function main(): Promise<void> {
  type Product = Record<string, unknown>;
  // Shape read from the search response (`hits.hits[]._source`);
  // presumably an Elasticsearch-style payload — only these fields are used.
  type SearchPayload = { hits?: { hits?: Array<{ _source?: Product } | null> } };

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();

    // Enable request interception but don't modify requests
    await page.setRequestInterception(true);

    const capturedProducts: Product[] = [];

    page.on('request', (request) => {
      request.continue();
    });

    // Use CDP to intercept responses
    const client = await page.target().createCDPSession();
    await client.send('Network.enable');

    // Request ids of search responses whose body has not been read yet.
    const pendingSearchRequests = new Set<string>();

    client.on('Network.responseReceived', (event) => {
      if (event.response.url.includes('gapcommerceapi.com/product/search')) {
        console.log('ES Response detected: ' + event.response.status);
        pendingSearchRequests.add(event.requestId);
      }
    });

    // Drop failed requests so the pending set does not grow without bound.
    client.on('Network.loadingFailed', (event) => {
      pendingSearchRequests.delete(event.requestId);
    });

    // The body is only guaranteed to be retrievable after loading has
    // finished; asking for it on `responseReceived` can fail.
    client.on('Network.loadingFinished', async (event) => {
      if (!pendingSearchRequests.delete(event.requestId)) return;

      try {
        const response = await client.send('Network.getResponseBody', {
          requestId: event.requestId,
        });

        const body = response.base64Encoded
          ? Buffer.from(response.body, 'base64').toString('utf8')
          : response.body;

        const payload: SearchPayload = JSON.parse(body);
        // Skip hits without a `_source` so later property reads cannot throw.
        const products = (payload.hits?.hits ?? [])
          .map((hit) => hit?._source)
          .filter((src): src is Product => src != null);

        capturedProducts.push(...products);
        console.log('Captured ' + products.length + ' products (total: ' + capturedProducts.length + ')');
      } catch (err: unknown) {
        const reason = err instanceof Error ? err.message : String(err);
        console.log('Could not get response body: ' + reason);
      }
    });

    console.log('Loading page with CDP interception...\n');

    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const submit = await page.$('[data-testid="age-gate-submit-button"]');
      if (submit) await submit.click();
      await sleep(3000);
    }

    // Click load more many times
    console.log('\nClicking Load More...');
    for (let i = 0; i < 30; i++) {
      try {
        const btn = await page.$('button.collection__load-more');
        if (!btn) break;

        // A zero-sized bounding box means the button is not rendered.
        const visible = await page.evaluate((b) => {
          const rect = b.getBoundingClientRect();
          return rect.width > 0 && rect.height > 0;
        }, btn);

        if (!visible) break;

        await btn.click();
        await sleep(1500);
        console.log('Click ' + (i + 1) + ': ' + capturedProducts.length + ' products');
      } catch (err: unknown) {
        // Best effort: stop paginating, but say why instead of failing silently.
        const reason = err instanceof Error ? err.message : String(err);
        console.log('Load More stopped: ' + reason);
        break;
      }
    }

    console.log('\n=== RESULTS ===\n');
    console.log('Total captured: ' + capturedProducts.length);

    if (capturedProducts.length > 0) {
      // Dedupe
      const seen = new Set<unknown>();
      const unique = capturedProducts.filter((p) => {
        const id = p.id || p.productId;
        if (!id || seen.has(id)) return false;
        seen.add(id);
        return true;
      });

      console.log('Unique products: ' + unique.length);

      // Every captured product may lack an id, leaving nothing to sample.
      const sample = unique[0];
      if (sample !== undefined) {
        console.log('\nFields: ' + Object.keys(sample).sort().join('\n'));
        console.log('\nSample:\n' + JSON.stringify(sample, null, 2));

        fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(unique, null, 2));
        console.log('\nSaved to /tmp/treez-products.json');
      } else {
        console.log('\nNo captured product had an id or productId; nothing saved.');
      }
    }
  } finally {
    // Always release the Chromium process, including on errors above.
    await browser.close();
  }
}
|
|
|
|
// Run the scraper. Report any failure and exit non-zero rather than leaving
// the returned promise's rejection unhandled.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|