Force new git SHA to avoid CI scientific notation bug. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
172 lines
5.4 KiB
TypeScript
172 lines
5.4 KiB
TypeScript
import puppeteer from 'puppeteer';
|
|
import fs from 'fs';
|
|
|
|
/**
 * Pauses the calling async flow for a fixed delay.
 *
 * @param ms - Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
|
|
/**
 * Diagnostic probe of the shop page at https://shop.bestdispensary.com/shop.
 *
 * Launches headless Chromium, loads the page, clicks through the age gate if
 * one is present, and then prints three reports to stdout:
 *   1. the keys (and any product arrays) found in the `__NEXT_DATA__` script,
 *   2. window globals whose names look like data stores,
 *   3. product fields scraped heuristically from `a[href*="/product/"]` links,
 *      plus a count of how many scraped products carry each field.
 *
 * The browser is always closed, including when a navigation or evaluate step
 * throws; the error is then propagated to the caller.
 *
 * @returns A promise that resolves once the reports are printed and the
 *          browser has been closed.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Everything after launch runs inside try/finally so that a failed
  // navigation or evaluate call cannot leave the Chromium process running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    console.log('Loading page...\n');

    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    // Fixed pause after network idle, before probing the DOM.
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(3000);
    }

    // Extract __NEXT_DATA__
    console.log('\n=== NEXT.JS DATA ===\n');

    // Runs in the page: returns the parsed JSON payload of the
    // `__NEXT_DATA__` element, or null when it is absent or not valid JSON.
    const nextData = await page.evaluate(() => {
      const script = document.getElementById('__NEXT_DATA__');
      if (script) {
        try {
          return JSON.parse(script.textContent || '');
        } catch {
          return null;
        }
      }
      return null;
    });

    if (nextData) {
      console.log('Top keys: ' + Object.keys(nextData).join(', '));
      if (nextData.props?.pageProps) {
        console.log('pageProps keys: ' + Object.keys(nextData.props.pageProps).join(', '));

        // Look for products under the property names this probe knows about.
        const pp = nextData.props.pageProps;
        if (pp.products) {
          console.log('\nFound products: ' + pp.products.length);
          if (pp.products[0]) {
            console.log('Product fields: ' + Object.keys(pp.products[0]).join(', '));
            console.log('\nSample:\n' + JSON.stringify(pp.products[0], null, 2));
          }
        }
        if (pp.initialProducts) {
          console.log('\nFound initialProducts: ' + pp.initialProducts.length);
        }
        if (pp.data) {
          console.log('\nFound data: ' + (Array.isArray(pp.data) ? pp.data.length + ' items' : typeof pp.data));
        }
      }
    }

    // Also check window object
    console.log('\n=== WINDOW GLOBALS ===\n');

    // Runs in the page: for every window property whose lower-cased name
    // contains one of the patterns and whose value is a non-null object,
    // record its kind, up to ten of its keys, and its length if it is an array.
    const windowData = await page.evaluate(() => {
      const win = window as any;
      const result: any = {};

      // Common patterns for storing product data
      const patterns = ['products', 'items', 'data', 'state', 'store', 'redux', 'apollo'];

      Object.keys(win).forEach(key => {
        const lowerKey = key.toLowerCase();
        if (patterns.some(p => lowerKey.includes(p))) {
          try {
            const val = win[key];
            if (typeof val === 'object' && val !== null) {
              result[key] = {
                type: Array.isArray(val) ? 'array' : 'object',
                keys: Object.keys(val).slice(0, 10),
                length: Array.isArray(val) ? val.length : undefined,
              };
            }
          } catch {
            // Best-effort scan: a property that throws on access is skipped.
          }
        }
      });

      return result;
    });

    console.log('Window globals with data-like names:');
    for (const [k, v] of Object.entries(windowData) as [string, any][]) {
      // Compare against undefined rather than truthiness so that an empty
      // array still reports its "(0)" count.
      const count = v.length !== undefined ? ' (' + v.length + ')' : '';
      console.log(' ' + k + ': ' + v.type + count + ' - keys: ' + v.keys?.join(', '));
    }

    // Fall back to scraping the rendered DOM.
    console.log('\n=== EXTRACTING FROM DOM ===\n');

    // Runs in the page: builds one record per product link. All text-derived
    // fields are heuristics over the link's combined textContent.
    const domProducts = await page.evaluate(() => {
      const products: any[] = [];

      document.querySelectorAll('a[href*="/product/"]').forEach((card: Element) => {
        const product: any = {};

        product.href = card.getAttribute('href');
        product.name = card.querySelector('h3, h4, h5')?.textContent?.trim();

        // Get all text
        const allText = card.textContent || '';

        // Extract THC %: takes the first number followed by '%' in the text.
        const thcMatch = allText.match(/(\d+(?:\.\d+)?)\s*%/);
        if (thcMatch) product.thc = thcMatch[1];

        // Extract price: first number preceded by '$'.
        const priceMatch = allText.match(/\$(\d+(?:\.\d+)?)/);
        if (priceMatch) product.price = priceMatch[1];

        // Extract weight: first number followed by 'g' or 'G'.
        const weightMatch = allText.match(/(\d+(?:\.\d+)?)\s*[gG]/);
        if (weightMatch) product.weight = weightMatch[1] + 'g';

        // Get brand from card
        const brandEl = card.querySelector('[class*="brand"]');
        product.brand = brandEl?.textContent?.trim();

        // Get strain type. When several labels occur in the text, the one
        // listed last in this array wins because each match overwrites.
        const strainTypes = ['Indica', 'Sativa', 'Hybrid', 'I/S', 'S/I', 'CBD'];
        strainTypes.forEach(st => {
          if (allText.includes(st)) product.strainType = st;
        });

        // Get image
        const img = card.querySelector('img');
        product.image = img?.getAttribute('src');

        products.push(product);
      });

      return products;
    });

    console.log('Products from DOM: ' + domProducts.length);
    if (domProducts.length > 0) {
      console.log('\nSample:\n' + JSON.stringify(domProducts[0], null, 2));

      // Report how many scraped products carry each heuristic field.
      console.log('\n=== DATA QUALITY ===');
      const withThc = domProducts.filter(p => p.thc).length;
      const withPrice = domProducts.filter(p => p.price).length;
      const withBrand = domProducts.filter(p => p.brand).length;
      const withStrain = domProducts.filter(p => p.strainType).length;

      console.log('With THC%: ' + withThc + '/' + domProducts.length);
      console.log('With Price: ' + withPrice + '/' + domProducts.length);
      console.log('With Brand: ' + withBrand + '/' + domProducts.length);
      console.log('With Strain: ' + withStrain + '/' + domProducts.length);
    }
  } finally {
    await browser.close();
  }
}
|
|
|
|
// Script entry point: run the probe and handle a rejection explicitly
// instead of leaving a floating promise. On failure, log the error and
// mark the process as failed via a non-zero exit code.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|