Files
cannaiq/backend/scripts/test-treez-page-state.ts
Kelly 698995e46f chore: bump task worker version comment
Force new git SHA to avoid CI scientific notation bug.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 02:02:30 -07:00

172 lines
5.4 KiB
TypeScript

import puppeteer from 'puppeteer';
import fs from 'fs';
/**
 * Pause the calling async flow for a fixed amount of time.
 *
 * @param ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns A promise that settles (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * One-off diagnostic: loads the shop page in headless Chromium and prints
 * what product data can be found in three places:
 *
 *  1. the Next.js `__NEXT_DATA__` script payload,
 *  2. `window` globals whose names look data-related,
 *  3. product anchors (`a[href*="/product/"]`) scraped from the DOM.
 *
 * Everything is written to stdout; nothing is returned or persisted.
 *
 * @returns A promise that resolves once the report is printed and the
 *          browser has been closed.
 * @throws Propagates any Puppeteer error (launch, navigation timeout,
 *         evaluation failure). The browser is closed before the error
 *         leaves this function.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Everything after launch runs inside try/finally so that a navigation
  // timeout or evaluation error cannot leave the Chromium process running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    console.log('Loading page...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    // Fixed settle delay after network idle before probing the page.
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(3000);
    }

    // Extract __NEXT_DATA__
    console.log('\n=== NEXT.JS DATA ===\n');
    const nextData = await page.evaluate(() => {
      const script = document.getElementById('__NEXT_DATA__');
      if (script) {
        try {
          // An empty or malformed payload makes JSON.parse throw; treat it as "no data".
          return JSON.parse(script.textContent || '');
        } catch { return null; }
      }
      return null;
    });

    if (nextData) {
      console.log('Top keys: ' + Object.keys(nextData).join(', '));
      if (nextData.props?.pageProps) {
        console.log('pageProps keys: ' + Object.keys(nextData.props.pageProps).join(', '));

        // Look for products under a few candidate property names.
        const pp = nextData.props.pageProps;
        if (pp.products) {
          console.log('\nFound products: ' + pp.products.length);
          if (pp.products[0]) {
            console.log('Product fields: ' + Object.keys(pp.products[0]).join(', '));
            console.log('\nSample:\n' + JSON.stringify(pp.products[0], null, 2));
          }
        }
        if (pp.initialProducts) {
          console.log('\nFound initialProducts: ' + pp.initialProducts.length);
        }
        if (pp.data) {
          console.log('\nFound data: ' + (Array.isArray(pp.data) ? pp.data.length + ' items' : typeof pp.data));
        }
      }
    }

    // Also check window object
    console.log('\n=== WINDOW GLOBALS ===\n');
    const windowData = await page.evaluate(() => {
      const win = window as any;
      const result: any = {};

      // Common patterns for storing product data
      const patterns = ['products', 'items', 'data', 'state', 'store', 'redux', 'apollo'];

      Object.keys(win).forEach(key => {
        const lowerKey = key.toLowerCase();
        if (patterns.some(p => lowerKey.includes(p))) {
          try {
            const val = win[key];
            if (typeof val === 'object' && val !== null) {
              // Only a summary (kind, first 10 keys, array length) is sent back,
              // not the value itself.
              result[key] = {
                type: Array.isArray(val) ? 'array' : 'object',
                keys: Object.keys(val).slice(0, 10),
                length: Array.isArray(val) ? val.length : undefined,
              };
            }
          } catch {
            // Best-effort scan: a global that throws while being read or
            // enumerated is skipped rather than aborting the whole probe.
          }
        }
      });

      return result;
    });

    console.log('Window globals with data-like names:');
    Object.entries(windowData).forEach(([k, v]: [string, any]) => {
      // Check the type rather than truthiness so empty arrays still print "(0)".
      const count = typeof v.length === 'number' ? ' (' + v.length + ')' : '';
      console.log(' ' + k + ': ' + v.type + count + ' - keys: ' + v.keys?.join(', '));
    });

    // Scrape product cards straight from the rendered DOM.
    console.log('\n=== EXTRACTING FROM DOM ===\n');
    const domProducts = await page.evaluate(() => {
      const products: any[] = [];

      document.querySelectorAll('a[href*="/product/"]').forEach((card: Element) => {
        const product: any = {};

        product.href = card.getAttribute('href');
        product.name = card.querySelector('h3, h4, h5')?.textContent?.trim();

        // Get all text
        const allText = card.textContent || '';

        // Extract THC %
        // NOTE(review): this takes the first "<number>%" in the card text and
        // assumes it is the THC figure — confirm against real cards.
        const thcMatch = allText.match(/(\d+(?:\.\d+)?)\s*%/);
        if (thcMatch) product.thc = thcMatch[1];

        // Extract price (first "$<number>" in the card text)
        const priceMatch = allText.match(/\$(\d+(?:\.\d+)?)/);
        if (priceMatch) product.price = priceMatch[1];

        // Extract weight
        // NOTE(review): matches a number followed by any "g"/"G", so text such
        // as "2 Gummies" would also match — confirm this is acceptable.
        const weightMatch = allText.match(/(\d+(?:\.\d+)?)\s*[gG]/);
        if (weightMatch) product.weight = weightMatch[1] + 'g';

        // Get brand from card
        const brandEl = card.querySelector('[class*="brand"]');
        product.brand = brandEl?.textContent?.trim();

        // Get strain type. Every label found in the text overwrites the
        // previous one, so the last matching entry of this list wins.
        const strainTypes = ['Indica', 'Sativa', 'Hybrid', 'I/S', 'S/I', 'CBD'];
        strainTypes.forEach(st => {
          if (allText.includes(st)) product.strainType = st;
        });

        // Get image
        const img = card.querySelector('img');
        product.image = img?.getAttribute('src');

        products.push(product);
      });

      return products;
    });

    console.log('Products from DOM: ' + domProducts.length);
    if (domProducts.length > 0) {
      console.log('\nSample:\n' + JSON.stringify(domProducts[0], null, 2));

      // Show how many scraped cards yielded each field.
      console.log('\n=== DATA QUALITY ===');
      const withThc = domProducts.filter(p => p.thc).length;
      const withPrice = domProducts.filter(p => p.price).length;
      const withBrand = domProducts.filter(p => p.brand).length;
      const withStrain = domProducts.filter(p => p.strainType).length;

      console.log('With THC%: ' + withThc + '/' + domProducts.length);
      console.log('With Price: ' + withPrice + '/' + domProducts.length);
      console.log('With Brand: ' + withBrand + '/' + domProducts.length);
      console.log('With Strain: ' + withStrain + '/' + domProducts.length);
    }
  } finally {
    await browser.close();
  }
}
// Script entry point. The promise is handled explicitly so a failure is
// reported once and the process terminates with a non-zero status;
// process.exit is used (rather than only setting exitCode) so the script
// cannot hang on handles that are still open after a failed run.
main().catch((err: unknown) => {
  console.error('Page state probe failed:', err);
  process.exit(1);
});