import puppeteer from 'puppeteer';

/** Origin of the storefront; hrefs scraped from product cards are resolved against it. */
const BASE_URL = 'https://shop.bestdispensary.com';

/** Brand listing page that the analysis starts from. */
const BRAND_PAGE_URL = BASE_URL + '/brand/best';

/** Fields scraped from one product link (`a[href*="/product/"]`) on a listing page. */
interface ProductCard {
  href: string | null;
  slug: string | undefined;
  imageUrl: string | null | undefined;
  imageAlt: string | null | undefined;
  name: string | undefined;
  brand: string | undefined;
  priceText: string | undefined;
  badges: string[];
  thc: string | undefined;
  cbd: string | undefined;
  weight: string | undefined;
  /** First 200 characters of the card's whitespace-collapsed text. */
  allText: string | undefined;
  cardClasses: string;
}

/** Fields scraped from a product detail page. */
interface ProductDetail {
  h1: string | undefined;
  h2s: (string | undefined)[];
  prices: (string | undefined)[];
  /** First 300 characters of the description text. */
  description: string | undefined;
  cannabinoids: (string | undefined)[];
  types: (string | undefined)[];
  weights: string[];
  images: (string | null)[];
  /** First 500 characters of the whitespace-collapsed `<main>` text. */
  mainText: string | undefined;
}

/**
 * Resolves after `ms` milliseconds.
 *
 * @param ms Delay in milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Collects one {@link ProductCard} per product link in the current document.
 *
 * Passed to `page.evaluate`, so it executes in the browser page: it must stay
 * self-contained and must not reference anything from this module's scope.
 */
function collectProductCards(): ProductCard[] {
  return Array.from(document.querySelectorAll('a[href*="/product/"]')).map((card): ProductCard => {
    // URL/slug
    const href = card.getAttribute('href');

    // Image
    const img = card.querySelector('img');

    // Name (usually in h3 or similar)
    const nameEl = card.querySelector('h3, h4, h5, [class*="name"], [class*="title"]');

    // Brand
    const brandEl = card.querySelector('[class*="brand"], [class*="Brand"]');

    // Price
    const priceEl = card.querySelector('[class*="price"], [class*="Price"]');

    // Category/Type badges (empty texts are dropped)
    const badges = Array.from(
      card.querySelectorAll('[class*="badge"], [class*="tag"], [class*="label"]'),
    )
      .map(badge => badge.textContent?.trim())
      .filter((text): text is string => text !== undefined && text !== '');

    // THC/CBD info
    const thcEl = card.querySelector('[class*="thc"], [class*="THC"]');
    const cbdEl = card.querySelector('[class*="cbd"], [class*="CBD"]');

    // Weight/size
    const weightEl = card.querySelector('[class*="weight"], [class*="size"], [class*="gram"]');

    return {
      href,
      slug: href?.split('/product/')[1],
      imageUrl: img?.getAttribute('src'),
      imageAlt: img?.getAttribute('alt'),
      name: nameEl?.textContent?.trim(),
      brand: brandEl?.textContent?.trim(),
      priceText: priceEl?.textContent?.trim(),
      badges,
      thc: thcEl?.textContent?.trim(),
      cbd: cbdEl?.textContent?.trim(),
      weight: weightEl?.textContent?.trim(),
      // All text content, for analysis
      allText: card.textContent?.replace(/\s+/g, ' ').trim().slice(0, 200),
      // All classes on the card
      cardClasses: card.className,
    };
  });
}

/**
 * Collects a {@link ProductDetail} from the current document.
 *
 * Passed to `page.evaluate`, so it executes in the browser page: it must stay
 * self-contained and must not reference anything from this module's scope.
 */
function collectProductDetail(): ProductDetail {
  const textsOfAll = (selector: string): (string | undefined)[] =>
    Array.from(document.querySelectorAll(selector)).map(el => el.textContent?.trim());

  // A single selector list returns the first match in document order, which
  // would let any earlier <p> win over a real description element. Query the
  // description selectors first and only then fall back to the first <p>.
  const descEl =
    document.querySelector('[class*="description"], [class*="Description"]') ??
    document.querySelector('p');

  return {
    // Headings
    h1: document.querySelector('h1')?.textContent?.trim(),
    h2s: textsOfAll('h2'),
    // Price
    prices: textsOfAll('[class*="price"], [class*="Price"]'),
    // Description
    description: descEl?.textContent?.trim().slice(0, 300),
    // THC/CBD
    cannabinoids: textsOfAll(
      '[class*="thc"], [class*="THC"], [class*="cbd"], [class*="CBD"], [class*="cannabinoid"]',
    ),
    // Category/strain type
    types: textsOfAll('[class*="strain"], [class*="type"], [class*="category"]'),
    // Weight options: keep only non-empty texts shorter than 30 characters
    weights: textsOfAll('[class*="weight"], [class*="size"], [class*="option"]').filter(
      (w): w is string => w !== undefined && w !== '' && w.length < 30,
    ),
    // Images (first three)
    images: Array.from(document.querySelectorAll('img[src*="product"], img[src*="menu"]'))
      .map(img => img.getAttribute('src'))
      .slice(0, 3),
    // Body text, for analysis
    mainText: document.querySelector('main')?.textContent?.replace(/\s+/g, ' ').trim().slice(0, 500),
  };
}

/** Prints the first three scraped product cards in detail. */
function logProductSamples(products: ProductCard[]): void {
  console.log('Found ' + products.length + ' products\n');
  console.log('Sample product data:\n');

  products.slice(0, 3).forEach((p, i) => {
    console.log('Product ' + (i + 1) + ':');
    console.log(' Name: ' + p.name);
    console.log(' Brand: ' + p.brand);
    console.log(' Slug: ' + p.slug);
    console.log(' Price: ' + p.priceText);
    console.log(' THC: ' + p.thc);
    console.log(' CBD: ' + p.cbd);
    console.log(' Weight: ' + p.weight);
    console.log(' Badges: ' + JSON.stringify(p.badges));
    console.log(' Image: ' + (p.imageUrl ? p.imageUrl.slice(0, 60) + '...' : 'none'));
    console.log(' All Text: ' + p.allText);
    console.log('');
  });
}

/** Prints the fields scraped from a product detail page. */
function logProductDetail(detail: ProductDetail): void {
  console.log('Product Detail:');
  console.log(' H1: ' + detail.h1);
  console.log(' H2s: ' + JSON.stringify(detail.h2s));
  console.log(' Prices: ' + JSON.stringify(detail.prices));
  // `||` (not `??`) on purpose: an empty description is also reported as 'none'.
  console.log(' Description: ' + (detail.description || 'none'));
  console.log(' Cannabinoids: ' + JSON.stringify(detail.cannabinoids));
  console.log(' Types: ' + JSON.stringify(detail.types));
  console.log(' Weights: ' + JSON.stringify(detail.weights));
  console.log(' Images: ' + JSON.stringify(detail.images));
  console.log('\n Main text sample: ' + detail.mainText);
}

/**
 * Opens the brand listing page in headless Chromium, dismisses the age gate if
 * it is present, prints the structure of the product cards found there, then
 * visits the first product's detail page and prints what can be scraped from it.
 *
 * The browser is closed even when a step throws.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Go to a brand page with products
    await page.goto(BRAND_PAGE_URL, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Get detailed product card structure
    console.log('Analyzing product card structure...\n');
    const productData: ProductCard[] = await page.evaluate(collectProductCards);
    logProductSamples(productData);

    // Now visit a product detail page
    const firstHref = productData[0]?.href;
    if (firstHref) {
      // Resolve against the origin so relative and absolute hrefs both work.
      const productUrl = new URL(firstHref, BASE_URL).toString();
      console.log('\n=== PRODUCT DETAIL PAGE ===');
      console.log('Visiting: ' + productUrl + '\n');

      await page.goto(productUrl, { waitUntil: 'networkidle2', timeout: 30000 });
      await sleep(2000);

      const detailData: ProductDetail = await page.evaluate(collectProductDetail);
      logProductDetail(detailData);
    }
  } finally {
    await browser.close();
  }
}

main().catch(console.error);