// Commit note: Force new git SHA to avoid CI scientific notation bug.
// 🤖 Generated with [Claude Code](https://claude.com/claude-code)
// Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
// File info: 175 lines, 6.4 KiB, TypeScript
import puppeteer from 'puppeteer';
|
|
|
|
/**
 * Pause the calling async flow for a fixed amount of time.
 *
 * @param ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns A promise that settles (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
|
|
/**
 * Exploratory scrape of the shop's "best" brand page.
 *
 * Steps:
 *  1. Launch headless Chromium and open the brand listing page.
 *  2. Click through the age gate if its modal is present.
 *  3. Collect loosely-matched fields (name, brand, price, THC/CBD, weight,
 *     badges, image) from every anchor whose href contains "/product/" and
 *     print the first three.
 *  4. Open the first product's detail page and print a summary of headings,
 *     prices, description, cannabinoids, types, weights and images.
 *
 * The browser is always closed, including when navigation or evaluation
 * throws, so a failed run does not leave a stray Chromium process behind.
 *
 * @returns Resolves once the report has been printed and the browser closed.
 * @throws Re-throws any Puppeteer navigation/evaluation error after cleanup.
 */
async function main(): Promise<void> {
  const baseUrl = 'https://shop.bestdispensary.com';

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Go to a brand page with products
    await page.goto(baseUrl + '/brand/best', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    // Extra settle time after network idle before querying the DOM.
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Get detailed product card structure
    console.log('Analyzing product card structure...\n');

    // NOTE: this callback is serialized and executed inside the page, so it
    // must stay self-contained (no references to Node-side variables).
    const productData = await page.evaluate(() => {
      const products: any[] = [];

      document.querySelectorAll('a[href*="/product/"]').forEach((card: Element) => {
        const product: any = {};

        // URL/slug
        product.href = card.getAttribute('href');
        product.slug = product.href?.split('/product/')[1];

        // Image
        const img = card.querySelector('img');
        product.imageUrl = img?.getAttribute('src');
        product.imageAlt = img?.getAttribute('alt');

        // Name (usually in h3 or similar)
        const nameEl = card.querySelector('h3, h4, h5, [class*="name"], [class*="title"]');
        product.name = nameEl?.textContent?.trim();

        // Brand
        const brandEl = card.querySelector('[class*="brand"], [class*="Brand"]');
        product.brand = brandEl?.textContent?.trim();

        // Price
        const priceEl = card.querySelector('[class*="price"], [class*="Price"]');
        product.priceText = priceEl?.textContent?.trim();

        // Category/Type badges
        const badges: string[] = [];
        card.querySelectorAll('[class*="badge"], [class*="tag"], [class*="label"]').forEach((b: Element) => {
          const text = b.textContent?.trim();
          if (text) badges.push(text);
        });
        product.badges = badges;

        // THC/CBD info
        const thcEl = card.querySelector('[class*="thc"], [class*="THC"]');
        const cbdEl = card.querySelector('[class*="cbd"], [class*="CBD"]');
        product.thc = thcEl?.textContent?.trim();
        product.cbd = cbdEl?.textContent?.trim();

        // Weight/size
        const weightEl = card.querySelector('[class*="weight"], [class*="size"], [class*="gram"]');
        product.weight = weightEl?.textContent?.trim();

        // Get all text content for analysis (whitespace collapsed, capped at 200 chars)
        product.allText = card.textContent?.replace(/\s+/g, ' ').trim().slice(0, 200);

        // Get all classes on the card
        product.cardClasses = card.className;

        products.push(product);
      });

      return products;
    });

    console.log('Found ' + productData.length + ' products\n');
    console.log('Sample product data:\n');

    // Show first 3 products in detail
    productData.slice(0, 3).forEach((p: any, i: number) => {
      console.log('Product ' + (i + 1) + ':');
      console.log('  Name: ' + p.name);
      console.log('  Brand: ' + p.brand);
      console.log('  Slug: ' + p.slug);
      console.log('  Price: ' + p.priceText);
      console.log('  THC: ' + p.thc);
      console.log('  CBD: ' + p.cbd);
      console.log('  Weight: ' + p.weight);
      console.log('  Badges: ' + JSON.stringify(p.badges));
      console.log('  Image: ' + (p.imageUrl ? p.imageUrl.slice(0, 60) + '...' : 'none'));
      console.log('  All Text: ' + p.allText);
      console.log('');
    });

    // Now visit a product detail page
    const firstHref: unknown = productData.length > 0 ? productData[0].href : undefined;
    if (typeof firstHref === 'string' && firstHref.length > 0) {
      // Resolve against the shop origin instead of concatenating strings, so
      // an absolute href (or one without a leading slash) still yields a
      // valid URL; root-relative hrefs resolve exactly as before.
      const productUrl = new URL(firstHref, baseUrl).toString();
      console.log('\n=== PRODUCT DETAIL PAGE ===');
      console.log('Visiting: ' + productUrl + '\n');

      await page.goto(productUrl, { waitUntil: 'networkidle2', timeout: 30000 });
      await sleep(2000);

      // Runs inside the page; must stay self-contained.
      const detailData = await page.evaluate(() => {
        const data: any = {};

        // Get all text elements
        data.h1 = document.querySelector('h1')?.textContent?.trim();
        data.h2s = Array.from(document.querySelectorAll('h2')).map(h => h.textContent?.trim());

        // Price
        const priceEls = document.querySelectorAll('[class*="price"], [class*="Price"]');
        data.prices = Array.from(priceEls).map(p => p.textContent?.trim());

        // Description (falls back to the first <p> on the page)
        const descEl = document.querySelector('[class*="description"], [class*="Description"], p');
        data.description = descEl?.textContent?.trim().slice(0, 300);

        // THC/CBD
        data.cannabinoids = [];
        document.querySelectorAll('[class*="thc"], [class*="THC"], [class*="cbd"], [class*="CBD"], [class*="cannabinoid"]').forEach(el => {
          data.cannabinoids.push(el.textContent?.trim());
        });

        // Category/strain type
        const typeEls = document.querySelectorAll('[class*="strain"], [class*="type"], [class*="category"]');
        data.types = Array.from(typeEls).map(t => t.textContent?.trim());

        // Weight options (drop empty and long strings, which are unlikely to be option labels)
        const weightEls = document.querySelectorAll('[class*="weight"], [class*="size"], [class*="option"]');
        data.weights = Array.from(weightEls).map(w => w.textContent?.trim()).filter(w => w && w.length < 30);

        // Images
        const imgs = document.querySelectorAll('img[src*="product"], img[src*="menu"]');
        data.images = Array.from(imgs).map(img => img.getAttribute('src')).slice(0, 3);

        // Get body text for analysis
        const main = document.querySelector('main');
        data.mainText = main?.textContent?.replace(/\s+/g, ' ').trim().slice(0, 500);

        return data;
      });

      console.log('Product Detail:');
      console.log('  H1: ' + detailData.h1);
      console.log('  H2s: ' + JSON.stringify(detailData.h2s));
      console.log('  Prices: ' + JSON.stringify(detailData.prices));
      console.log('  Description: ' + (detailData.description || 'none'));
      console.log('  Cannabinoids: ' + JSON.stringify(detailData.cannabinoids));
      console.log('  Types: ' + JSON.stringify(detailData.types));
      console.log('  Weights: ' + JSON.stringify(detailData.weights));
      console.log('  Images: ' + JSON.stringify(detailData.images));
      console.log('\n  Main text sample: ' + detailData.mainText);
    }
  } finally {
    // Always release the Chromium process, even if a step above threw.
    await browser.close();
  }
}
|
|
|
|
// Script entry point: run the scrape and report any rejection on stderr.
main().catch((err: unknown) => {
  console.error(err);
});
|