Files
cannaiq/backend/scripts/test-treez-product-data.ts
Kelly 698995e46f chore: bump task worker version comment
Force new git SHA to avoid CI scientific notation bug.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 02:02:30 -07:00

175 lines
6.4 KiB
TypeScript

import puppeteer from 'puppeteer';
/**
 * Pause the calling async flow for roughly `ms` milliseconds.
 *
 * @param ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns A promise that settles with no value once the timer has fired.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Exploratory scrape of the storefront at shop.bestdispensary.com.
 *
 * Steps:
 *  1. Launch headless Chromium and open the `/brand/best` listing page.
 *  2. Click through the age gate if its modal is present.
 *  3. Collect a best-effort summary of every product link on the page and
 *     print the first three in detail.
 *  4. Open the first product's detail page and print a summary of it.
 *
 * All selectors are heuristic (`[class*="…"]` substring matches), so any
 * field may come back `undefined` when the markup does not match.
 *
 * @returns A promise that resolves once the browser has been closed.
 * @throws Re-throws any Puppeteer launch/navigation/evaluation error; the
 *         browser is still closed before the error propagates.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the Chromium process is shut down even when a
  // navigation times out or an evaluate call throws.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Go to a brand page with products
    await page.goto('https://shop.bestdispensary.com/brand/best', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Base against which product hrefs are resolved later on.
    const listingUrl = page.url();

    // Get detailed product card structure
    console.log('Analyzing product card structure...\n');

    // NOTE: the callback below is executed inside the page, so it must be
    // self-contained and may not reference anything from this Node scope.
    const productData = await page.evaluate(() =>
      Array.from(document.querySelectorAll('a[href*="/product/"]')).map((card) => {
        // URL/slug
        const href = card.getAttribute('href');

        // Image
        const img = card.querySelector('img');

        // Name (usually in h3 or similar)
        const nameEl = card.querySelector('h3, h4, h5, [class*="name"], [class*="title"]');

        // Brand
        const brandEl = card.querySelector('[class*="brand"], [class*="Brand"]');

        // Price
        const priceEl = card.querySelector('[class*="price"], [class*="Price"]');

        // Category/Type badges (empty texts are dropped)
        const badges = Array.from(
          card.querySelectorAll('[class*="badge"], [class*="tag"], [class*="label"]'),
        )
          .map((b) => b.textContent?.trim())
          .filter((text): text is string => Boolean(text));

        // THC/CBD info
        const thcEl = card.querySelector('[class*="thc"], [class*="THC"]');
        const cbdEl = card.querySelector('[class*="cbd"], [class*="CBD"]');

        // Weight/size
        const weightEl = card.querySelector('[class*="weight"], [class*="size"], [class*="gram"]');

        return {
          href,
          slug: href?.split('/product/')[1],
          imageUrl: img?.getAttribute('src'),
          imageAlt: img?.getAttribute('alt'),
          name: nameEl?.textContent?.trim(),
          brand: brandEl?.textContent?.trim(),
          priceText: priceEl?.textContent?.trim(),
          badges,
          thc: thcEl?.textContent?.trim(),
          cbd: cbdEl?.textContent?.trim(),
          weight: weightEl?.textContent?.trim(),
          // First 200 chars of whitespace-collapsed text, for manual analysis
          allText: card.textContent?.replace(/\s+/g, ' ').trim().slice(0, 200),
          // All classes on the card
          cardClasses: card.className,
        };
      }),
    );

    console.log('Found ' + productData.length + ' products\n');
    console.log('Sample product data:\n');

    // Show first 3 products in detail
    productData.slice(0, 3).forEach((p, i) => {
      console.log('Product ' + (i + 1) + ':');
      console.log(' Name: ' + p.name);
      console.log(' Brand: ' + p.brand);
      console.log(' Slug: ' + p.slug);
      console.log(' Price: ' + p.priceText);
      console.log(' THC: ' + p.thc);
      console.log(' CBD: ' + p.cbd);
      console.log(' Weight: ' + p.weight);
      console.log(' Badges: ' + JSON.stringify(p.badges));
      console.log(' Image: ' + (p.imageUrl ? p.imageUrl.slice(0, 60) + '...' : 'none'));
      console.log(' All Text: ' + p.allText);
      console.log('');
    });

    // Now visit a product detail page
    const firstHref = productData[0]?.href;
    if (firstHref) {
      // Resolve like a browser would: works for root-relative, relative and
      // absolute hrefs instead of blindly prefixing a hard-coded origin.
      const productUrl = new URL(firstHref, listingUrl).toString();
      console.log('\n=== PRODUCT DETAIL PAGE ===');
      console.log('Visiting: ' + productUrl + '\n');

      await page.goto(productUrl, { waitUntil: 'networkidle2', timeout: 30000 });
      await sleep(2000);

      const detailData = await page.evaluate(() => {
        // Description: first element matching a description-like class, else the first <p>
        const descEl = document.querySelector('[class*="description"], [class*="Description"], p');
        // Body text for analysis
        const mainEl = document.querySelector('main');

        return {
          // Headings
          h1: document.querySelector('h1')?.textContent?.trim(),
          h2s: Array.from(document.querySelectorAll('h2')).map((h) => h.textContent?.trim()),
          // Price
          prices: Array.from(
            document.querySelectorAll('[class*="price"], [class*="Price"]'),
          ).map((p) => p.textContent?.trim()),
          description: descEl?.textContent?.trim().slice(0, 300),
          // THC/CBD
          cannabinoids: Array.from(
            document.querySelectorAll(
              '[class*="thc"], [class*="THC"], [class*="cbd"], [class*="CBD"], [class*="cannabinoid"]',
            ),
          ).map((el) => el.textContent?.trim()),
          // Category/strain type
          types: Array.from(
            document.querySelectorAll('[class*="strain"], [class*="type"], [class*="category"]'),
          ).map((t) => t.textContent?.trim()),
          // Weight options; long texts are dropped as they are unlikely to be option labels
          weights: Array.from(
            document.querySelectorAll('[class*="weight"], [class*="size"], [class*="option"]'),
          )
            .map((w) => w.textContent?.trim())
            .filter((w) => w && w.length < 30),
          // Images (first three only)
          images: Array.from(
            document.querySelectorAll('img[src*="product"], img[src*="menu"]'),
          )
            .map((img) => img.getAttribute('src'))
            .slice(0, 3),
          mainText: mainEl?.textContent?.replace(/\s+/g, ' ').trim().slice(0, 500),
        };
      });

      console.log('Product Detail:');
      console.log(' H1: ' + detailData.h1);
      console.log(' H2s: ' + JSON.stringify(detailData.h2s));
      console.log(' Prices: ' + JSON.stringify(detailData.prices));
      // `||` on purpose: an empty description should also print 'none'.
      console.log(' Description: ' + (detailData.description || 'none'));
      console.log(' Cannabinoids: ' + JSON.stringify(detailData.cannabinoids));
      console.log(' Types: ' + JSON.stringify(detailData.types));
      console.log(' Weights: ' + JSON.stringify(detailData.weights));
      console.log(' Images: ' + JSON.stringify(detailData.images));
      console.log('\n Main text sample: ' + detailData.mainText);
    }
  } finally {
    await browser.close();
  }
}
// Script entry point: run the scrape, report any failure, and surface it to
// the caller (shell / CI) through a non-zero exit status instead of exiting 0.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});