/**
 * Test Treez brand-based product extraction
 * 1. Load /brands page
 * 2. Click "load more brands" to get all brands
 * 3. Extract brand URLs
 * 4. Visit each brand and extract products
 */
import puppeteer, { Page } from 'puppeteer';

const STORE_ID = 'best';

/** Number of brand pages visited when sampling product extraction. */
const SAMPLE_BRAND_COUNT = 3;

/** Upper bound on scroll iterations while waiting for lazy-loaded products. */
const MAX_SCROLLS = 20;

/** Consecutive unchanged page-height readings after which scrolling stops. */
const STABLE_HEIGHT_LIMIT = 3;

/** A brand link as found in the DOM; `url` is the raw (possibly relative) href. */
interface BrandLink {
  name: string;
  url: string;
}

/** A product scraped from a brand page; `price` is null when no "$n" text was found. */
interface Product {
  productId: string;
  name: string;
  price: number | null;
}

/**
 * Resolves after `ms` milliseconds.
 *
 * @param ms Delay in milliseconds.
 */
async function sleep(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Clicks through the age-gate modal if one is present on the page.
 * Does nothing when the modal element is not found.
 *
 * @param page Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const ageGate = await page.$('[data-testid="age-gate-modal"]');
  if (!ageGate) return;

  console.log('[AgeGate] Detected, bypassing...');
  const btn = await page.$('[data-testid="age-gate-submit-button"]');
  if (btn) await btn.click();
  // Fixed pause after the click (or after a missing button), as a crude settle delay.
  await sleep(2000);
}

/**
 * Logs diagnostic information about controls that might expand the brand list:
 * <select> elements, "load more"/"show all"/"view all" buttons, and elements
 * whose class name mentions "brand" or "select".
 *
 * NOTE: despite the name, this function only inspects and logs; it does not
 * click or otherwise interact with anything on the page.
 *
 * @param page Puppeteer page already showing the brands listing.
 */
async function loadAllBrands(page: Page): Promise<void> {
  console.log('[Brands] Looking for "load more" option...');

  // Look for select/dropdown with "load more" or "all brands" option
  const selectInfo = await page.evaluate(() => {
    const info: { selector: string; options: string[] }[] = [];
    document.querySelectorAll('select').forEach((sel, i) => {
      const options = Array.from(sel.options).map(o => o.text);
      // NOTE(review): `i` is the index among ALL selects in the document, which is
      // not what :nth-of-type means; treat this selector as a log label only.
      info.push({ selector: `select:nth-of-type(${i + 1})`, options });
    });
    return info;
  });
  console.log('[Brands] Found selects:', JSON.stringify(selectInfo, null, 2));

  // Look for any button or link with "load more" or "show all"
  const loadMoreButtons = await page.evaluate(() => {
    const matches: { text: string; tag: string }[] = [];
    document.querySelectorAll('button, a, [role="button"]').forEach(el => {
      const text = el.textContent?.toLowerCase() || '';
      if (text.includes('load more') || text.includes('show all') || text.includes('view all')) {
        matches.push({ text: el.textContent?.trim() || '', tag: el.tagName });
      }
    });
    return matches;
  });
  console.log('[Brands] Found load more buttons:', loadMoreButtons);

  // Try to find and interact with the brands dropdown
  // First, let's see all interactive elements with "brand" in them
  const brandElements = await page.evaluate(() => {
    const matches: { tag: string; class: string; text: string }[] = [];
    document.querySelectorAll('*').forEach(el => {
      // Read the attribute rather than `className`: on SVG elements `className`
      // is an SVGAnimatedString whose toString() is "[object SVGAnimatedString]".
      const className = el.getAttribute('class') ?? '';
      const lowered = className.toLowerCase();
      if (lowered.includes('brand') || lowered.includes('select')) {
        const text = el.textContent?.trim().slice(0, 100) || '';
        matches.push({
          tag: el.tagName,
          class: className.slice(0, 100),
          text: text.slice(0, 50),
        });
      }
    });
    return matches.slice(0, 20);
  });
  console.log('[Brands] Brand-related elements:', JSON.stringify(brandElements.slice(0, 10), null, 2));
}

/**
 * Collects brand links from the current page.
 *
 * Anchors are matched by several selectors; each distinct href is returned once,
 * in first-seen order. Anchors with an empty href or empty text are skipped.
 *
 * @param page Puppeteer page showing the brands listing.
 * @returns Brand names with their raw href values (may be relative).
 */
async function extractBrandLinks(page: Page): Promise<{ name: string; url: string }[]> {
  return page.evaluate(() => {
    const links: { name: string; url: string }[] = [];
    // Track seen hrefs in a Set instead of rescanning `links` for every anchor.
    const seenUrls = new Set<string>();

    // Look for brand cards/links
    const selectors = [
      'a[href*="/brand/"]',
      'a[href*="/brands/"]',
      '[class*="brand"] a',
      '[class*="Brand"] a',
    ];

    selectors.forEach(sel => {
      document.querySelectorAll(sel).forEach(el => {
        const href = el.getAttribute('href');
        const name = el.textContent?.trim() || '';
        if (!href || !name || seenUrls.has(href)) return;
        seenUrls.add(href);
        links.push({ name, url: href });
      });
    });

    return links;
  });
}

/**
 * Scrolls the current brand page to trigger lazy loading, then scrapes products.
 *
 * Scrolling stops after MAX_SCROLLS iterations or once the document height has
 * been unchanged for STABLE_HEIGHT_LIMIT consecutive checks. Products are
 * de-duplicated by name; entries without a name are skipped.
 *
 * @param page Puppeteer page already navigated to a brand page.
 * @returns Scraped products. `productId` falls back to a slug derived from the
 *          name when no "/product/<id>" link is found inside the card.
 */
async function extractProductsFromBrandPage(page: Page): Promise<Product[]> {
  // Scroll to load all products
  let previousHeight = 0;
  let sameHeightCount = 0;

  for (let scrollCount = 0; scrollCount < MAX_SCROLLS; scrollCount++) {
    const currentHeight = await page.evaluate(() => document.body.scrollHeight);
    if (currentHeight === previousHeight) {
      sameHeightCount++;
      if (sameHeightCount >= STABLE_HEIGHT_LIMIT) break;
    } else {
      sameHeightCount = 0;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1000);
    previousHeight = currentHeight;
  }

  // Extract products
  return page.evaluate(() => {
    const results: Product[] = [];
    const seen = new Set<string>();

    document.querySelectorAll('[class*="product_product__"]').forEach(el => {
      const nameEl = el.querySelector('[class*="product__name"], [class*="name__"]');
      const name = nameEl?.textContent?.trim() || '';
      if (!name || seen.has(name)) return;
      seen.add(name);

      // First "$<digits>[.<2 digits>]" occurrence in the price element's text.
      const priceText = el.querySelector('[class*="price"]')?.textContent || '';
      const priceMatch = priceText.match(/\$(\d+(?:\.\d{2})?)/);
      const price = priceMatch ? parseFloat(priceMatch[1]) : null;

      let productId = '';
      const linkEl = el.querySelector('a[href*="/product/"]');
      if (linkEl) {
        const href = linkEl.getAttribute('href') || '';
        const match = href.match(/\/product\/([^\/?]+)/);
        productId = match ? match[1] : '';
      }

      results.push({
        productId: productId || `treez_${name.replace(/\s+/g, '_').toLowerCase().slice(0, 30)}`,
        name,
        price,
      });
    });

    return results;
  });
}

/**
 * Entry point: opens the store's brands page, logs diagnostics about the brand
 * controls, extracts brand links, and samples product extraction from the first
 * few brands to estimate the total catalogue size.
 *
 * Errors are logged rather than rethrown; the browser is always closed.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('Testing Treez Brand-Based Extraction');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  // Block images
  await page.setRequestInterception(true);
  page.on('request', (req) => {
    if (['image', 'font', 'media'].includes(req.resourceType())) {
      void req.abort();
    } else {
      void req.continue();
    }
  });

  try {
    // Navigate to brands page
    const brandsUrl = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\n[1] Navigating to ${brandsUrl}`);
    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    // Screenshot to see what we're working with
    await page.screenshot({ path: '/tmp/treez-brands-page.png', fullPage: false });
    console.log('[1] Screenshot saved to /tmp/treez-brands-page.png');

    // Try to load all brands
    console.log('\n[2] Exploring brand selection options...');
    await loadAllBrands(page);

    // Extract brand links
    console.log('\n[3] Extracting brand links...');
    const brandLinks = await extractBrandLinks(page);
    // Remember where the links were found so relative hrefs resolve the way the
    // browser would resolve them, even after we navigate away.
    const linksBaseUrl = page.url();
    console.log(`Found ${brandLinks.length} brand links:`);
    brandLinks.slice(0, 10).forEach(b => console.log(` - ${b.name}: ${b.url}`));

    // If we found brand links, visit a couple to test
    if (brandLinks.length > 0) {
      const sampledBrands: BrandLink[] = brandLinks.slice(0, SAMPLE_BRAND_COUNT);
      console.log(`\n[4] Testing product extraction from first ${sampledBrands.length} brands...`);

      let totalProducts = 0;
      for (const brand of sampledBrands) {
        // URL resolution handles absolute, root-relative, path-relative and
        // protocol-relative hrefs uniformly.
        const brandUrl = new URL(brand.url, linksBaseUrl).toString();
        console.log(`\n Visiting brand: ${brand.name}`);
        console.log(` URL: ${brandUrl}`);

        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(2000);

        const products = await extractProductsFromBrandPage(page);
        console.log(` Products found: ${products.length}`);
        totalProducts += products.length;
      }

      // Average over the brands actually sampled (may be fewer than
      // SAMPLE_BRAND_COUNT), then scale to the full brand count.
      const estimatedTotal = Math.round((totalProducts / sampledBrands.length) * brandLinks.length);
      console.log(`\n[5] Summary from ${sampledBrands.length} brands: ${totalProducts} products`);
      console.log(`Estimated total (${brandLinks.length} brands): ~${estimatedTotal} products`);
    }
  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : error);
  } finally {
    await browser.close();
  }
}

main().catch(console.error);