import { chromium as playwright } from 'playwright-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import fs from 'fs/promises';
import path from 'path';

playwright.use(StealthPlugin());

/** One product as scraped from a menu card. */
type Product = {
  name: string;
  brand?: string;
  price?: number;
  size?: string;
  category?: string;
  url?: string;
  imageUrl?: string;
  inStock: boolean;
};

/** All scraped products that share one brand label. */
type BrandGroup = {
  brand: string;
  products: Product[];
};

// Browser/page types are derived from the launcher that is already imported,
// so no additional package import is needed.
type Browser = Awaited<ReturnType<typeof playwright.launch>>;
type Page = Awaited<ReturnType<Browser['newPage']>>;

const TARGET_URL = 'https://dutchie.com/embedded-menu/AZ-Deeply-Rooted';
const OUTPUT_DIR = path.join(process.cwd(), 'scrape-output', 'deeply-rooted');
const JSON_PATH = path.join(OUTPUT_DIR, 'inventory-by-brand.json');

/** Loose selector used to detect that product/card elements are present and to count them while scrolling. */
const PRODUCT_SELECTOR = '[data-testid*="product"], [data-testid*="card"]';

/**
 * Script evaluated inside the page to turn product cards into plain objects.
 *
 * It is kept as a string (as in the original code) rather than passed as a
 * function, so it is sent to the browser verbatim.
 */
const EXTRACT_SCRIPT = `
(() => {
  const cardSelector =
    '[data-testid="product-list-item"], [data-testid="card-link"], [data-testid*="product-card"]';
  const productLinkSelector = 'a[href*="/product/"]';

  // Prefer an amount introduced by a dollar sign. A bare number is only
  // accepted when the caller allows it (text taken from a dedicated price
  // element); free card text may contain sizes such as "3.5g".
  function parsePrice(text, requireDollarSign) {
    if (!text) return undefined;
    const dollar = text.match(/\\$\\s*((?:\\d{1,3}(?:,\\d{3})+|\\d+)(?:\\.\\d{1,2})?)/);
    if (dollar) return parseFloat(dollar[1].replace(/,/g, ''));
    if (requireDollarSign) return undefined;
    const bare = text.match(/(\\d+(?:\\.\\d{1,2})?)/);
    return bare ? parseFloat(bare[1]) : undefined;
  }

  // First absolute image URL found on the card: img src, then srcset, then data-src.
  function pickImage(card) {
    const imgEl =
      card.querySelector('img[src^="http"]') || card.querySelector('source[srcset]');
    if (imgEl && imgEl.src && imgEl.src.startsWith('http')) {
      return imgEl.src;
    }
    if (imgEl && imgEl.srcset) {
      const first = imgEl.srcset.split(',')[0]?.trim().split(' ')[0];
      if (first && first.startsWith('http')) return first;
    }
    const dataSrc = card.querySelector('img[data-src]')?.getAttribute('data-src');
    if (dataSrc && dataSrc.startsWith('http')) return dataSrc;
    return undefined;
  }

  // The substring selector also matches elements nested inside a card
  // (e.g. product-card-name, product-card-price). Keep only outermost matches
  // so each card yields exactly one product.
  const cards = Array.from(document.querySelectorAll(cardSelector)).filter(
    (el) => !(el.parentElement && el.parentElement.closest(cardSelector))
  );

  return cards
    .map((card) => {
      const name =
        card.querySelector('[data-testid="product-card-name"]')?.innerText?.trim() ||
        card.querySelector('[data-testid="product-name"]')?.innerText?.trim() ||
        card.querySelector('h3, h4')?.innerText?.trim() ||
        (card.textContent || '').split('\\n').map((t) => t.trim()).find((t) => t.length > 3) ||
        '';

      const brand =
        card.querySelector('[data-testid="product-card-brand"]')?.innerText?.trim() ||
        card.querySelector('[data-testid="product-brand"]')?.innerText?.trim() ||
        undefined;

      const priceElText = card.querySelector('[data-testid="product-card-price"]')?.innerText;
      const price = priceElText
        ? parsePrice(priceElText, false)
        : parsePrice(card.textContent || '', true);

      const size =
        card.querySelector('[data-testid*="size"]')?.innerText?.trim() ||
        card.querySelector('[data-testid*="weight"]')?.innerText?.trim() ||
        undefined;

      const category =
        card.querySelector('[data-testid*="category"]')?.innerText?.trim() || undefined;

      // The card itself (or an ancestor) may be the product anchor;
      // querySelector alone only inspects descendants.
      const link = card.closest(productLinkSelector) || card.querySelector(productLinkSelector);
      const url = link?.href;

      const imageUrl = pickImage(card);

      const cardText = (card.textContent || '').toLowerCase();
      const inStock = !(cardText.includes('sold out') || cardText.includes('out of stock'));

      return { name, brand, price, size, category, url, imageUrl, inStock };
    })
    .filter((p) => p.name);
})()
`;

/** Creates the output directory (and any missing parents). */
async function ensureDirs(): Promise<void> {
  await fs.mkdir(OUTPUT_DIR, { recursive: true });
}

/**
 * Polls the page every 2 seconds until it no longer looks like a Cloudflare
 * challenge, judged by known title fragments and markup markers.
 *
 * @param page - Page to inspect.
 * @param maxWaitMs - Maximum time to keep polling, in milliseconds.
 * @returns `true` once no challenge marker is seen, `false` if the time budget runs out.
 */
async function waitForCloudflare(page: Page, maxWaitMs = 60000): Promise<boolean> {
  const start = Date.now();
  while (Date.now() - start < maxWaitMs) {
    // A failed read (e.g. mid-navigation) is treated as empty text, not as an error.
    const title: string = await page.title().catch(() => '');
    const content: string = await page.content().catch(() => '');
    const challenge =
      title.includes('Attention Required') ||
      title.includes('Just a moment') ||
      content.includes('challenge-platform') ||
      content.includes('cf-challenge');
    if (!challenge) return true;
    await page.waitForTimeout(2000);
  }
  return false;
}

/**
 * Scrolls the page with the mouse wheel until the number of elements matching
 * PRODUCT_SELECTOR stops growing (or 40 scrolls have been made), then scrolls
 * back to the top.
 *
 * @param page - Page whose product list should be expanded.
 */
async function loadAllProducts(page: Page): Promise<void> {
  const maxScrolls = 40;
  const countCards = (): Promise<number> =>
    page.$$eval(PRODUCT_SELECTOR, (els: Element[]) => els.length);

  let previousCount = await countCards();
  for (let i = 0; i < maxScrolls; i++) {
    await page.mouse.wheel(0, 1400);
    await page.waitForTimeout(900);
    const currentCount = await countCards();
    if (currentCount <= previousCount) break;
    previousCount = currentCount;
  }
  await page.evaluate(() => window.scrollTo({ top: 0 }));
}

/**
 * Runs EXTRACT_SCRIPT in the page and returns the products it produced.
 *
 * @param page - Page that has the product cards rendered.
 * @returns Products with a non-empty name.
 * @throws Error if the in-page script does not return an array.
 */
async function extractProducts(page: Page): Promise<Product[]> {
  const result: unknown = await page.evaluate(EXTRACT_SCRIPT);
  if (!Array.isArray(result)) {
    throw new Error('Product extraction script did not return an array');
  }
  // The element shape is dictated by EXTRACT_SCRIPT above; it is not re-validated here.
  return result as Product[];
}

/**
 * Groups products by brand, preserving first-seen brand order.
 * Products with no (or an empty) brand are collected under 'Unknown'.
 *
 * @param products - Products to group.
 * @returns One entry per distinct brand label.
 */
function groupByBrand(products: Product[]): BrandGroup[] {
  const groups = new Map<string, Product[]>();
  for (const product of products) {
    const key = product.brand || 'Unknown';
    let bucket = groups.get(key);
    if (!bucket) {
      bucket = [];
      groups.set(key, bucket);
    }
    bucket.push(product);
  }
  return Array.from(groups, ([brand, grouped]) => ({ brand, products: grouped }));
}

/**
 * Entry point: opens the menu page, waits out any Cloudflare challenge, loads
 * and extracts the products, and writes them grouped by brand to JSON_PATH.
 * Any failure is logged and reported through a non-zero process exit code.
 */
async function main(): Promise<void> {
  let browser: Browser | undefined;
  try {
    await ensureDirs();

    browser = await playwright.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-blink-features=AutomationControlled',
      ],
    });

    const page = await browser.newPage({
      viewport: { width: 1300, height: 900 },
      userAgent:
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    });

    console.log(`Navigating to ${TARGET_URL}...`);
    await page.goto(TARGET_URL, { waitUntil: 'domcontentloaded', timeout: 90000 });

    const cfOk = await waitForCloudflare(page, 60000);
    if (!cfOk) throw new Error('Cloudflare challenge not passed in time');

    // Best effort: keep going on timeout, but leave a trace instead of swallowing it.
    await page.waitForSelector(PRODUCT_SELECTOR, { timeout: 60000 }).catch(() => {
      console.warn('No product elements appeared within 60s; continuing with the current page state');
    });

    await loadAllProducts(page);
    const products = await extractProducts(page);
    if (products.length === 0) {
      console.warn('No products were extracted; the written inventory will be empty');
    }
    const grouped = groupByBrand(products);

    await fs.writeFile(JSON_PATH, JSON.stringify(grouped, null, 2));
    console.log(`Found ${products.length} products across ${grouped.length} brands`);
    console.log(`Saved grouped inventory to ${JSON_PATH}`);
  } catch (err: unknown) {
    console.error('Inventory scrape failed:', err);
    process.exitCode = 1;
  } finally {
    // Close the browser whenever it was launched, even if page creation failed.
    await browser?.close().catch((closeErr: unknown) => {
      console.warn('Failed to close browser:', closeErr);
    });
  }
}

// main() handles its own errors; `void` marks the promise as intentionally not awaited.
void main();