import { chromium } from 'playwright';
import { z } from 'zod';
import { openai } from '../llm-scraper/node_modules/@ai-sdk/openai';
import LLMScraper from '../llm-scraper/dist/index.js';

// Selectors for the iframe that embeds the Dutchie menu. The srcdoc-based
// selector is tried first by the formatter; the id-prefix selector is the fallback.
const DUTCHIE_SRCDOC_IFRAME = 'iframe[srcdoc*="dutchie"]';
const GENERIC_IFRAME = 'iframe[id^="iframe-"]';

// Selector for product cards inside the embedded menu frame.
const PRODUCT_CARD_SELECTOR = '[data-testid*="product"], [class*="product-card"]';

// Instruction handed to the LLM alongside the extracted HTML.
// The line break after the first sentence is written as an explicit `\n`
// escape: a raw line break inside a quoted string literal is a syntax error.
const EXTRACTION_PROMPT = [
  'Extract the cannabis menu items currently visible in the embedded Dutchie menu. \n',
  'Return name, brand, numeric price (no currency symbol), category/size if present, ',
  'and product URL if available. Skip navigation or filter labels.',
].join('');

/**
 * Custom llm-scraper formatter: returns the HTML of the embedded menu iframe
 * when one can be resolved, otherwise the HTML of the outer page.
 *
 * @param {import('playwright').Page} currentPage - Page passed in by the scraper.
 * @returns {Promise<string>} HTML content of the iframe or of the page.
 */
async function readMenuHtml(currentPage) {
  const iframe =
    (await currentPage.$(DUTCHIE_SRCDOC_IFRAME)) ?? (await currentPage.$(GENERIC_IFRAME));
  const innerFrame = await iframe?.contentFrame();
  return innerFrame ? innerFrame.content() : currentPage.content();
}

/**
 * Opens the target menu page in headless Chromium, waits for the embedded
 * iframe, runs LLMScraper against the iframe's HTML, and prints the number of
 * extracted products followed by the first ten as JSON.
 *
 * Reads OPENAI_API_KEY (required) and OPENAI_MODEL (optional, falls back to
 * 'gpt-4o-mini') from the environment.
 *
 * @returns {Promise<void>}
 * @throws {Error} If OPENAI_API_KEY is not set, or the iframe's content frame
 *   cannot be accessed. Errors from Playwright or the scraper propagate as-is.
 */
async function main() {
  if (!process.env.OPENAI_API_KEY) {
    throw new Error('Set OPENAI_API_KEY before running this test.');
  }

  // `||` (not `??`) so that an empty OPENAI_MODEL also falls back to the default.
  const model = process.env.OPENAI_MODEL || 'gpt-4o-mini';
  const targetUrl = 'https://azdeeplyrooted.com/menu';

  const browser = await chromium.launch({ headless: true });

  try {
    // Created inside the try block so the browser is closed even if this fails.
    const page = await browser.newPage({ viewport: { width: 1280, height: 900 } });

    console.log(`Opening ${targetUrl}...`);
    await page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });

    const iframeHandle = await page.waitForSelector(
      `${DUTCHIE_SRCDOC_IFRAME}, ${GENERIC_IFRAME}`,
      { timeout: 30000 }
    );
    const frame = await iframeHandle.contentFrame();
    if (!frame) {
      throw new Error('Could not access Dutchie iframe content.');
    }

    await frame.waitForLoadState('domcontentloaded', { timeout: 30000 });

    // Best-effort wait for product cards: a failure here must not abort the
    // run, but it is reported instead of being swallowed silently.
    try {
      await frame.waitForSelector(PRODUCT_CARD_SELECTOR, { timeout: 60000 });
    } catch (error) {
      console.warn(
        `Product cards not detected in the menu iframe; continuing anyway: ${error?.message ?? error}`
      );
    }

    // Fixed extra pause before scraping — presumably lets the menu finish
    // rendering; TODO confirm whether it is still needed.
    await page.waitForTimeout(2000);

    // Expected shape of the LLM output: 1 to 40 products, only `name` required.
    const schema = z.object({
      products: z
        .array(
          z.object({
            name: z.string(),
            brand: z.string().optional(),
            price: z.number().optional(),
            category: z.string().optional(),
            size: z.string().optional(),
            url: z.string().url().optional(),
          })
        )
        .min(1)
        .max(40)
        .describe('Products visible in the embedded Dutchie menu (limit to first page)'),
    });

    const scraper = new LLMScraper(openai(model));
    const { data } = await scraper.run(page, schema, {
      format: 'custom',
      formatFunction: readMenuHtml,
      prompt: EXTRACTION_PROMPT,
      mode: 'json',
    });

    console.log(`Scraped ${data.products.length} products from ${targetUrl}`);
    console.log(JSON.stringify(data.products.slice(0, 10), null, 2));
  } finally {
    await browser.close();
  }
}

main().catch((error) => {
  console.error('❌ LLM scraper test failed:', error);
  process.exit(1);
});