// cannaiq/backend/test-llm-scraper-deeply-rooted.ts

import { chromium } from 'playwright';
import { z } from 'zod';
import { openai } from '../llm-scraper/node_modules/@ai-sdk/openai';
import LLMScraper from '../llm-scraper/dist/index.js';

/**
 * Manual smoke test for LLMScraper against the Deeply Rooted menu page.
 *
 * Opens the target URL in headless Chromium, waits for the embedded Dutchie
 * iframe, hands the iframe's HTML to LLMScraper through a custom format
 * function, and prints the product count plus the first ten extracted items.
 *
 * @throws Error if `OPENAI_API_KEY` is not set, or if the iframe's content
 *   frame cannot be accessed. Navigation, selector, and scraper errors
 *   propagate to the caller after the browser has been closed.
 */
async function main(): Promise<void> {
  if (!process.env.OPENAI_API_KEY) {
    throw new Error('Set OPENAI_API_KEY before running this test.');
  }

  // `||` rather than `??` so an empty OPENAI_MODEL also falls back to the default.
  const model = process.env.OPENAI_MODEL || 'gpt-4o-mini';
  const targetUrl = 'https://azdeeplyrooted.com/menu';

  const browser = await chromium.launch({ headless: true });

  try {
    // Created inside the try so that a failure here still closes the browser.
    const page = await browser.newPage({ viewport: { width: 1280, height: 900 } });

    console.log(`Opening ${targetUrl}...`);
    await page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });

    const iframeHandle = await page.waitForSelector(
      'iframe[srcdoc*="dutchie"], iframe[id^="iframe-"]',
      { timeout: 30000 }
    );
    const frame = await iframeHandle.contentFrame();
    if (!frame) throw new Error('Could not access Dutchie iframe content.');

    await frame.waitForLoadState('domcontentloaded', { timeout: 30000 });

    // Best effort: a missing product selector must not abort the run, but it
    // is reported so that a later empty/invalid extraction can be explained.
    try {
      await frame.waitForSelector('[data-testid*="product"], [class*="product-card"]', {
        timeout: 60000,
      });
    } catch (error) {
      console.warn(
        'Product cards were not detected in the Dutchie iframe; continuing anyway:',
        error instanceof Error ? error.message : error
      );
    }

    // Short fixed pause after the selector wait before the HTML is captured.
    await page.waitForTimeout(2000);

    const schema = z.object({
      products: z
        .array(
          z.object({
            name: z.string(),
            brand: z.string().optional(),
            price: z.number().optional(),
            category: z.string().optional(),
            size: z.string().optional(),
            url: z.string().url().optional(),
          })
        )
        .min(1)
        .max(40)
        .describe('Products visible in the embedded Dutchie menu (limit to first page)'),
    });

    const scraper = new LLMScraper(openai(model));
    const { data } = await scraper.run(page, schema, {
      format: 'custom',
      // Feed the model the iframe's HTML; fall back to the outer page HTML
      // when no iframe (or no content frame) can be found.
      formatFunction: async (currentPage) => {
        const iframe =
          (await currentPage.$('iframe[srcdoc*="dutchie"]')) ||
          (await currentPage.$('iframe[id^="iframe-"]'));
        const innerFrame = await iframe?.contentFrame();
        return innerFrame ? innerFrame.content() : currentPage.content();
      },
      prompt:
        'Extract the cannabis menu items currently visible in the embedded Dutchie menu. ' +
        'Return name, brand, numeric price (no currency symbol), category/size if present, ' +
        'and product URL if available. Skip navigation or filter labels.',
      mode: 'json',
    });

    console.log(`Scraped ${data.products.length} products from ${targetUrl}`);
    console.log(JSON.stringify(data.products.slice(0, 10), null, 2));
  } finally {
    await browser.close();
  }
}

/** Prints the failure to stderr and ends the process with exit status 1. */
function reportFailure(reason: unknown): never {
  console.error('❌ LLM scraper test failed:', reason);
  process.exit(1);
}

// Run the test; any rejection from main() is routed to the failure reporter.
main().then(undefined, reportFailure);