The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
79 lines
2.7 KiB
TypeScript
79 lines
2.7 KiB
TypeScript
import { chromium } from 'playwright';
|
|
import { z } from 'zod';
|
|
import { openai } from '../llm-scraper/node_modules/@ai-sdk/openai';
|
|
import LLMScraper from '../llm-scraper/dist/index.js';
|
|
|
|
/**
 * Smoke test for the LLM scraper against an embedded Dutchie menu.
 *
 * Launches headless Chromium, opens the target menu page, waits for the
 * Dutchie iframe, then asks the LLM scraper to extract up to 40 products from
 * the iframe's HTML. Prints the product count and the first 10 products.
 *
 * Reads OPENAI_API_KEY (required) and OPENAI_MODEL (optional) from the
 * environment.
 *
 * @returns {Promise<void>} Resolves once the results are printed and the
 *   browser is closed.
 * @throws {Error} If OPENAI_API_KEY is not set, if the iframe content cannot be
 *   accessed, or if navigation / scraping rejects.
 */
async function main() {
  if (!process.env.OPENAI_API_KEY) {
    throw new Error('Set OPENAI_API_KEY before running this test.');
  }

  // `||` rather than `??`: an empty OPENAI_MODEL also falls back to the default.
  const model = process.env.OPENAI_MODEL || 'gpt-4o-mini';
  const targetUrl = 'https://azdeeplyrooted.com/menu';

  // Selectors for the iframe hosting the menu, in lookup priority order.
  const srcdocIframeSelector = 'iframe[srcdoc*="dutchie"]';
  const idIframeSelector = 'iframe[id^="iframe-"]';
  const productSelector = '[data-testid*="product"], [class*="product-card"]';

  const browser = await chromium.launch({ headless: true });

  try {
    // Created inside the try so a failure here still closes the browser.
    const page = await browser.newPage({ viewport: { width: 1280, height: 900 } });

    console.log(`Opening ${targetUrl}...`);
    await page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });

    const iframeHandle = await page.waitForSelector(
      `${srcdocIframeSelector}, ${idIframeSelector}`,
      { timeout: 30000 }
    );
    const frame = await iframeHandle.contentFrame();
    if (!frame) throw new Error('Could not access Dutchie iframe content.');

    await frame.waitForLoadState('domcontentloaded', { timeout: 30000 });

    // Best effort: a missing product selector must not abort the run, but the
    // reason is logged so an empty or failed scrape can be diagnosed.
    try {
      await frame.waitForSelector(productSelector, { timeout: 60000 });
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      console.warn(`Product elements not detected in the iframe; continuing anyway: ${reason}`);
    }

    // Fixed 2s pause before scraping (presumably to let the menu finish
    // rendering; confirm whether it is still needed).
    await page.waitForTimeout(2000);

    const productSchema = z.object({
      name: z.string(),
      brand: z.string().optional(),
      price: z.number().optional(),
      category: z.string().optional(),
      size: z.string().optional(),
      url: z.string().url().optional(),
    });

    const schema = z.object({
      products: z
        .array(productSchema)
        .min(1)
        .max(40)
        .describe('Products visible in the embedded Dutchie menu (limit to first page)'),
    });

    const scraper = new LLMScraper(openai(model));
    const { data } = await scraper.run(page, schema, {
      format: 'custom',
      // Feed the scraper the iframe's HTML when it can be resolved, otherwise
      // the outer page's HTML.
      formatFunction: async (currentPage) => {
        const iframe =
          (await currentPage.$(srcdocIframeSelector)) ||
          (await currentPage.$(idIframeSelector));
        const innerFrame = await iframe?.contentFrame();
        return innerFrame ? innerFrame.content() : currentPage.content();
      },
      prompt:
        'Extract the cannabis menu items currently visible in the embedded Dutchie menu. ' +
        'Return name, brand, numeric price (no currency symbol), category/size if present, ' +
        'and product URL if available. Skip navigation or filter labels.',
      mode: 'json',
    });

    console.log(`Scraped ${data.products.length} products from ${targetUrl}`);
    console.log(JSON.stringify(data.products.slice(0, 10), null, 2));
  } finally {
    await browser.close();
  }
}
|
|
|
|
// Script entry point: run main(); on rejection, print the failure and exit
// with status 1.
main().then(undefined, (failure) => {
  console.error('❌ LLM scraper test failed:', failure);
  process.exit(1);
});
|