feat: Add stale process monitor, users route, landing page, archive old scripts
- Add backend stale process monitoring API (/api/stale-processes) - Add users management route - Add frontend landing page and stale process monitor UI on /scraper-tools - Move old development scripts to backend/archive/ - Update frontend build with new features 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
78
backend/archive/test-llm-scraper-deeply-rooted.ts
Normal file
78
backend/archive/test-llm-scraper-deeply-rooted.ts
Normal file
@@ -0,0 +1,78 @@
|
||||
import { chromium } from 'playwright';
|
||||
import { z } from 'zod';
|
||||
import { openai } from '../llm-scraper/node_modules/@ai-sdk/openai';
|
||||
import LLMScraper from '../llm-scraper/dist/index.js';
|
||||
|
||||
/**
 * Manual smoke test: run LLMScraper against the Dutchie menu embedded in the
 * Deeply Rooted menu page.
 *
 * Steps: launch headless Chromium, open the menu page, wait for the embedded
 * menu iframe, then ask the model to extract between 1 and 40 products from
 * the iframe's HTML. Prints the product count and the first ten products.
 *
 * Environment:
 *   OPENAI_API_KEY - required; checked before any browser is launched.
 *   OPENAI_MODEL   - optional model name; falls back to 'gpt-4o-mini'.
 *
 * @returns {Promise<void>} Resolves once the results are printed and the browser is closed.
 * @throws {Error} If OPENAI_API_KEY is unset, if the iframe's content frame
 *   cannot be accessed, or if any Playwright/scraper call rejects.
 */
async function main() {
  if (!process.env.OPENAI_API_KEY) {
    throw new Error('Set OPENAI_API_KEY before running this test.');
  }

  // `||` (not `??`) on purpose: an empty OPENAI_MODEL also falls back to the default.
  const model = process.env.OPENAI_MODEL || 'gpt-4o-mini';
  const targetUrl = 'https://azdeeplyrooted.com/menu';

  // The two iframe shapes this test looks for; defined once so the initial
  // wait and the custom formatter cannot drift apart.
  const dutchieIframeSelector = 'iframe[srcdoc*="dutchie"]';
  const fallbackIframeSelector = 'iframe[id^="iframe-"]';

  const browser = await chromium.launch({ headless: true });

  try {
    // Created inside the try so the browser is still closed if this call rejects.
    const page = await browser.newPage({ viewport: { width: 1280, height: 900 } });

    console.log(`Opening ${targetUrl}...`);
    await page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });

    const iframeHandle = await page.waitForSelector(
      `${dutchieIframeSelector}, ${fallbackIframeSelector}`,
      { timeout: 30000 }
    );
    const frame = await iframeHandle.contentFrame();
    if (!frame) throw new Error('Could not access Dutchie iframe content.');

    await frame.waitForLoadState('domcontentloaded', { timeout: 30000 });

    // Best-effort: a missing product card does not abort the run, but the
    // failure is reported instead of being silently discarded.
    try {
      await frame.waitForSelector('[data-testid*="product"], [class*="product-card"]', {
        timeout: 60000,
      });
    } catch (waitError) {
      console.warn(
        'Product cards did not appear in the menu iframe; continuing anyway:',
        waitError?.message ?? waitError
      );
    }

    // Fixed pause before scraping — presumably lets the menu finish rendering.
    await page.waitForTimeout(2000);

    const schema = z.object({
      products: z
        .array(
          z.object({
            name: z.string(),
            brand: z.string().optional(),
            price: z.number().optional(),
            category: z.string().optional(),
            size: z.string().optional(),
            url: z.string().url().optional(),
          })
        )
        .min(1)
        .max(40)
        .describe('Products visible in the embedded Dutchie menu (limit to first page)'),
    });

    const scraper = new LLMScraper(openai(model));
    const { data } = await scraper.run(page, schema, {
      format: 'custom',
      // Hand the scraper the iframe's HTML when an iframe is found; otherwise
      // fall back to the outer page's HTML.
      formatFunction: async (currentPage) => {
        const iframe =
          (await currentPage.$(dutchieIframeSelector)) ||
          (await currentPage.$(fallbackIframeSelector));
        const innerFrame = await iframe?.contentFrame();
        return innerFrame ? innerFrame.content() : currentPage.content();
      },
      prompt:
        'Extract the cannabis menu items currently visible in the embedded Dutchie menu. ' +
        'Return name, brand, numeric price (no currency symbol), category/size if present, ' +
        'and product URL if available. Skip navigation or filter labels.',
      mode: 'json',
    });

    console.log(`Scraped ${data.products.length} products from ${targetUrl}`);
    console.log(JSON.stringify(data.products.slice(0, 10), null, 2));
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
/**
 * Rejection handler for the script entry point: logs the failure and
 * terminates the process with exit code 1.
 *
 * @param {unknown} failure - The value main() rejected with.
 */
function reportFailureAndExit(failure) {
  console.error('❌ LLM scraper test failed:', failure);
  process.exit(1);
}

// Script entry point: run the test and route any rejection to the handler above.
main().catch(reportFailureAndExit);
|
||||
Reference in New Issue
Block a user