The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
116 lines
4.0 KiB
TypeScript
116 lines
4.0 KiB
TypeScript
import { chromium, Frame } from 'playwright';
|
|
|
|
/**
 * One menu entry scraped from a product card.
 *
 * Only `name` is required; every other field is left undefined when the
 * corresponding piece of the card could not be found.
 */
interface Product {
  /** Display name of the product. */
  name: string;
  /** Brand label shown on the card, if any. */
  brand?: string;
  /** Price as a plain number parsed from the card text (no currency symbol). */
  price?: number;
  /** Size / weight label shown on the card, if any. */
  size?: string;
  /** Category label shown on the card, if any. */
  category?: string;
  /** Link to the product's detail page, if the card exposes one. */
  url?: string;
}
|
|
|
|
/**
 * Locates the embedded menu iframe on `page` and returns its frame once the
 * frame's DOM content has loaded.
 *
 * @param page - Page-like object that has already navigated to the menu URL.
 * @returns The frame hosted by the first iframe matching the Dutchie selectors.
 * @throws Error if the matched iframe element exposes no content frame; a
 *   rejection from either wait (e.g. a timeout) propagates unchanged.
 */
async function getDutchieFrame(page: any): Promise<Frame> {
  // Matches an iframe by src, by inline srcdoc, or by an "iframe-" id prefix.
  const iframeSelector =
    'iframe[src*="dutchie"], iframe[srcdoc*="dutchie"], iframe[id^="iframe-"]';
  const locateOptions = { timeout: 45000 };
  const loadOptions = { timeout: 30000 };

  const element = await page.waitForSelector(iframeSelector, locateOptions);
  const embedded = await element.contentFrame();

  if (embedded) {
    await embedded.waitForLoadState('domcontentloaded', loadOptions);
    return embedded;
  }

  throw new Error('Unable to access embedded Dutchie iframe.');
}
|
|
|
|
/**
 * Scrolls the menu until no additional product cards appear, then scrolls the
 * frame back to the top.
 *
 * Performs at most 20 scroll steps; after each step it waits 800 ms and
 * re-counts the elements whose `data-testid` contains "product" or "card",
 * stopping as soon as the count no longer grows.
 *
 * @param frame - Frame that hosts the product list.
 */
async function loadAllProducts(frame: Frame): Promise<void> {
  const cardSelector = '[data-testid*="product"], [data-testid*="card"]';
  const maxScrolls = 20;
  const scrollStepPx = 1200;
  const settleMs = 800;

  const countCards = (): Promise<number> => frame.$$eval(cardSelector, els => els.length);

  // `Frame` has no `mouse` property; mouse input lives on the owning Page.
  // NOTE(review): the wheel event is dispatched at the current pointer
  // position — confirm the pointer is over the scrollable menu area.
  const mouse = frame.page().mouse;

  let knownCount = await countCards();
  for (let i = 0; i < maxScrolls; i++) {
    await mouse.wheel(0, scrollStepPx);
    await frame.waitForTimeout(settleMs);

    const currentCount = await countCards();
    if (currentCount <= knownCount) break;
    // Reuse this measurement as the next baseline instead of re-querying.
    knownCount = currentCount;
  }

  await frame.evaluate(() => window.scrollTo({ top: 0 }));
}
|
|
|
|
/**
 * Reads every product card currently rendered in `frame` and converts it to a
 * {@link Product}.
 *
 * Each field is looked up through a chain of `data-testid` selectors, falling
 * back to looser heuristics (headings, raw text) when the specific element is
 * missing. Cards for which no name can be determined are dropped.
 *
 * @param frame - Frame that hosts the product list.
 * @returns One entry per outermost matching card, in document order.
 */
async function extractProducts(frame: Frame): Promise<Product[]> {
  // The callback passed to `evaluate` runs inside the page, not in Node, so it
  // must stay self-contained: no references to outer-scope variables or helpers.
  return frame.evaluate(() => {
    const cardSelector =
      '[data-testid="product-list-item"], [data-testid="card-link"], [data-testid*="product-card"]';

    // The substring match on "product-card" also hits the per-field elements
    // queried below (product-card-name / -brand / -price), and a card-link may
    // sit inside a product-list-item. Keep only outermost matches so that each
    // card yields exactly one product.
    const cards = Array.from(document.querySelectorAll(cardSelector)).filter(
      el => !el.parentElement?.closest(cardSelector)
    );

    return cards
      .map((card: Element) => {
        const cardText = card.textContent || '';

        const name =
          card.querySelector<HTMLElement>('[data-testid="product-card-name"]')?.innerText?.trim() ||
          card.querySelector<HTMLElement>('[data-testid="product-name"]')?.innerText?.trim() ||
          card.querySelector<HTMLElement>('h3, h4')?.innerText?.trim() ||
          // Last resort: first text line longer than three characters.
          cardText.split('\n').map(t => t.trim()).find(t => t.length > 3) ||
          '';

        const brand =
          card.querySelector<HTMLElement>('[data-testid="product-card-brand"]')?.innerText?.trim() ||
          card.querySelector<HTMLElement>('[data-testid="product-brand"]')?.innerText?.trim() ||
          undefined;

        // Inside a dedicated price element a bare number is accepted. When
        // falling back to the whole card text a "$" is required; otherwise the
        // first number in the name (e.g. "3.5g", "100mg") would be reported
        // as the price.
        const priceLabel = card.querySelector<HTMLElement>('[data-testid="product-card-price"]')?.innerText;
        const priceMatch = priceLabel
          ? priceLabel.match(/\$?(\d+(?:\.\d{2})?)/)
          : cardText.match(/\$(\d+(?:\.\d{2})?)/);
        const price = priceMatch ? parseFloat(priceMatch[1]) : undefined;

        const size =
          card.querySelector<HTMLElement>('[data-testid*="size"]')?.innerText?.trim() ||
          card.querySelector<HTMLElement>('[data-testid*="weight"]')?.innerText?.trim() ||
          undefined;

        const category =
          card.querySelector<HTMLElement>('[data-testid*="category"]')?.innerText?.trim() ||
          undefined;

        // `closest` covers a card that is itself (or is wrapped by) the product
        // anchor; `querySelector` covers an anchor nested inside the card.
        const link = (card.closest('a[href*="/product/"]') ||
          card.querySelector('a[href*="/product/"]')) as HTMLAnchorElement | null;
        const url = link?.href;

        return { name, brand, price, size, category, url };
      })
      .filter(p => p.name);
  });
}
|
|
|
|
/**
 * Launches headless Chromium, opens the menu page, loads the embedded product
 * list and prints the number of products found plus the first 20 as JSON.
 *
 * Failures during the scrape are logged and reported through
 * `process.exitCode = 1`; the browser is closed in every case once it has
 * been launched.
 *
 * @param targetUrl - Menu page to scrape. Defaults to the original hard-coded URL.
 */
async function main(targetUrl = 'https://azdeeplyrooted.com/menu'): Promise<void> {
  const browser = await chromium.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-blink-features=AutomationControlled'],
  });

  try {
    // Created inside the try block so a failure here still reaches
    // `browser.close()` in the finally clause.
    const page = await browser.newPage({
      viewport: { width: 1300, height: 900 },
      userAgent:
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    });

    console.log(`Navigating to ${targetUrl}...`);
    await page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });

    const frame = await getDutchieFrame(page);

    // Best effort: a missing card is not fatal (extraction then simply finds
    // nothing), but say so instead of swallowing the timeout silently.
    try {
      await frame.waitForSelector('[data-testid*="product"], [data-testid*="card"]', { timeout: 60000 });
    } catch {
      console.warn('No product cards appeared before the timeout; continuing anyway.');
    }

    await loadAllProducts(frame);
    const products = await extractProducts(frame);

    console.log(`Found ${products.length} products`);
    console.log(JSON.stringify(products.slice(0, 20), null, 2));
  } catch (err) {
    console.error('Scrape failed:', err);
    process.exitCode = 1;
  } finally {
    await browser.close();
  }
}
|
|
|
|
// Script entry point. Errors that escape main()'s own try/catch (for example
// the browser failing to launch or to close) would otherwise surface as an
// unhandled promise rejection.
main().catch((err: unknown) => {
  console.error('Scrape failed:', err);
  process.exitCode = 1;
});
|