import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { logger } from './logger';

// Apply stealth plugin
puppeteer.use(StealthPlugin());

/**
 * Loads `url` in a headless browser and logs diagnostic information about
 * elements on the page that look like product cards.
 *
 * After navigation, it logs how many elements match each candidate selector,
 * a truncated HTML/text sample of the first match, up to 20 class names taken
 * from elements whose class attribute mentions "product", and the first 500
 * characters of the body text. If navigation or the in-page inspection fails,
 * the error is logged and a smaller fallback snapshot (URL, title, body size)
 * is attempted instead.
 *
 * @param url - Address of the page to inspect.
 * @returns A promise that resolves once the browser has been closed.
 * @throws If the browser cannot be launched, the page cannot be created or
 *   configured, or the browser fails to close. Navigation and inspection
 *   errors are logged rather than thrown.
 */
export async function debugDutchiePage(url: string): Promise<void> {
  const navigationTimeoutMs = 30000;
  const renderWaitMs = 8000;

  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
  });

  // Everything after launch runs inside try/finally so the browser process is
  // closed even when page setup, or the error handler itself, throws.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.setUserAgent(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    );

    logger.info('scraper', `Loading: ${url}`);

    try {
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: navigationTimeoutMs });
      logger.info('scraper', 'Page loaded, waiting for content...');

      // Fixed delay so client-side rendering can finish. A plain timer is used
      // instead of page.waitForTimeout, which newer Puppeteer releases removed.
      await new Promise<void>((resolve) => setTimeout(resolve, renderWaitMs));

      // NOTE: this callback is serialized and executed in the page context, so
      // it must not reference anything from the enclosing Node scope.
      const debug = await page.evaluate(() => {
        // Try to find product cards
        const productSelectors = [
          '[data-testid*="product"]',
          '[class*="Product"]',
          '[class*="product"]',
          'article',
          '[role="article"]',
          'li'
        ];

        const selectors: Record<string, number> = {};
        for (const selector of productSelectors) {
          selectors[selector] = document.querySelectorAll(selector).length;
        }

        // Get sample HTML from the first match
        let sampleHTML: string | undefined;
        let sampleText: string | undefined;
        const firstMatch = document.querySelector(
          '[class*="product" i], article, [data-testid*="product"]'
        );
        if (firstMatch) {
          sampleHTML = firstMatch.outerHTML.substring(0, 1000);
          sampleText = firstMatch.textContent?.substring(0, 500);
        }

        // Collect the class names of every element whose class attribute
        // mentions "product" (case-insensitive).
        const classNames = new Set<string>();
        document.querySelectorAll('*').forEach((el) => {
          const classes: unknown = el.className;
          // className is not a string on SVG elements; skip those.
          if (typeof classes === 'string' && classes.toLowerCase().includes('product')) {
            // Split on any whitespace and drop empty tokens so repeated
            // spaces, tabs, or newlines do not produce bogus entries.
            for (const token of classes.split(/\s+/)) {
              if (token !== '') {
                classNames.add(token);
              }
            }
          }
        });

        return {
          selectors,
          sampleHTML,
          sampleText,
          productClasses: Array.from(classNames).slice(0, 20),
          bodyTextSample: document.body?.innerText?.substring(0, 500) ?? ''
        };
      });

      logger.info('scraper', `Debug results:\n${JSON.stringify(debug, null, 2)}`);
    } catch (error) {
      logger.error('scraper', `Debug navigation error: ${error}`);

      // Try to get whatever we can
      try {
        const partialDebug = await page.evaluate(() => {
          return {
            url: window.location.href,
            title: document.title,
            bodyLength: document.body?.innerHTML?.length || 0,
            bodyStart: document.body?.innerHTML?.substring(0, 500) || ''
          };
        });

        logger.info('scraper', `Partial debug:\n${JSON.stringify(partialDebug, null, 2)}`);
      } catch (e) {
        logger.error('scraper', `Could not get partial debug: ${e}`);
      }
    }
  } finally {
    await browser.close();
  }
}