Files
cannaiq/backend/src/services/scraper-debug.ts
2025-11-28 19:45:44 -07:00

93 lines
3.0 KiB
TypeScript

import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { logger } from './logger';
// Register the stealth plugin on the imported puppeteer-extra instance.
// This runs once, as a side effect of importing this module, so the plugin is
// already registered by the time debugDutchiePage() launches a browser.
puppeteer.use(StealthPlugin());
/**
 * Loads a page in a headless browser and logs diagnostic information about
 * which DOM elements look like product cards.
 *
 * The diagnostics (logged under the 'scraper' category) contain:
 *  - the number of elements matched by each candidate product selector,
 *  - an HTML/text sample of the first product-like element, if any,
 *  - up to 20 class names taken from elements whose class attribute mentions
 *    "product",
 *  - the first 500 characters of the page's visible body text.
 *
 * Errors raised while navigating or inspecting the page are logged rather than
 * rethrown; in that case a reduced snapshot (URL, title, body size, start of
 * the body HTML) is attempted instead. Errors raised while launching or
 * configuring the browser propagate to the caller.
 *
 * @param url - Address of the page to inspect.
 * @returns A promise that resolves once the browser has been closed.
 */
export async function debugDutchiePage(url: string): Promise<void> {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
  });

  // Everything after launch lives inside try/finally so the browser process is
  // closed even when page setup (newPage / setViewport / setUserAgent) throws.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    logger.info('scraper', `Loading: ${url}`);

    try {
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
      logger.info('scraper', 'Page loaded, waiting for content...');

      // Fixed delay to give client-side rendering time to populate the page.
      // A plain timer is used instead of page.waitForTimeout(), which is
      // deprecated and absent from newer Puppeteer releases.
      await new Promise<void>(resolve => setTimeout(resolve, 8000));

      // NOTE: this callback is executed inside the page, not in Node, so it
      // must be self-contained and may only return serializable data.
      const debug = await page.evaluate(() => {
        // Candidate selectors for product cards, from most to least specific.
        const productSelectors = [
          '[data-testid*="product"]',
          '[class*="Product"]',
          '[class*="product"]',
          'article',
          '[role="article"]',
          'li'
        ];

        // How many elements each candidate selector matches.
        const selectors: Record<string, number> = {};
        for (const selector of productSelectors) {
          selectors[selector] = document.querySelectorAll(selector).length;
        }

        // Truncated sample of the first product-like element, when one exists.
        const firstMatch = document.querySelector('[class*="product" i], article, [data-testid*="product"]');
        const sampleHTML = firstMatch?.outerHTML.substring(0, 1000);
        const sampleText = firstMatch?.textContent?.substring(0, 500);

        // Collect the classes of every element whose class attribute mentions
        // "product". className is not a string on SVG elements, hence the
        // typeof guard.
        const classNames = new Set<string>();
        document.querySelectorAll('*').forEach(el => {
          const classes = el.className;
          if (typeof classes === 'string' && classes.toLowerCase().includes('product')) {
            // Split on whitespace runs and drop empty tokens so repeated or
            // leading spaces do not add '' to the set.
            classes.split(/\s+/).filter(Boolean).forEach(c => classNames.add(c));
          }
        });

        return {
          selectors,
          sampleHTML,
          sampleText,
          productClasses: Array.from(classNames).slice(0, 20),
          bodyTextSample: document.body?.innerText?.substring(0, 500) ?? ''
        };
      });

      logger.info('scraper', `Debug results:\n${JSON.stringify(debug, null, 2)}`);
    } catch (error) {
      logger.error('scraper', `Debug navigation error: ${error}`);

      // Best effort: the page may still be partially loaded, so try to capture
      // a minimal snapshot of whatever is there.
      try {
        const partialDebug = await page.evaluate(() => {
          return {
            url: window.location.href,
            title: document.title,
            bodyLength: document.body?.innerHTML?.length || 0,
            bodyStart: document.body?.innerHTML?.substring(0, 500) || ''
          };
        });
        logger.info('scraper', `Partial debug:\n${JSON.stringify(partialDebug, null, 2)}`);
      } catch (e) {
        logger.error('scraper', `Could not get partial debug: ${e}`);
      }
    }
  } finally {
    await browser.close();
  }
}