// File-viewer header (extraction residue): 81 lines · 3.6 KiB · JavaScript
"use strict";
|
|
/**
 * Default-import interop helper (this has the shape of the helper the TypeScript
 * compiler emits for `esModuleInterop`; confirm against the build config).
 *
 * Reuses an `__importDefault` already present on `this` when there is one; otherwise
 * defines it locally.
 *
 * @param {*} mod - The value returned by `require(...)`.
 * @returns {*} `mod` itself when it is truthy and flagged `__esModule`; otherwise a
 *   wrapper `{ default: mod }` so callers can always read `.default`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
// Flag this CommonJS module with `__esModule` so that interop helpers such as
// `__importDefault` return it as-is instead of wrapping it in `{ default: ... }`.
Object.defineProperty(exports, "__esModule", { value: true });
// Public API. `debugDutchiePage` is a function declaration, so it is hoisted and can be
// assigned here before its definition appears further down the file.
exports.debugDutchiePage = debugDutchiePage;
// Puppeteer goes through the interop helper because it is consumed via `.default`.
const puppeteer_1 = __importDefault(require("puppeteer"));
// Project-local module; this file uses `logger_1.logger.info` and `logger_1.logger.error`.
const logger_1 = require("./logger");
/**
 * Opens `url` in headless Chromium through Puppeteer and logs a diagnostic snapshot of
 * the rendered DOM: match counts for a list of candidate product selectors, a truncated
 * HTML/text sample of the first product-like element, up to 20 class names found on
 * elements whose `class` attribute mentions "product", and the first 500 characters of
 * the body text.
 *
 * If navigation or the DOM inspection fails, the error is logged and a smaller
 * "partial debug" snapshot (URL, title, body length, body start) is attempted instead.
 *
 * @param {string} url - Address of the page to inspect.
 * @param {{ settleMs?: number }} [options] - Optional tuning.
 * @param {number} [options.settleMs=8000] - Milliseconds to pause after
 *   `domcontentloaded` before inspecting the DOM, giving client-side rendering time
 *   to run.
 * @returns {Promise<void>} Resolves after the browser has been closed. Navigation and
 *   inspection failures are logged, not thrown; failures while launching the browser
 *   or preparing the page reject the promise (the browser is still closed first).
 */
async function debugDutchiePage(url, { settleMs = 8000 } = {}) {
    const browser = await puppeteer_1.default.launch({
        headless: 'new',
        args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
    });
    // try/finally guarantees the browser is closed on every exit path, including
    // failures in newPage/setViewport/setUserAgent that happen before navigation.
    try {
        const page = await browser.newPage();
        await page.setViewport({ width: 1920, height: 1080 });
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
        logger_1.logger.info('scraper', `Loading: ${url}`);
        try {
            await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
            logger_1.logger.info('scraper', 'Page loaded, waiting for content...');
            // Fixed pause so late-rendered content can appear. A plain timer is used
            // because page.waitForTimeout is deprecated and absent from newer Puppeteer
            // releases.
            await new Promise((resolve) => setTimeout(resolve, settleMs));
            // NOTE: this callback is serialized and executed inside the page, so it
            // must not reference anything from the surrounding Node.js scope.
            const debug = await page.evaluate(() => {
                // Candidate selectors, from most to least specific.
                const productSelectors = [
                    '[data-testid*="product"]',
                    '[class*="Product"]',
                    '[class*="product"]',
                    'article',
                    '[role="article"]',
                    'li'
                ];
                const results = {
                    selectors: {}
                };
                // Record how many elements each candidate selector matches.
                for (const selector of productSelectors) {
                    results.selectors[selector] = document.querySelectorAll(selector).length;
                }
                // Truncated sample of the first product-like element, if any.
                const firstMatch = document.querySelector('[class*="product" i], article, [data-testid*="product"]');
                if (firstMatch) {
                    results.sampleHTML = firstMatch.outerHTML.substring(0, 1000);
                    results.sampleText = firstMatch.textContent?.substring(0, 500);
                }
                // Collect every class token from elements whose class attribute mentions
                // "product" (sibling tokens are kept on purpose: they show what else is
                // applied to those elements).
                const classNames = new Set();
                for (const el of document.querySelectorAll('*')) {
                    const classes = el.className;
                    // className is not always a string (e.g. on SVG elements); skip those.
                    if (typeof classes !== 'string' || !classes.toLowerCase().includes('product')) {
                        continue;
                    }
                    // Split on any whitespace and drop empty tokens so repeated spaces,
                    // tabs or newlines in the attribute do not produce '' entries.
                    for (const token of classes.split(/\s+/)) {
                        if (token) {
                            classNames.add(token);
                        }
                    }
                }
                results.productClasses = Array.from(classNames).slice(0, 20);
                results.bodyTextSample = document.body.innerText.substring(0, 500);
                return results;
            });
            logger_1.logger.info('scraper', `Debug results:\n${JSON.stringify(debug, null, 2)}`);
        }
        catch (error) {
            logger_1.logger.error('scraper', `Debug navigation error: ${error}`);
            // Best effort: report whatever the page managed to load before the failure.
            try {
                const partialDebug = await page.evaluate(() => {
                    return {
                        url: window.location.href,
                        title: document.title,
                        bodyLength: document.body?.innerHTML?.length || 0,
                        bodyStart: document.body?.innerHTML?.substring(0, 500) || ''
                    };
                });
                logger_1.logger.info('scraper', `Partial debug:\n${JSON.stringify(partialDebug, null, 2)}`);
            }
            catch (e) {
                logger_1.logger.error('scraper', `Could not get partial debug: ${e}`);
            }
        }
    }
    finally {
        await browser.close();
    }
}