- Add backend stale process monitoring API (/api/stale-processes) - Add users management route - Add frontend landing page and stale process monitor UI on /scraper-tools - Move old development scripts to backend/archive/ - Update frontend build with new features 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
92 lines
2.9 KiB
TypeScript
92 lines
2.9 KiB
TypeScript
import puppeteer from 'puppeteer-extra';
|
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
import { Pool } from 'pg';
|
|
import fs from 'fs';
|
|
|
|
// Register the stealth plugin on the shared puppeteer-extra instance.
puppeteer.use(StealthPlugin());

// Connection pool used to look up a proxy from the `proxies` table.
// DATABASE_URL, when set and non-empty, overrides the local development
// default so credentials do not have to live in source; `||` (not `??`) is
// deliberate so an empty-string variable also falls back to the default.
const pool = new Pool({
  connectionString:
    process.env.DATABASE_URL ||
    'postgresql://sail:password@localhost:5432/dutchie_menus'
});
|
|
|
|
/**
 * One-off diagnostic: opens a fixed Curaleaf store page in headless Chromium
 * through a randomly selected proxy while sending a Googlebot user agent,
 * logs every request and response (with headers), and dumps what the page
 * rendered.
 *
 * Side effects:
 * - reads one random row from the `proxies` table via the module-level `pool`
 * - writes the body HTML to /tmp/page.html
 * - writes a full-page screenshot to /tmp/screenshot.png
 * - closes the browser (if launched) and ends the pool before returning
 *
 * Errors raised while scraping are logged, not rethrown. Errors raised while
 * closing the browser or the pool still propagate to the caller.
 */
async function debug() {
  let browser;

  try {
    // Get proxy
    const proxyResult = await pool.query(`SELECT host, port, protocol FROM proxies ORDER BY RANDOM() LIMIT 1`);
    const proxy = proxyResult.rows[0];
    if (!proxy) {
      // An empty table would otherwise surface as an opaque
      // "Cannot read properties of undefined" TypeError below.
      throw new Error('No proxies available in the proxies table');
    }
    const proxyUrl = `${proxy.protocol}://${proxy.host}:${proxy.port}`;

    console.log('🔌 Proxy:', proxyUrl);

    browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox', `--proxy-server=${proxyUrl}`]
    });

    const page = await browser.newPage();

    // Set Googlebot UA
    await page.setUserAgent('Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)');

    // Log all requests being made
    page.on('request', request => {
      console.log('\n📤 REQUEST:', request.method(), request.url());
      console.log('   Headers:', JSON.stringify(request.headers(), null, 2));
    });

    // Log all responses
    page.on('response', response => {
      console.log('\n📥 RESPONSE:', response.status(), response.url());
      console.log('   Headers:', JSON.stringify(response.headers(), null, 2));
    });

    const url = 'https://curaleaf.com/stores/curaleaf-dispensary-phoenix-airport/brands';
    console.log('\n🌐 Going to:', url);

    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    // Extra 3s settle time after network idle. A plain timer is used instead
    // of page.waitForTimeout, which is deprecated and absent from newer
    // Puppeteer releases.
    await new Promise<void>(resolve => setTimeout(resolve, 3000));

    // Get what the browser sees
    const pageData = await page.evaluate(() => ({
      title: document.title,
      url: window.location.href,
      userAgent: navigator.userAgent,
      bodyHTML: document.body.innerHTML,
      bodyText: document.body.innerText
    }));

    console.log('\n📄 PAGE DATA:');
    console.log('Title:', pageData.title);
    console.log('URL:', pageData.url);
    console.log('User Agent (browser sees):', pageData.userAgent);
    console.log('Body HTML length:', pageData.bodyHTML.length, 'chars');
    console.log('Body text length:', pageData.bodyText.length, 'chars');

    // Save HTML to file
    fs.writeFileSync('/tmp/page.html', pageData.bodyHTML);
    console.log('\n💾 Saved HTML to /tmp/page.html');

    // Save screenshot
    await page.screenshot({ path: '/tmp/screenshot.png', fullPage: true });
    console.log('📸 Saved screenshot to /tmp/screenshot.png');

    // Show first 500 chars of HTML
    console.log('\n📝 First 500 chars of HTML:');
    console.log(pageData.bodyHTML.substring(0, 500));

    // Show first 500 chars of text
    console.log('\n📝 First 500 chars of text:');
    console.log(pageData.bodyText.substring(0, 500));

  } catch (error: unknown) {
    // Non-Error throwables have no `.message`; log them as-is.
    console.error('❌ Error:', error instanceof Error ? error.message : error);
  } finally {
    // Nested so the pool is ended even if closing the browser rejects.
    try {
      if (browser) await browser.close();
    } finally {
      await pool.end();
    }
  }
}
|
|
|
|
// Script entry point. debug() can still reject from its cleanup step
// (closing the browser or ending the pool), so handle the promise explicitly
// instead of leaving it floating, and signal failure through the exit code.
debug().catch((error: unknown) => {
  console.error('❌ Error:', error instanceof Error ? error.message : error);
  process.exitCode = 1;
});
|