- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
135 lines
4.5 KiB
TypeScript
135 lines
4.5 KiB
TypeScript
import { createStealthBrowser, createStealthContext, waitForPageLoad, isCloudflareChallenge, waitForCloudflareChallenge } from './src/utils/stealthBrowser';
|
|
import { getRandomProxy } from './src/utils/proxyManager';
|
|
import { pool } from './src/db/migrate';
|
|
import * as fs from 'fs/promises';
|
|
|
|
/**
 * Debug helper: loads one hard-coded Dutchie dispensary page through the
 * project's stealth-browser helpers and prints which candidate selectors
 * match, to help work out how products are marked up on the page.
 *
 * Side effects:
 *  - writes a full-page screenshot to /tmp/dutchie-page.png
 *  - writes the page HTML to /tmp/dutchie-page.html
 *  - prints its findings to the console
 *  - closes the browser and calls `pool.end()` before settling
 *
 * Error behavior:
 *  - errors raised while inspecting the page are logged, not rethrown;
 *  - failures while picking a proxy, launching the browser, or closing the
 *    browser reject the returned promise, but `pool.end()` is still called
 *    first so the pool is never left open.
 *
 * @returns {Promise<void>}
 */
async function debugDutchieSelectors() {
  console.log('🔍 Debugging Dutchie page structure...\n');

  const url = 'https://dutchie.com/dispensary/sol-flower-dispensary';

  // Outer try/finally: the pool must be ended even when the proxy lookup,
  // the browser launch, or browser.close() throws.
  try {
    // Get proxy
    const proxy = await getRandomProxy();
    console.log(`Using proxy: ${proxy?.server || 'none'}\n`);

    const browser = await createStealthBrowser({ proxy: proxy || undefined, headless: true });

    try {
      const context = await createStealthContext(browser, { state: 'Arizona' });
      const page = await context.newPage();

      console.log(`Loading: ${url}`);
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });

      // Check for Cloudflare
      if (await isCloudflareChallenge(page)) {
        console.log('🛡️ Cloudflare detected, waiting...');
        // NOTE(review): 60000 is presumably a timeout in ms — confirm in stealthBrowser.
        await waitForCloudflareChallenge(page, 60000);
      }

      await waitForPageLoad(page);

      // Fixed extra wait so late-rendered content has a chance to appear
      await page.waitForTimeout(5000);

      console.log('\n📸 Taking screenshot...');
      await page.screenshot({ path: '/tmp/dutchie-page.png', fullPage: true });

      console.log('💾 Saving HTML...');
      const html = await page.content();
      await fs.writeFile('/tmp/dutchie-page.html', html);

      console.log('\n🔎 Looking for common React/product patterns...\n');

      // Candidate selectors for product containers, tried one by one
      const patterns = [
        // React data attributes
        'a[href*="/product/"]',
        '[data-testid*="product"]',
        '[data-cy*="product"]',
        '[data-test*="product"]',

        // Common class patterns
        '[class*="ProductCard"]',
        '[class*="product-card"]',
        '[class*="Product_"]',
        '[class*="MenuItem"]',
        '[class*="menu-item"]',

        // Semantic HTML
        'article',
        '[role="article"]',
        '[role="listitem"]',

        // Link patterns
        'a[href*="/menu/"]',
        'a[href*="/products/"]',
        'a[href*="/item/"]',
      ];

      for (const selector of patterns) {
        const matches = page.locator(selector);
        const count = await matches.count();
        if (count === 0) {
          continue;
        }

        console.log(`✓ ${selector}: ${count} elements`);

        // Details of the first match are best-effort: a failure here is
        // reported and the scan moves on to the next selector.
        try {
          const first = matches.first();
          const preview = await first.evaluate(el => el.outerHTML.substring(0, 500));
          const classes = await first.getAttribute('class');
          const testId = await first.getAttribute('data-testid');

          console.log(` Classes: ${classes || 'none'}`);
          console.log(` Data-testid: ${testId || 'none'}`);
          console.log(` HTML preview: ${preview}...`);
          console.log('');
        } catch (detailError) {
          const reason = detailError instanceof Error ? detailError.message : String(detailError);
          console.log(` (Could not get element details: ${reason})`);
        }
      }

      // Try to extract actual product links
      console.log('\n🔗 Looking for product links...\n');
      const links = await page.locator('a[href*="/product/"], a[href*="/menu/"], a[href*="/item/"]').all();

      if (links.length > 0) {
        console.log(`Found ${links.length} potential product links:`);
        // Only the first five links are printed to keep the output short
        for (let i = 0; i < Math.min(5, links.length); i++) {
          const href = await links[i].getAttribute('href');
          const text = await links[i].textContent();
          console.log(` ${i + 1}. ${href}`);
          console.log(` Text: ${text?.substring(0, 100)}`);
        }
      }

      // Check page title and URL
      console.log(`\n📄 Page title: ${await page.title()}`);
      console.log(`📍 Final URL: ${page.url()}`);

      // Try to find the main content container
      console.log('\n🎯 Looking for main content container...\n');
      const mainPatterns = ['main', '[role="main"]', '#root', '#app', '[id*="app"]'];
      for (const selector of mainPatterns) {
        const container = page.locator(selector);
        if ((await container.count()) > 0) {
          console.log(`✓ ${selector}: found`);
          const classes = await container.first().getAttribute('class');
          console.log(` Classes: ${classes || 'none'}`);
        }
      }

      console.log('\n✅ Debug complete!');
      console.log('📸 Screenshot saved to: /tmp/dutchie-page.png');
      console.log('💾 HTML saved to: /tmp/dutchie-page.html');
    } catch (error) {
      console.error('❌ Error:', error);
    } finally {
      await browser.close();
    }
  } finally {
    await pool.end();
  }
}
|
|
|
|
// Script entry point. The returned promise can reject (for example when the
// proxy lookup, the browser launch, or cleanup fails), so handle the rejection
// explicitly instead of leaving the promise floating, and signal the failure
// through the process exit code.
debugDutchieSelectors().catch((error) => {
  console.error('❌ Fatal error:', error);
  process.exitCode = 1;
});
|