Files
cannaiq/backend/debug-dutchie-selectors.ts
2025-11-28 19:45:44 -07:00

135 lines
4.5 KiB
TypeScript

import { createStealthBrowser, createStealthContext, waitForPageLoad, isCloudflareChallenge, waitForCloudflareChallenge } from './src/utils/stealthBrowser';
import { getRandomProxy } from './src/utils/proxyManager';
import { pool } from './src/db/migrate';
import * as fs from 'fs/promises';
/**
 * One-off diagnostic: loads a hard-coded Dutchie dispensary page through the
 * stealth browser helpers, dumps a screenshot and the rendered HTML to /tmp,
 * and logs how many elements match a list of candidate product selectors so a
 * scraper author can pick working ones.
 *
 * Failures are logged and reported through a non-zero process exit code rather
 * than thrown. The shared database pool is always closed before returning so
 * the process can exit.
 */
async function debugDutchieSelectors() {
  console.log('🔍 Debugging Dutchie page structure...\n');

  const url = 'https://dutchie.com/dispensary/sol-flower-dispensary';

  try {
    // Proxy lookup and browser launch live inside the try so that a failure
    // in either still reaches the finally block and closes the pool.
    const proxy = await getRandomProxy();
    console.log(`Using proxy: ${proxy?.server || 'none'}\n`);

    const browser = await createStealthBrowser({ proxy: proxy || undefined, headless: true });

    try {
      const context = await createStealthContext(browser, { state: 'Arizona' });
      const page = await context.newPage();

      console.log(`Loading: ${url}`);
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });

      // Check for Cloudflare
      if (await isCloudflareChallenge(page)) {
        console.log('🛡️ Cloudflare detected, waiting...');
        await waitForCloudflareChallenge(page, 60000);
      }

      await waitForPageLoad(page);

      // Fixed grace period for client-side rendering to populate the page.
      await page.waitForTimeout(5000);

      console.log('\n📸 Taking screenshot...');
      await page.screenshot({ path: '/tmp/dutchie-page.png', fullPage: true });

      console.log('💾 Saving HTML...');
      const html = await page.content();
      await fs.writeFile('/tmp/dutchie-page.html', html);

      console.log('\n🔎 Looking for common React/product patterns...\n');

      // Candidate selectors for product containers, tried in order.
      const patterns = [
        // Product detail links
        'a[href*="/product/"]',
        // Test/data attributes
        '[data-testid*="product"]',
        '[data-cy*="product"]',
        '[data-test*="product"]',
        // Common class patterns
        '[class*="ProductCard"]',
        '[class*="product-card"]',
        '[class*="Product_"]',
        '[class*="MenuItem"]',
        '[class*="menu-item"]',
        // Semantic HTML
        'article',
        '[role="article"]',
        '[role="listitem"]',
        // Link patterns
        'a[href*="/menu/"]',
        'a[href*="/products/"]',
        'a[href*="/item/"]',
      ];

      for (const selector of patterns) {
        const matches = page.locator(selector);
        const count = await matches.count();
        if (count === 0) {
          continue;
        }

        console.log(`${selector}: ${count} elements`);

        // Describe the first match; best-effort, since the element may be
        // detached or re-rendered between the count and the inspection.
        try {
          const first = matches.first();
          const preview = await first.evaluate(el => el.outerHTML.substring(0, 500));
          const classes = await first.getAttribute('class');
          const testId = await first.getAttribute('data-testid');
          console.log(` Classes: ${classes || 'none'}`);
          console.log(` Data-testid: ${testId || 'none'}`);
          console.log(` HTML preview: ${preview}...`);
          console.log('');
        } catch {
          console.log(` (Could not get element details)`);
        }
      }

      // Try to extract actual product links
      console.log('\n🔗 Looking for product links...\n');
      const links = await page.locator('a[href*="/product/"], a[href*="/menu/"], a[href*="/item/"]').all();
      if (links.length > 0) {
        console.log(`Found ${links.length} potential product links:`);
        // Only the first five are printed to keep the output readable.
        for (let i = 0; i < Math.min(5, links.length); i++) {
          const href = await links[i].getAttribute('href');
          const text = await links[i].textContent();
          console.log(` ${i + 1}. ${href}`);
          console.log(` Text: ${text?.substring(0, 100)}`);
        }
      }

      // Check page title and URL
      console.log(`\n📄 Page title: ${await page.title()}`);
      console.log(`📍 Final URL: ${page.url()}`);

      // Try to find the main content container
      console.log('\n🎯 Looking for main content container...\n');
      const mainPatterns = ['main', '[role="main"]', '#root', '#app', '[id*="app"]'];
      for (const selector of mainPatterns) {
        const candidates = page.locator(selector);
        if ((await candidates.count()) > 0) {
          console.log(`${selector}: found`);
          const classes = await candidates.first().getAttribute('class');
          console.log(` Classes: ${classes || 'none'}`);
        }
      }

      console.log('\n✅ Debug complete!');
      console.log('📸 Screenshot saved to: /tmp/dutchie-page.png');
      console.log('💾 HTML saved to: /tmp/dutchie-page.html');
    } finally {
      // A failure while closing is caught by the outer catch below, so the
      // pool is still closed afterwards.
      await browser.close();
    }
  } catch (error) {
    console.error('❌ Error:', error);
    // Signal failure to the calling shell without cutting cleanup short.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
// Script entry point. Any rejection that escapes the function is logged and
// turned into a non-zero exit code instead of an unhandled promise rejection.
debugDutchieSelectors().catch((error: unknown) => {
  console.error('❌ Unhandled error:', error);
  process.exitCode = 1;
});