import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { Pool } from 'pg';

puppeteer.use(StealthPlugin());

// NOTE(review): the connection string (including credentials) is hard-coded;
// consider sourcing it from configuration if this script leaves a dev machine.
const pool = new Pool({
  connectionString: 'postgresql://sail:password@localhost:5432/dutchie_menus'
});

/**
 * Columns of `stores` that this script reads.
 * Types are assumed from how the values are used here — confirm against the schema.
 */
interface StoreRow {
  name: string;
  dutchie_url: string;
}

/**
 * Columns of `proxies` that this script reads.
 * Types are assumed from how the values are used here — confirm against the schema.
 */
interface ProxyRow {
  host: string;
  port: number | string;
  protocol: string;
  username: string | null;
  password: string | null;
}

type LaunchedBrowser = Awaited<ReturnType<typeof puppeteer.launch>>;

/**
 * Manual smoke test: loads one store's Dutchie menu page in headless Chrome
 * (optionally through the first proxy row in the database) and prints every
 * distinct piece of text found in elements whose class or test id mentions
 * "brand".
 *
 * Steps:
 *  1. Look up the store with slug `curaleaf-az-48th-street-med`.
 *  2. Fetch one row from `proxies` (the run continues without a proxy if none).
 *  3. Launch the browser, set a Googlebot user agent and mask `navigator.webdriver`.
 *  4. Navigate to the store's `dutchie_url`.
 *  5. Collect brand-like text, or dump basic page diagnostics if nothing matched.
 *
 * Errors are logged rather than rethrown. The browser (if launched) is closed
 * and the pg pool is ended in all cases.
 *
 * @returns A promise that resolves once the scrape attempt and cleanup finish.
 */
async function testBrandScrape(): Promise<void> {
  const separator = '─'.repeat(60);
  let browser: LaunchedBrowser | undefined;

  try {
    // 1. Get the store
    const storeResult = await pool.query(
      "SELECT id, name, slug, dutchie_url FROM stores WHERE slug = $1",
      ['curaleaf-az-48th-street-med']
    );

    if (storeResult.rows.length === 0) {
      console.log('Store not found');
      return;
    }

    const store: StoreRow = storeResult.rows[0];
    console.log(`\nTesting brand scrape for: ${store.name}`);
    console.log(`URL: ${store.dutchie_url}\n`);

    // 2. Get a proxy
    // NOTE(review): the log message below says "active", but the query has no
    // such filter — it simply takes the first row. Confirm whether `proxies`
    // has an active/enabled column that should be used here.
    const proxyResult = await pool.query(`
      SELECT host, port, protocol, username, password
      FROM proxies
      LIMIT 1
    `);

    if (proxyResult.rows.length === 0) {
      console.log('No active proxies available - will try without proxy');
    }

    const proxy: ProxyRow | undefined = proxyResult.rows[0];
    if (proxy) {
      console.log(`Using proxy: ${proxy.host}:${proxy.port}`);
    }
    console.log('User-Agent: Googlebot');
    console.log(separator);

    // 3. Launch browser with proxy
    const browserArgs = [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled'
    ];

    if (proxy) {
      const proxyUrl = `${proxy.protocol}://${proxy.host}:${proxy.port}`;
      browserArgs.push(`--proxy-server=${proxyUrl}`);
    }

    browser = await puppeteer.launch({
      headless: true,
      args: browserArgs
    });

    const page = await browser.newPage();

    // The --proxy-server flag carries no credentials, so a proxy that requires
    // authentication must be answered via page.authenticate() before the first
    // navigation. Without this the stored username/password were never used.
    if (proxy?.username) {
      await page.authenticate({
        username: proxy.username,
        password: proxy.password ?? ''
      });
    }

    // Set Googlebot user agent
    await page.setUserAgent('Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)');

    // Anti-detection: report navigator.webdriver as false in every new document
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', {
        get: () => false,
      });
    });

    // 4. Navigate and extract brands
    console.log('\nNavigating to store page...');
    await page.goto(store.dutchie_url, { waitUntil: 'networkidle2', timeout: 60000 });

    console.log('Page loaded, extracting brands...\n');

    // 5. Extract brands from product cards
    const brands = await page.evaluate(() => {
      const brandSet = new Set<string>();

      // Try multiple selectors; the Set de-duplicates text matched by several
      const selectors = [
        '[class*="brand"]',
        '[class*="Brand"]',
        '[data-testid*="brand"]',
        '[class*="product"] [class*="brand"]'
      ];

      for (const selector of selectors) {
        document.querySelectorAll(selector).forEach(el => {
          const text = el.textContent?.trim();
          // Skip empty nodes and long blobs (100+ chars) that matched a selector
          if (text && text.length > 0 && text.length < 100) {
            brandSet.add(text);
          }
        });
      }

      return Array.from(brandSet);
    });

    console.log('BRANDS FOUND:');
    console.log(separator);

    if (brands.length === 0) {
      console.log('No brands found!');
      console.log('\nLet me also check what the page structure looks like...\n');

      // Debug: show page structure
      const pageInfo = await page.evaluate(() => {
        return {
          title: document.title,
          productCards: document.querySelectorAll('[class*="product"], [class*="Product"]').length,
          hasImages: document.querySelectorAll('img[src*="dutchie"]').length,
          bodyText: document.body.textContent?.substring(0, 500)
        };
      });

      console.log('Page Title:', pageInfo.title);
      console.log('Product Cards Found:', pageInfo.productCards);
      console.log('Dutchie Images:', pageInfo.hasImages);
      console.log('\nFirst 500 chars of page:');
      console.log(pageInfo.bodyText);
    } else {
      // Sort a copy so the extracted array itself is left untouched
      [...brands].sort().forEach((brand, i) => {
        console.log(`${i + 1}. ${brand}`);
      });
      console.log(separator);
      console.log(`Total unique brands: ${brands.length}`);
    }
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances are known to carry message/stack
    if (error instanceof Error) {
      console.error('Error:', error.message);
      if (error.stack) {
        console.error(error.stack);
      }
    } else {
      console.error('Error:', error);
    }
  } finally {
    if (browser) {
      await browser.close();
    }
    await pool.end();
  }
}

// The function logs its own failures, but cleanup in `finally` can still
// reject; catch that here so the promise is never left floating.
testBrandScrape().catch((error: unknown) => {
  console.error('Unhandled error:', error);
});