import {
  createStealthBrowser,
  createStealthContext,
  waitForPageLoad,
  isCloudflareChallenge,
  waitForCloudflareChallenge,
} from './src/utils/stealthBrowser';
import { getRandomProxy } from './src/utils/proxyManager';
import { pool } from './src/db/migrate';

/** A brand as extracted from a store's brands page. */
interface Brand {
  /** Display name taken from a heading, an image alt text, or the link text. */
  name: string;
  /** `src` attribute of the brand image, when the link contains one. */
  logo_url?: string;
}

/**
 * Collapses entries that share the exact same name into one.
 *
 * A brand card is often rendered as several anchors (logo link + text link),
 * which would otherwise be logged, counted and upserted once per anchor.
 * The first occurrence wins; a later duplicate only contributes its logo
 * when the kept entry has none. Input order is preserved and the input
 * array is not mutated.
 */
function dedupeBrands(brands: readonly Brand[]): Brand[] {
  const byName = new Map<string, Brand>();
  for (const brand of brands) {
    const existing = byName.get(brand.name);
    if (!existing) {
      byName.set(brand.name, { ...brand });
    } else if (!existing.logo_url && brand.logo_url) {
      existing.logo_url = brand.logo_url;
    }
  }
  return [...byName.values()];
}

/**
 * Upserts each brand into `brands` and links it to the store in
 * `store_brands`, refreshing the `last_seen_at` / `updated_at` timestamps.
 *
 * Runs sequentially because each link row needs the id returned by the
 * corresponding brand upsert.
 */
async function saveBrands(storeId: number, brands: readonly Brand[]): Promise<void> {
  for (const brand of brands) {
    // Insert or update the brand; COALESCE keeps the stored logo when this
    // scrape did not find one.
    const brandResult = await pool.query(
      `
      INSERT INTO brands (name, logo_url, first_seen_at, last_seen_at)
      VALUES ($1, $2, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
      ON CONFLICT (name)
      DO UPDATE SET
        logo_url = COALESCE($2, brands.logo_url),
        last_seen_at = CURRENT_TIMESTAMP,
        updated_at = CURRENT_TIMESTAMP
      RETURNING id
      `,
      [brand.name, brand.logo_url ?? null],
    );
    const brandId = brandResult.rows[0].id;

    // Link the brand to the store (or mark an existing link as active again).
    await pool.query(
      `
      INSERT INTO store_brands (store_id, brand_id, first_seen_at, last_seen_at, active)
      VALUES ($1, $2, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, true)
      ON CONFLICT (store_id, brand_id)
      DO UPDATE SET
        last_seen_at = CURRENT_TIMESTAMP,
        active = true,
        updated_at = CURRENT_TIMESTAMP
      `,
      [storeId, brandId],
    );
  }
}

/**
 * Scrapes the `/brands` page of a store and records the brands found.
 *
 * Looks up the store row by id, opens `<dutchie_url>/brands` in a stealth
 * browser (through a proxy when `getRandomProxy` returns one), extracts brand
 * names and logos from the brand links on the page, and upserts them into
 * `brands` / `store_brands`. When nothing is found, a screenshot and the page
 * HTML are written to `/tmp` for debugging.
 *
 * Errors are logged rather than rethrown. The shared `pool` is ended exactly
 * once when the function finishes, so this is meant to be the last database
 * user in the process.
 *
 * @param storeId Primary key of the row in `stores` to scrape.
 */
async function scrapeBrands(storeId: number): Promise<void> {
  console.log(`šŸ·ļø Scraping brands for store ID: ${storeId}\n`);

  try {
    // Get store info
    const storeResult = await pool.query(
      `
      SELECT id, name, dutchie_url
      FROM stores
      WHERE id = $1
      `,
      [storeId],
    );

    if (storeResult.rows.length === 0) {
      throw new Error('Store not found');
    }

    const store = storeResult.rows[0];
    console.log(`Store: ${store.name}`);

    // A NULL/empty URL would otherwise produce the bogus address "null/brands".
    if (typeof store.dutchie_url !== 'string' || store.dutchie_url === '') {
      throw new Error('Store has no dutchie_url');
    }

    // Build brands page URL; strip trailing slashes so we never request "//brands".
    const brandsUrl = `${store.dutchie_url.replace(/\/+$/, '')}/brands`;
    console.log(`Brands URL: ${brandsUrl}\n`);

    // Get proxy
    const proxy = await getRandomProxy();
    if (proxy) {
      console.log(`šŸ” Using proxy: ${proxy.server}\n`);
    }

    const browser = await createStealthBrowser({ proxy: proxy || undefined, headless: true });

    try {
      const context = await createStealthContext(browser, { state: 'Arizona' });
      const page = await context.newPage();

      console.log('🌐 Loading brands page...');
      await page.goto(brandsUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });

      // Check for Cloudflare
      if (await isCloudflareChallenge(page)) {
        console.log('šŸ›”ļø Cloudflare detected, waiting...');
        const passed = await waitForCloudflareChallenge(page, 60000);
        if (!passed) {
          console.log('āŒ Failed to pass Cloudflare');
          // Cleanup is left to the `finally` blocks: ending the pool here as
          // well would call pool.end() twice (presumably a node-postgres
          // Pool, where the second call rejects — confirm against
          // ./src/db/migrate).
          return;
        }
      }

      await waitForPageLoad(page);
      // Fixed extra delay after load; presumably gives client-side rendering
      // time to finish — TODO confirm it is still needed.
      await page.waitForTimeout(3000);

      console.log('\nšŸ“¦ Extracting brands...\n');

      // Extract brands. This callback runs inside the browser page, so it
      // must be self-contained and cannot call helpers defined in this file.
      const scraped = await page.evaluate(() => {
        const foundBrands: Brand[] = [];

        // Look for brand cards/links
        const brandLinks = document.querySelectorAll('a[href*="/brands/"], a[href*="/brand/"]');

        brandLinks.forEach(link => {
          const img = link.querySelector('img');
          const logoUrl = img ? img.getAttribute('src') || '' : '';

          // Try to get brand name: heading first, then image alt, then the
          // first line of the link text.
          let name = '';
          const heading = link.querySelector('h1, h2, h3, h4, h5, h6');
          if (heading?.textContent) {
            name = heading.textContent.trim();
          } else if (img?.alt) {
            name = img.alt.trim();
          } else {
            const text = link.textContent?.trim() || '';
            name = (text.split('\n')[0] ?? '').trim();
          }

          // Skip empty/one-character names and implausibly long text blobs.
          if (name && name.length > 1 && name.length < 100) {
            foundBrands.push({ name, logo_url: logoUrl || undefined });
          }
        });

        return foundBrands;
      });

      // Several anchors can point at the same brand; keep one entry per name.
      const brands = dedupeBrands(scraped);

      console.log(`āœ… Found ${brands.length} brands!\n`);

      if (brands.length > 0) {
        console.log('Brands found:');
        brands.forEach((brand, i) => {
          console.log(`\n${i + 1}. ${brand.name}`);
          if (brand.logo_url) console.log(` Logo: ${brand.logo_url.substring(0, 80)}...`);
        });

        // Save brands to database with timestamps
        console.log(`\nšŸ’¾ Saving brands to database...`);
        await saveBrands(storeId, brands);
        console.log(`āœ… Saved ${brands.length} brands with timestamps!`);
      } else {
        console.log('āš ļø No brands found - page structure may be different');

        // Save page for debugging
        console.log('\nšŸ“ø Saving screenshot and HTML for debugging...');
        await page.screenshot({ path: '/tmp/brands-page.png', fullPage: true });
        const html = await page.content();
        const fs = await import('fs/promises');
        await fs.writeFile('/tmp/brands-page.html', html);
        console.log('Saved to /tmp/brands-page.png and /tmp/brands-page.html');
      }
    } finally {
      // Single cleanup point: runs on success, on early return and on error.
      await browser.close();
    }
  } catch (error) {
    console.error('āŒ Error:', error);
  } finally {
    await pool.end();
  }
}

// Scrape Sol Flower Deer Valley (ID 23).
// `void` marks the promise as intentionally not awaited; scrapeBrands logs
// its own errors.
void scrapeBrands(23);