import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { Browser, Page } from 'puppeteer';
import { SocksProxyAgent } from 'socks-proxy-agent';
import { pool } from '../db/pool';
import { downloadProductImageLegacy } from '../utils/image-storage';
import { logger } from './logger';
import { registerScraper, updateScraperStats, completeScraper } from '../routes/scraper-monitor';
import { incrementProxyFailure, getActiveProxy, isBotDetectionError, putProxyInTimeout } from './proxy';
import { bypassAgeGate, detectStateFromUrl, setAgeGateCookies } from '../utils/age-gate';
import { normalizeAvailability, AvailabilityStatus } from './availability';

// Apply stealth plugin for antidetect/anti-fingerprinting
puppeteer.use(StealthPlugin());

/** Connection settings for an outbound proxy. */
interface ProxyConfig {
  host: string;
  port: number;
  protocol: string;
  username?: string;
  password?: string;
}

/** A scraped product in the shape consumed by {@link saveProducts}. */
interface Product {
  dutchieProductId: string;
  name: string;
  variant?: string;
  description?: string;
  price?: number;
  originalPrice?: number;
  strainType?: string;
  thcPercentage?: number;
  cbdPercentage?: number;
  brand?: string;
  weight?: string;
  imageUrl?: string;
  dutchieUrl: string;
  metadata?: any;
  // Availability tracking
  availabilityStatus?: AvailabilityStatus;
  availabilityRaw?: any;
  stockQuantity?: number | null;
}

/** Fields extracted from a single product detail page. */
interface ProductDetails {
  fullSizeImage: string | null;
  description: string | null;
  thc: number | null;
  cbd: number | null;
  strainType: string | null;
  terpenes: string[];
  effects: string[];
  brand: string | null;
  lineage: string | null;
  flavors: string[];
  weights: string[];
}

/** Named user-agent strings selectable via {@link getUserAgent}. */
export const USER_AGENTS = {
  'chrome-windows': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  'chrome-mac': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  'chrome-linux': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  'mobile-ios': 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
  'mobile-android': 'Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36',
  'googlebot': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
  'bingbot': 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)'
};

/** Groups of {@link USER_AGENTS} keys used by the `rotate-*` selectors. */
export const USER_AGENT_GROUPS = {
  desktop: ['chrome-windows', 'chrome-mac', 'chrome-linux'],
  mobile: ['mobile-ios', 'mobile-android'],
  serp: ['googlebot', 'bingbot']
};

/** Error-message fragments treated as a proxy failure during page navigation. */
const NAVIGATION_FAILURE_MARKERS: readonly string[] = ['timeout', 'net::', 'ERR_', 'Navigation'];

/** Error-message fragments treated as a proxy failure anywhere in a category scrape. */
const SCRAPE_FAILURE_MARKERS: readonly string[] = [
  ...NAVIGATION_FAILURE_MARKERS,
  'Protocol error',
  'Target closed'
];

/** Picks a uniformly random user-agent string from the given list of {@link USER_AGENTS} keys. */
function getRandomUserAgentFromGroup(group: string[]): string {
  const randomKey = group[Math.floor(Math.random() * group.length)];
  return USER_AGENTS[randomKey as keyof typeof USER_AGENTS];
}

/**
 * Resolves a user-agent string.
 *
 * @param key - A {@link USER_AGENTS} key, or one of `rotate-desktop`, `rotate-mobile`,
 *   `rotate-serp` to pick randomly from that group. When omitted or unknown, a random
 *   desktop user agent is returned.
 * @returns The user-agent string to send.
 */
export function getUserAgent(key?: string): string {
  if (!key) return getRandomUserAgentFromGroup(USER_AGENT_GROUPS.desktop);

  // Check if it's a group
  if (key === 'rotate-desktop') return getRandomUserAgentFromGroup(USER_AGENT_GROUPS.desktop);
  if (key === 'rotate-mobile') return getRandomUserAgentFromGroup(USER_AGENT_GROUPS.mobile);
  if (key === 'rotate-serp') return getRandomUserAgentFromGroup(USER_AGENT_GROUPS.serp);

  // Otherwise treat as specific UA. Only own keys count, so names inherited from
  // Object.prototype (e.g. "constructor") fall back to a random desktop UA
  // instead of returning a non-string value.
  const specific = Object.prototype.hasOwnProperty.call(USER_AGENTS, key)
    ? USER_AGENTS[key as keyof typeof USER_AGENTS]
    : undefined;
  return specific || getRandomUserAgentFromGroup(USER_AGENT_GROUPS.desktop);
}

/** Extracts the hexadecimal image id from an `images.dutchie.com` URL, or `null` if absent. */
function extractImageIdFromUrl(url: string): string | null {
  try {
    const match = url.match(/images\.dutchie\.com\/([a-f0-9]+)/i);
    return match ? match[1] : null;
  } catch (e) {
    return null;
  }
}

/**
 * Rewrites a Dutchie image URL to request a large rendition.
 * URLs that do not contain a recognisable image id are returned unchanged.
 */
function getFullSizeImageUrl(imageUrl: string): string {
  const imageId = extractImageIdFromUrl(imageUrl);
  if (!imageId) return imageUrl;
  return `https://images.dutchie.com/${imageId}?auto=format&fit=max&q=95&w=2000&h=2000`;
}

/**
 * Returns a copy of `product` with text fields truncated and defaults applied.
 * `thc` / `cbd` are kept only when truthy and below 100; otherwise they become `null`.
 */
function sanitizeProductData(product: any): any {
  return {
    ...product,
    name: product.name?.substring(0, 500) || 'Unnamed Product',
    description: product.description || null,
    brand: product.brand?.substring(0, 500) || null,
    weight: product.weight?.substring(0, 100) || null,
    thc: product.thc && product.thc < 100 ? product.thc : null,
    cbd: product.cbd && product.cbd < 100 ? product.cbd : null
  };
}

/**
 * Installs scripts that run before any page script and override several
 * `navigator` / `window` properties commonly probed by automation detectors.
 */
async function makePageStealthy(page: Page): Promise<void> {
  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, 'webdriver', {
      get: () => false,
    });
  });

  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, 'plugins', {
      get: () => [1, 2, 3, 4, 5],
    });
  });

  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, 'languages', {
      get: () => ['en-US', 'en'],
    });
  });

  await page.evaluateOnNewDocument(() => {
    (window as any).chrome = {
      runtime: {},
    };
  });

  await page.evaluateOnNewDocument(() => {
    const permissions = window.navigator.permissions;
    // Bind to the Permissions object: calling the detached method as a bare
    // function throws "Illegal invocation" for every non-notification query.
    const originalQuery = permissions.query.bind(permissions);
    permissions.query = (parameters: any) =>
      parameters.name === 'notifications'
        ? Promise.resolve({ state: 'denied' } as PermissionStatus)
        : originalQuery(parameters);
  });
}

/** Result returned when every attempt to load a product detail page failed. */
function emptyProductDetails(): ProductDetails {
  return {
    fullSizeImage: null,
    description: null,
    thc: null,
    cbd: null,
    strainType: null,
    terpenes: [],
    effects: [],
    brand: null,
    lineage: null,
    flavors: [],
    weights: []
  };
}

/**
 * Navigates `page` to a product URL and extracts details from the rendered DOM.
 *
 * Navigation and extraction are attempted up to three times with no delay in between.
 * This function does not throw for page failures: if all attempts fail it logs an
 * error and returns an all-empty result.
 *
 * @param page - Page to navigate; it is left on the product URL afterwards.
 * @param productUrl - Absolute URL of the product detail page.
 * @param productName - Used only for log messages.
 */
async function scrapeProductDetails(page: Page, productUrl: string, productName: string): Promise<ProductDetails> {
  const maxRetries = 3;

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      await page.goto(productUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });

      // Everything inside this callback runs in the browser context, so it must be
      // self-contained (no references to module-level helpers).
      const details = await page.evaluate(() => {
        const allText = document.body.textContent || '';

        // First regex in `patterns` that matches the page text wins.
        const firstPercentage = (patterns: RegExp[]): number | null => {
          for (const pattern of patterns) {
            const match = allText.match(pattern);
            if (match) return parseFloat(match[1]);
          }
          return null;
        };

        // Names from `candidates` that appear as whole words (case-insensitive) in the page text.
        const mentionedIn = (candidates: string[]): string[] =>
          candidates.filter(word => allText.match(new RegExp(`\\b${word}\\b`, 'i')));

        let fullSizeImage: string | null = null;
        const mainImageSelectors = [
          'img[class*="ProductImage"]',
          'img[class*="product-image"]',
          '[class*="ImageGallery"] img',
          'main img',
          'img[src*="images.dutchie.com"]'
        ];
        for (const sel of mainImageSelectors) {
          const img = document.querySelector(sel) as HTMLImageElement;
          if (img?.src && img.src.includes('dutchie.com')) {
            fullSizeImage = img.src;
            break;
          }
        }

        let description = '';
        const descSelectors = [
          '[class*="description"]',
          '[class*="Description"]',
          '[data-testid*="description"]',
          'p[class*="product"]'
        ];
        for (const sel of descSelectors) {
          const el = document.querySelector(sel);
          if (el?.textContent?.trim() && el.textContent.length > 20) {
            description = el.textContent.trim();
            break;
          }
        }

        const thc = firstPercentage([
          /THC[:\s]*(\d+\.?\d*)\s*%/i,
          /Total\s+THC[:\s]*(\d+\.?\d*)\s*%/i,
          /(\d+\.?\d*)\s*%\s+THC/i
        ]);

        const cbd = firstPercentage([
          /CBD[:\s]*(\d+\.?\d*)\s*%/i,
          /Total\s+CBD[:\s]*(\d+\.?\d*)\s*%/i,
          /(\d+\.?\d*)\s*%\s+CBD/i
        ]);

        // Precedence is Indica, then Sativa, then Hybrid when several words appear.
        let strainType: string | null = null;
        if (allText.match(/\bindica\b/i)) strainType = 'Indica';
        else if (allText.match(/\bsativa\b/i)) strainType = 'Sativa';
        else if (allText.match(/\bhybrid\b/i)) strainType = 'Hybrid';

        const terpenes = mentionedIn([
          'Myrcene', 'Limonene', 'Caryophyllene', 'Pinene', 'Linalool',
          'Humulene', 'Terpinolene', 'Ocimene', 'Bisabolol', 'Valencene'
        ]);

        const effects = mentionedIn([
          'Relaxed', 'Happy', 'Euphoric', 'Uplifted', 'Creative', 'Energetic', 'Focused',
          'Calm', 'Sleepy', 'Hungry', 'Talkative', 'Giggly', 'Aroused'
        ]);

        let brand: string | null = null;
        const brandSelectors = [
          '[class*="brand"]',
          '[class*="Brand"]',
          '[data-testid*="brand"]'
        ];
        for (const sel of brandSelectors) {
          const el = document.querySelector(sel);
          if (el?.textContent?.trim()) {
            brand = el.textContent.trim();
            break;
          }
        }

        let lineage: string | null = null;
        const lineageMatch = allText.match(/(?:Lineage|Genetics|Parents?)[:\s]*([^\n]+)/i);
        if (lineageMatch) {
          lineage = lineageMatch[1].trim();
        }

        const flavors = mentionedIn([
          'Sweet', 'Citrus', 'Earthy', 'Pine', 'Berry', 'Diesel', 'Sour', 'Floral', 'Spicy',
          'Woody', 'Tropical', 'Fruity', 'Vanilla', 'Mint', 'Cheese', 'Grape', 'Lemon', 'Orange'
        ]);

        // Distinct weight strings, in order of first appearance.
        const weights: string[] = [];
        const weightMatches = allText.matchAll(/(\d+\.?\d*\s*(?:g|oz|mg|gram))/gi);
        for (const match of weightMatches) {
          const weight = match[1].trim();
          if (!weights.includes(weight)) {
            weights.push(weight);
          }
        }

        return {
          fullSizeImage,
          description,
          thc,
          cbd,
          strainType,
          terpenes,
          effects,
          brand,
          lineage,
          flavors,
          weights
        };
      });

      return details;
    } catch (error) {
      logger.warn('scraper', ` Attempt ${attempt}/${maxRetries} failed for ${productName}: ${error}`);
      // No delays - just retry immediately
    }
  }

  logger.error('scraper', ` ✗ All attempts failed for ${productName}`);
  return emptyProductDetails();
}

/**
 * Reports a scrape error against a proxy: bot-detection errors put the proxy in
 * timeout, and errors containing any of `failureMarkers` increment its failure count.
 *
 * @returns `true` when the error was reported to the proxy layer, `false` when it
 *   matched neither category.
 */
async function recordProxyError(
  proxyId: number,
  errorMsg: string,
  failureMarkers: readonly string[]
): Promise<boolean> {
  if (isBotDetectionError(errorMsg)) {
    // Bot detection! Put this proxy in timeout
    logger.warn('scraper', `🤖 Bot detection triggered for proxy ${proxyId}!`);
    putProxyInTimeout(proxyId, errorMsg);
    return true;
  }

  if (failureMarkers.some(marker => errorMsg.includes(marker))) {
    // Regular proxy failure - increment failure count
    logger.warn('scraper', `Proxy failure detected, incrementing failure count for proxy ${proxyId}`);
    await incrementProxyFailure(proxyId, errorMsg);
    return true;
  }

  return false;
}

/**
 * Scrapes one category listing and then every product page linked from it.
 *
 * Launches a browser (through the active proxy when one is available), loads the
 * category URL stored in the `categories` table, collects the product cards, visits
 * each product page for details, and stamps `categories.last_scraped_at`.
 *
 * @param storeId - Store the category belongs to (used for monitoring registration).
 * @param categoryId - Primary key of the category row to scrape.
 * @param userAgent - Optional key understood by {@link getUserAgent}.
 * @returns The scraped products, ready to pass to {@link saveProducts}.
 * @throws Error if the category does not exist, or any navigation/DB error. When the
 *   navigation error is classified as bot detection it is rethrown as
 *   `Error("Bot detection: …")`.
 */
export async function scrapeCategory(storeId: number, categoryId: number, userAgent?: string): Promise<Product[]> {
  let browser: Browser | null = null;
  const scraperId = `cat-${categoryId}-${Date.now()}`;
  let proxyId: number | null = null;
  // Set once an error has been reported against the proxy, so that the outer
  // handler does not count the same failure a second time.
  let proxyErrorRecorded = false;

  try {
    const categoryResult = await pool.query(`
      SELECT c.*, s.slug as store_slug, s.name as store_name
      FROM categories c
      JOIN stores s ON c.store_id = s.id
      WHERE c.id = $1
    `, [categoryId]);

    if (categoryResult.rows.length === 0) {
      throw new Error('Category not found');
    }

    const category = categoryResult.rows[0];
    logger.info('scraper', `Scraping category: ${category.name} for ${category.store_name}`);

    // Register scraper with monitoring system
    registerScraper(scraperId, storeId, category.store_name, categoryId, category.name);

    const proxy = await getActiveProxy();
    if (proxy) {
      proxyId = proxy.id;
    }

    const launchOptions: any = {
      headless: 'new',
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--window-size=1920,1080'
      ]
    };

    if (proxy) {
      if (proxy.protocol === 'socks5') {
        launchOptions.args.push(`--proxy-server=socks5://${proxy.host}:${proxy.port}`);
      } else if (proxy.protocol === 'http' || proxy.protocol === 'https') {
        launchOptions.args.push(`--proxy-server=${proxy.protocol}://${proxy.host}:${proxy.port}`);
      }
      logger.info('scraper', `Using proxy: ${proxy.protocol}://${proxy.host}:${proxy.port}`);
    }

    browser = await puppeteer.launch(launchOptions);
    const page = await browser.newPage();
    await makePageStealthy(page);
    await page.setViewport({ width: 1920, height: 1080 });

    // Use provided userAgent or random if not specified
    const ua = getUserAgent(userAgent);
    await page.setUserAgent(ua);

    // Set age gate bypass cookies BEFORE navigation (standard for all cannabis sites)
    const state = detectStateFromUrl(category.dutchie_url);
    await setAgeGateCookies(page, category.dutchie_url, state);

    logger.info('scraper', `Loading page: ${category.dutchie_url}`);

    try {
      await page.goto(category.dutchie_url, { waitUntil: 'networkidle2', timeout: 60000 });

      // If age gate still appears, try to bypass it
      await bypassAgeGate(page, state);

      // Wait for products to load
      await page.waitForSelector('[data-testid="product-list-item"], a[href*="/product/"]', {
        timeout: 30000,
      }).catch(() => {
        logger.warn('scraper', 'No product selectors found, trying anyway...');
      });

      logger.info('scraper', 'Scrolling to load all products...');
      await autoScroll(page);
    } catch (navError) {
      logger.error('scraper', `Navigation error: ${navError}`);

      // Check if this is bot detection - put proxy in timeout instead of hard failure
      if (proxyId !== null) {
        const errorMsg = String(navError);
        const botDetected = isBotDetectionError(errorMsg);
        proxyErrorRecorded = await recordProxyError(proxyId, errorMsg, NAVIGATION_FAILURE_MARKERS);
        if (botDetected) {
          throw new Error(`Bot detection: ${errorMsg}`);
        }
      }

      throw navError;
    }

    logger.info('scraper', 'Extracting product list from page...');

    // Runs in the browser context: collects one entry per product card.
    const products = await page.evaluate(() => {
      const items: any[] = [];
      const cards = document.querySelectorAll('[data-testid="product-list-item"]');
      console.log(`Found ${cards.length} product cards`);

      cards.forEach((card) => {
        try {
          const allText = card.textContent || '';

          let name = '';
          const nameSelectors = ['a[href*="/product/"]', 'h1', 'h2', 'h3', 'h4'];
          for (const sel of nameSelectors) {
            const el = card.querySelector(sel);
            if (el?.textContent?.trim()) {
              name = el.textContent.trim();
              name = name.split('\n')[0].trim();
              break;
            }
          }

          if (!name || name.length < 2) return;

          // First dollar amount is taken as the price, the second (if any) as the original price.
          let price: number | null = null;
          let originalPrice: number | null = null;
          const priceMatches = allText.match(/\$(\d+\.?\d*)/g);
          if (priceMatches && priceMatches.length > 0) {
            price = parseFloat(priceMatches[0].replace('$', ''));
            if (priceMatches.length > 1) {
              originalPrice = parseFloat(priceMatches[1].replace('$', ''));
            }
          }

          // Extract variant (weight/size) - look for common patterns
          let variant: string | null = null;
          const variantPatterns = [
            /(\d+\.?\d*\s*(?:g|oz|mg|ml|gram|ounce))/i, // Weight units
            /(\d+\s*pack)/i, // Pack sizes
            /(\d+\s*ct)/i, // Count
            /(\d+\s*x\s*\d+\.?\d*\s*(?:g|mg|ml))/i // Multi-pack (e.g., 5x0.5g)
          ];
          for (const pattern of variantPatterns) {
            const match = allText.match(pattern);
            if (match) {
              variant = match[1].trim();
              break;
            }
          }

          const linkEl = card.querySelector('a[href*="/product/"]') as HTMLAnchorElement | null;
          let href = linkEl?.href || linkEl?.getAttribute('href') || '';
          if (href && href.startsWith('/')) {
            href = 'https://dutchie.com' + href;
          }

          items.push({
            name,
            variant,
            price,
            originalPrice,
            href: href || window.location.href
          });
        } catch (err) {
          console.error('Error parsing product card:', err);
        }
      });

      return items;
    });

    logger.info('scraper', `Found ${products.length} products total`);
    logger.info('scraper', `Now visiting each product page for complete details...`);

    let successCount = 0;
    let failCount = 0;

    // Update initial stats
    updateScraperStats(scraperId, {
      productsProcessed: 0,
      productsTotal: products.length
    });

    for (let i = 0; i < products.length; i++) {
      const product = products[i];

      try {
        logger.info('scraper', ` [${i + 1}/${products.length}] ${product.name}`);

        updateScraperStats(scraperId, {
          productsProcessed: i + 1,
          productsTotal: products.length
        }, `Processing: ${product.name}`);

        if (!product.href) {
          logger.warn('scraper', ` ⚠ No product URL, skipping details`);
          product.metadata = {};
          failCount++;
          continue;
        }

        const details = await scrapeProductDetails(page, product.href, product.name);

        product.imageUrl = details.fullSizeImage ? getFullSizeImageUrl(details.fullSizeImage) : null;
        product.description = details.description;
        product.thc = details.thc;
        product.cbd = details.cbd;
        product.strainType = details.strainType;
        product.brand = details.brand;
        product.weight = details.weights.length > 0 ? details.weights[0] : null;
        product.metadata = {
          terpenes: details.terpenes,
          effects: details.effects,
          lineage: details.lineage,
          flavors: details.flavors,
          allWeights: details.weights
        };

        if (details.thc || details.cbd || details.description) {
          logger.info('scraper', ` ✓ THC: ${details.thc}%, CBD: ${details.cbd}%`);
          successCount++;
        } else {
          logger.warn('scraper', ` ⚠ Limited data extracted`);
          failCount++;
        }

        // No delays - scrape fast!
      } catch (error) {
        logger.error('scraper', ` ✗ Unexpected error: ${error}`);
        product.metadata = {};
        failCount++;
      }
    }

    await browser.close();
    // Already closed: make sure the error path below does not close it again.
    browser = null;

    logger.info('scraper', `━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
    logger.info('scraper', `✅ Category complete: ${category.name}`);
    logger.info('scraper', ` Total products: ${products.length}`);
    logger.info('scraper', ` Success: ${successCount}`);
    logger.info('scraper', ` Failed: ${failCount}`);
    logger.info('scraper', `━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);

    await pool.query(`
      UPDATE categories
      SET last_scraped_at = CURRENT_TIMESTAMP
      WHERE id = $1
    `, [categoryId]);

    // Mark scraper as complete
    completeScraper(scraperId);

    const formattedProducts: Product[] = products.map((p, index) => {
      const sanitized = sanitizeProductData(p);

      // Normalize availability from Dutchie product data
      const availability = normalizeAvailability(p);

      return {
        dutchieProductId: `${category.store_slug}-${category.slug}-${Date.now()}-${index}`,
        name: sanitized.name,
        variant: p.variant || null,
        description: sanitized.description,
        price: p.price,
        originalPrice: p.originalPrice,
        thcPercentage: sanitized.thc,
        cbdPercentage: sanitized.cbd,
        strainType: p.strainType,
        brand: sanitized.brand,
        weight: sanitized.weight,
        imageUrl: p.imageUrl,
        dutchieUrl: p.href,
        metadata: p.metadata || {},
        availabilityStatus: availability.status,
        availabilityRaw: availability.raw,
        stockQuantity: availability.quantity
      };
    });

    return formattedProducts;
  } catch (error) {
    logger.error('scraper', `❌ Category scraping error: ${error}`);

    // Smart proxy error handling. Skipped when the navigation handler already
    // reported this failure, otherwise the proxy would be penalised twice.
    if (proxyId !== null && !proxyErrorRecorded) {
      await recordProxyError(proxyId, String(error), SCRAPE_FAILURE_MARKERS);
    }

    // Mark scraper as failed
    completeScraper(scraperId, String(error));

    if (browser) {
      try {
        await browser.close();
      } catch (e) {
        logger.error('scraper', `Error closing browser: ${e}`);
      }
    }

    throw error;
  }
}

/**
 * Scrolls the page down in 500px steps every 200ms until the scrolled distance
 * reaches the document height, to trigger lazy loading of the listing.
 */
async function autoScroll(page: Page): Promise<void> {
  await page.evaluate(async () => {
    await new Promise<void>((resolve) => {
      let totalHeight = 0;
      const distance = 500;
      const timer = setInterval(() => {
        // Re-read on every tick: the document grows as more items load.
        const scrollHeight = document.body.scrollHeight;
        window.scrollBy(0, distance);
        totalHeight += distance;

        if (totalHeight >= scrollHeight) {
          clearInterval(timer);
          resolve();
        }
      }, 200);
    });
  });
}

/** Builds a unique slug from a product name plus a timestamp and random suffix. */
function buildProductSlug(name: string): string {
  const baseSlug = name
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '')
    .substring(0, 150);
  const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  return `${baseSlug}-${uniqueSuffix}`;
}

/**
 * Upserts scraped products for one store category inside a single transaction.
 *
 * All currently in-stock products of the category are first marked out of stock;
 * every scraped product is then updated (matched on store, category, name and
 * variant) or inserted and marked in stock again. Missing local images are
 * downloaded on a best-effort basis.
 *
 * Each product is written under its own savepoint, so a failure for one product is
 * logged and rolled back without aborting the others.
 *
 * NOTE(review): products that are re-marked in stock keep the
 * `last_seen_out_of_stock_at` value written by the initial bulk update — confirm
 * whether that is intended.
 *
 * @throws Rethrows any error that occurs outside per-product handling, after
 *   rolling back the transaction.
 */
export async function saveProducts(storeId: number, categoryId: number, products: Product[]): Promise<void> {
  const client = await pool.connect();

  try {
    await client.query('BEGIN');

    logger.info('scraper', `Saving ${products.length} products to database...`);

    // Mark all products as out-of-stock before processing (they'll be re-marked if found)
    // Also update availability_status and last_seen_out_of_stock_at for state transition tracking
    await client.query(`
      UPDATE products
      SET in_stock = false,
          availability_status = 'out_of_stock',
          last_seen_out_of_stock_at = CASE
            WHEN availability_status != 'out_of_stock' THEN CURRENT_TIMESTAMP
            ELSE last_seen_out_of_stock_at
          END
      WHERE store_id = $1 AND category_id = $2 AND in_stock = true
    `, [storeId, categoryId]);

    let savedCount = 0;

    for (const product of products) {
      // A failed statement aborts the surrounding PostgreSQL transaction; the
      // savepoint lets us undo just this product and carry on with the next.
      await client.query('SAVEPOINT save_product');

      try {
        // Get availability from product (defaults to in_stock if product exists in scraped data)
        const availStatus = product.availabilityStatus || 'in_stock';
        const availRaw = product.availabilityRaw ? JSON.stringify(product.availabilityRaw) : null;
        const stockQty = product.stockQuantity ?? null;

        const existingResult = await client.query(`
          SELECT id, image_url, local_image_path, availability_status
          FROM products
          WHERE store_id = $1 AND name = $2 AND category_id = $3
            AND (variant = $4 OR (variant IS NULL AND $4 IS NULL))
        `, [storeId, product.name, categoryId, product.variant || null]);

        let localImagePath = null;
        let productId: number;

        if (existingResult.rows.length > 0) {
          productId = existingResult.rows[0].id;
          localImagePath = existingResult.rows[0].local_image_path;
          const prevStatus = existingResult.rows[0].availability_status;

          // Determine if we need to update last_seen_in_stock_at
          const isNowInStock = availStatus === 'in_stock' || availStatus === 'limited';
          const wasOutOfStock = prevStatus === 'out_of_stock' || prevStatus === 'unknown';

          await client.query(`
            UPDATE products
            SET name = $1, variant = $2, description = $3, price = $4,
                strain_type = $5, thc_percentage = $6, cbd_percentage = $7,
                brand = $8, weight = $9, image_url = $10, dutchie_url = $11,
                in_stock = true, metadata = $12, last_seen_at = CURRENT_TIMESTAMP,
                updated_at = CURRENT_TIMESTAMP,
                availability_status = $14,
                availability_raw = $15,
                stock_quantity = $16,
                last_seen_in_stock_at = CASE
                  WHEN $17 THEN CURRENT_TIMESTAMP
                  ELSE last_seen_in_stock_at
                END
            WHERE id = $13
          `, [
            product.name, product.variant, product.description, product.price,
            product.strainType, product.thcPercentage, product.cbdPercentage,
            product.brand, product.weight, product.imageUrl, product.dutchieUrl,
            JSON.stringify(product.metadata), productId, availStatus, availRaw, stockQty,
            isNowInStock && wasOutOfStock
          ]);
        } else {
          // Generate unique slug from product name + timestamp + random suffix
          const slug = buildProductSlug(product.name);

          const insertResult = await client.query(`
            INSERT INTO products (
              store_id, category_id, dutchie_product_id, name, slug, variant, description,
              price, strain_type, thc_percentage, cbd_percentage,
              brand, weight, image_url, dutchie_url, in_stock, metadata,
              availability_status, availability_raw, stock_quantity, last_seen_in_stock_at
            ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, true, $16, $17, $18, $19, CURRENT_TIMESTAMP)
            RETURNING id
          `, [
            storeId, categoryId, product.dutchieProductId, product.name, slug, product.variant,
            product.description, product.price, product.strainType, product.thcPercentage,
            product.cbdPercentage, product.brand, product.weight, product.imageUrl,
            product.dutchieUrl, JSON.stringify(product.metadata), availStatus, availRaw, stockQty
          ]);

          productId = insertResult.rows[0].id;
        }

        // Only fetch the image when we do not already have a local copy.
        if (product.imageUrl && !localImagePath) {
          try {
            const result = await downloadProductImageLegacy(product.imageUrl, 0, productId);
            localImagePath = result.urls?.original || null;
            await client.query(`
              UPDATE products
              SET local_image_path = $1
              WHERE id = $2
            `, [localImagePath, productId]);
          } catch (error) {
            logger.error('images', `Failed to download image for ${product.name}: ${error}`);
          }
        }

        await client.query('RELEASE SAVEPOINT save_product');
        savedCount++;
      } catch (productError) {
        // Undo this product's partial writes and clear the aborted state.
        await client.query('ROLLBACK TO SAVEPOINT save_product');
        await client.query('RELEASE SAVEPOINT save_product');
        logger.error('scraper', `Failed to save product ${product.name}: ${productError}`);
      }
    }

    await client.query('COMMIT');

    logger.info('scraper', `✅ Saved ${savedCount} products successfully`);
    if (savedCount < products.length) {
      logger.warn('scraper', `${products.length - savedCount} products could not be saved`);
    }
  } catch (error) {
    try {
      await client.query('ROLLBACK');
    } catch (rollbackError) {
      // Keep the original error as the one that propagates.
      logger.error('scraper', `Error rolling back transaction: ${rollbackError}`);
    }
    logger.error('scraper', `Error saving products: ${error}`);
    throw error;
  } finally {
    client.release();
  }
}

/**
 * Scrapes and saves every scrape-enabled category of a store, one after another,
 * then stamps `stores.last_scraped_at`.
 *
 * A failure in one category is logged and does not stop the remaining categories.
 *
 * @param storeId - Primary key of the store to scrape.
 * @param parallel - Currently only included in the start log message; categories
 *   are processed sequentially.
 * @param userAgent - Optional key understood by {@link getUserAgent}.
 * @throws Rethrows errors from loading the category list or updating the store row.
 */
export async function scrapeStore(storeId: number, parallel: number = 3, userAgent?: string): Promise<void> {
  try {
    logger.info('scraper', `🏪 Starting scrape for store ID: ${storeId} (${parallel} parallel, UA: ${userAgent || 'random'})`);

    const categoriesResult = await pool.query(`
      SELECT c.id, c.name, c.slug, c.dutchie_url
      FROM categories c
      WHERE c.store_id = $1
        AND c.scrape_enabled = true
      ORDER BY c.name
    `, [storeId]);

    logger.info('scraper', `Found ${categoriesResult.rows.length} categories to scrape`);

    for (const category of categoriesResult.rows) {
      try {
        logger.info('scraper', `━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
        logger.info('scraper', `📂 Scraping: ${category.name}`);
        logger.info('scraper', `━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);

        const products = await scrapeCategory(storeId, category.id, userAgent);
        await saveProducts(storeId, category.id, products);

        logger.info('scraper', `✅ Completed ${category.name} - ${products.length} products saved`);
      } catch (error) {
        logger.error('scraper', `❌ Failed to scrape ${category.name}: ${error}`);
      }

      // No delays - scrape fast!
    }

    await pool.query(`
      UPDATE stores
      SET last_scraped_at = CURRENT_TIMESTAMP
      WHERE id = $1
    `, [storeId]);

    logger.info('scraper', `🎉 Store scrape completed: ID ${storeId}`);
  } catch (error) {
    logger.error('scraper', `❌ Store scrape failed: ${error}`);
    throw error;
  }
}