/**
 * Price enrichment worker.
 *
 * Visits the stored `dutchie_url` of every product of one dispensary whose
 * `regular_price` is still NULL, scrapes a price from the page and writes it
 * back to the `products` table.
 *
 * Command line arguments:
 *   argv[2]  worker label used as a log prefix (default: `P` + last 4 digits of Date.now())
 *   argv[3]  dispensary id (default: 112)
 */
import { firefox, type Page } from 'playwright';
import { pool } from './src/db/migrate.js';
import { getRandomProxy } from './src/utils/proxyManager.js';

const workerNum = process.argv[2] || `P${Date.now().toString().slice(-4)}`;
const dispensaryId = parseInt(process.argv[3] || '112', 10);
const batchSize = 10; // Process 10 products per batch

/** Columns selected from the `products` table for each product to enrich. */
interface Product {
  id: number;
  slug: string;
  name: string;
  brand: string;
  dutchie_url: string;
}

/** Prices scraped from a product page; a field is undefined when nothing usable was found. */
interface ScrapedPrices {
  regularPrice?: number;
  salePrice?: number;
}

/** Counters accumulated over a whole enrichment run. */
interface EnrichmentStats {
  processed: number;
  withPrices: number;
  withoutPrices: number;
}

/**
 * Fetches the next batch of products of the configured dispensary that have a
 * `dutchie_url` but no `regular_price`, ordered by id.
 *
 * @param limit   Maximum number of rows to return.
 * @param afterId When given, only rows with `id` greater than this value are
 *                returned. Callers pass the last id they already attempted so
 *                that a product whose price could not be found (and therefore
 *                still has `regular_price IS NULL`) is not returned again.
 * @returns The matching rows, possibly empty.
 */
async function getProductsNeedingPrices(
  limit: number,
  afterId: number | null = null
): Promise<Product[]> {
  const result = await pool.query(
    `
    SELECT id, slug, name, brand, dutchie_url
    FROM products
    WHERE dispensary_id = $1
      AND regular_price IS NULL
      AND dutchie_url IS NOT NULL
      AND ($3::bigint IS NULL OR id > $3::bigint)
    ORDER BY id
    LIMIT $2
  `,
    [dispensaryId, limit, afterId]
  );
  return result.rows;
}

/**
 * Navigates `page` to `productUrl` and tries to read a price from it.
 *
 * JSON-LD `Product` data is tried first; if none is present, the first `$`
 * amounts found in the page text are used. Navigation or evaluation errors
 * are logged and reported as "no price" rather than thrown, so one bad page
 * does not abort the run.
 *
 * @param page       Playwright page reused across products.
 * @param productUrl URL of the product page to load.
 * @returns The scraped prices; both fields are undefined when nothing was found.
 */
async function extractPriceFromPage(page: Page, productUrl: string): Promise<ScrapedPrices> {
  try {
    console.log(`[${workerNum}] Loading: ${productUrl}`);
    await page.goto(productUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
    // Fixed pause after DOMContentLoaded before reading the page.
    await page.waitForTimeout(2000);

    // Extract price data from the page. The callback runs inside the browser,
    // so it must be self-contained and cannot reference Node-side helpers.
    const priceData = await page.evaluate(() => {
      // Try JSON-LD structured data first
      const scripts = Array.from(
        document.querySelectorAll('script[type="application/ld+json"]')
      );
      for (const script of scripts) {
        try {
          const data = JSON.parse(script.textContent || '');
          if (data['@type'] === 'Product' && data.offers) {
            // `|| undefined` maps an unparseable (NaN) or zero price to "missing".
            return {
              regularPrice: parseFloat(data.offers.price) || undefined,
              salePrice: undefined,
            };
          }
        } catch (e) {
          // Malformed or unexpected JSON-LD: continue to the next script
        }
      }

      // Fallback: extract from page text
      const pageText = document.body.textContent || '';

      // Look for price patterns like $30.00, $40.00
      const priceMatches = pageText.match(/\$(\d+\.?\d*)/g);
      if (priceMatches && priceMatches.length > 0) {
        const [first, second] = priceMatches.map((p) => parseFloat(p.replace('$', '')));

        // With two or more matches, only the first two are considered: the
        // lower one is taken as the sale price and the higher as the regular price.
        if (first !== undefined && second !== undefined) {
          return {
            salePrice: Math.min(first, second),
            regularPrice: Math.max(first, second),
          };
        }
        if (first !== undefined) {
          return { regularPrice: first, salePrice: undefined };
        }
      }

      return { regularPrice: undefined, salePrice: undefined };
    });

    return priceData;
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.log(`[${workerNum}] ⚠️ Error loading page: ${message}`);
    return { regularPrice: undefined, salePrice: undefined };
  }
}

/**
 * Writes the scraped prices for one product and bumps its `updated_at`.
 *
 * @param productId    Primary key of the product row.
 * @param regularPrice Regular price, stored as NULL when missing.
 * @param salePrice    Sale price, stored as NULL when missing.
 */
async function updateProductPrice(
  productId: number,
  regularPrice?: number,
  salePrice?: number
): Promise<void> {
  // `||` is deliberate: a 0 or NaN price is stored as NULL, matching how the
  // rest of this script treats falsy prices as "no price found".
  await pool.query(
    `
    UPDATE products
    SET regular_price = $1,
        sale_price = $2,
        updated_at = CURRENT_TIMESTAMP
    WHERE id = $3
  `,
    [regularPrice || null, salePrice || null, productId]
  );
}

/**
 * Processes every product that needs a price, batch by batch, until the
 * query returns no more rows.
 *
 * Each product is attempted at most once per run: the id of the last
 * attempted product is passed back to `getProductsNeedingPrices`, so products
 * for which no price was found cannot be returned again and keep the loop
 * running forever.
 *
 * @param page Playwright page used to load the product pages.
 * @returns Counters describing the run.
 */
async function enrichAllProducts(page: Page): Promise<EnrichmentStats> {
  const stats: EnrichmentStats = { processed: 0, withPrices: 0, withoutPrices: 0 };
  let batchNum = 0;
  let lastAttemptedId: number | null = null;

  // Keep processing batches
  while (true) {
    const products = await getProductsNeedingPrices(batchSize, lastAttemptedId);

    if (products.length === 0) {
      console.log(`[${workerNum}] ℹ️ No more products need price enrichment`);
      break;
    }

    batchNum++;
    console.log(`[${workerNum}] ${'─'.repeat(70)}`);
    console.log(`[${workerNum}] 📦 BATCH #${batchNum}: Processing ${products.length} products`);
    console.log(`[${workerNum}] ${'─'.repeat(70)}\n`);

    for (const [index, product] of products.entries()) {
      // Advance the cursor before doing any work so a failure below cannot
      // cause the same product to be fetched again in this run.
      lastAttemptedId = product.id;

      console.log(
        `[${workerNum}] [${index + 1}/${products.length}] ${product.brand} - ${product.name.substring(0, 40)}`
      );

      const { regularPrice, salePrice } = await extractPriceFromPage(page, product.dutchie_url);
      await updateProductPrice(product.id, regularPrice, salePrice);
      stats.processed++;

      if (regularPrice || salePrice) {
        stats.withPrices++;
        const priceStr = salePrice
          ? `Sale: $${salePrice.toFixed(2)} (Reg: $${regularPrice?.toFixed(2) || 'N/A'})`
          : `Price: $${regularPrice?.toFixed(2)}`;
        console.log(`[${workerNum}] ✅ ${priceStr}`);
      } else {
        stats.withoutPrices++;
        console.log(`[${workerNum}] ⚠️ No price found`);
      }

      // Small delay between products
      await page.waitForTimeout(500);
    }

    console.log(`\n[${workerNum}] ✅ Batch #${batchNum} complete\n`);

    // Delay between batches
    await page.waitForTimeout(2000);
  }

  return stats;
}

/**
 * Script entry point: validates the dispensary, picks a proxy, launches a
 * headless Firefox and runs the enrichment.
 *
 * Failures that the script detects itself (unknown dispensary, no proxy) are
 * logged and reported through `process.exitCode = 1` instead of
 * `process.exit(1)`, so the `finally` blocks still close the browser and the
 * database pool.
 */
async function main(): Promise<void> {
  console.log(`\n${'='.repeat(70)}`);
  console.log(`💰 PRICE ENRICHMENT WORKER - ${workerNum}`);
  console.log(` Dispensary ID: ${dispensaryId}`);
  console.log(` Batch Size: ${batchSize} products`);
  console.log(`${'='.repeat(70)}\n`);

  try {
    // parseInt yields NaN for a non-numeric argv[3]; reject it before querying.
    if (!Number.isInteger(dispensaryId)) {
      console.error(`[${workerNum}] ❌ Invalid dispensary ID: ${process.argv[3]}`);
      process.exitCode = 1;
      return;
    }

    // Get dispensary info
    const dispensaryResult = await pool.query(
      'SELECT id, name, menu_url FROM dispensaries WHERE id = $1',
      [dispensaryId]
    );

    if (dispensaryResult.rows.length === 0) {
      console.error(`[${workerNum}] ❌ Dispensary ID ${dispensaryId} not found`);
      process.exitCode = 1;
      return;
    }

    console.log(`[${workerNum}] ✅ Dispensary: ${dispensaryResult.rows[0].name}\n`);

    // Get proxy
    const proxy = await getRandomProxy();
    if (!proxy) {
      console.log(`[${workerNum}] ❌ No proxy available`);
      process.exitCode = 1;
      return;
    }

    console.log(`[${workerNum}] 🔐 Using proxy: ${proxy.server}\n`);

    // Launch browser
    const browser = await firefox.launch({ headless: true });
    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent:
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        proxy: {
          server: proxy.server,
          username: proxy.username,
          password: proxy.password,
        },
      });

      const page = await context.newPage();
      const stats = await enrichAllProducts(page);

      console.log(`\n[${workerNum}] ${'='.repeat(70)}`);
      console.log(`[${workerNum}] ✅ PRICE ENRICHMENT COMPLETE`);
      console.log(`[${workerNum}] Products processed: ${stats.processed}`);
      console.log(`[${workerNum}] Products with prices: ${stats.withPrices}`);
      console.log(`[${workerNum}] Products without prices: ${stats.withoutPrices}`);
      console.log(`[${workerNum}] ${'='.repeat(70)}\n`);
    } finally {
      // Always release the browser process, even when scraping throws.
      await browser.close();
    }
  } finally {
    // Always release database connections so the process can exit.
    await pool.end();
  }
}

main().catch((error: unknown) => {
  console.error(error);
  // Report the failure to the caller instead of exiting with status 0.
  process.exitCode = 1;
});