/**
 * Treez menu scraper.
 *
 * Loads a dispensary's menu page in headless Chromium, scrolls until no more
 * `.menu-item` elements appear, parses each item into a `Product`, and
 * inserts or updates one `products` row per item.
 *
 * Command-line arguments:
 *   argv[2]  worker label used as a log prefix (default: "T" + last 4 digits of Date.now())
 *   argv[3]  dispensary id to scrape (default: 149)
 */
import { chromium } from 'playwright';
import { pool } from './src/db/migrate.js';

const workerNum = process.argv[2] || `T${Date.now().toString().slice(-4)}`;
const dispensaryId = parseInt(process.argv[3] || '149', 10);

/** User agent sent by the browser context (a desktop Chrome string). */
const GOOGLE_UA =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

/** Prefix used for relative links when they cannot be resolved against the page URL. */
const FALLBACK_BASE_URL = 'https://best.treez.io';

// Patterns are hoisted so they are compiled once rather than once per menu item.
// None of them use the `g` flag, so sharing them carries no `lastIndex` state.
const WEIGHT_PATTERN = /\d+\.?\d*\s*G/i;
const STRAIN_PATTERN = /(SATIVA|INDICA|HYBRID|I\/S|S\/I)/i;
const THC_PATTERN = /THC\s+([\d.]+)%/i;
const CBD_PATTERN = /CBD\s+([\d.]+)%/i;
const PRICE_PATTERN = /\$(\d+\.?\d*)/;
const PRICE_PER_UNIT_PATTERN = /\$\d+\.?\d*\/\s*([^\s]+)/;
const CATEGORY_PATTERN = /\/category\/([^\/]+)\//;

/** One parsed menu item. Field names mirror the `products` columns used below. */
interface Product {
  name: string;
  brand: string;
  variant: string;
  category: string;
  strain_type?: string;
  thc_percentage?: number;
  cbd_percentage?: number;
  regular_price?: number;
  sale_price?: number;
  price_per_unit?: string;
  dutchie_url: string;
  in_stock: boolean;
}

/** Raw strings read from a single `.menu-item` element. */
interface RawMenuItem {
  href: string;
  brand: string | null;
  productLine: string | null;
  text: string | null;
}

/** Returns a printable message for any thrown value. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Parses a captured numeric string; returns undefined when absent or not a finite number. */
function parseNumber(raw: string | undefined): number | undefined {
  if (raw === undefined) return undefined;
  const value = parseFloat(raw);
  return Number.isFinite(value) ? value : undefined;
}

/**
 * Turns an item link into an absolute URL.
 * Absolute links are returned untouched; relative links are resolved against
 * the page they were found on instead of a hard-coded host.
 */
function resolveProductUrl(href: string, baseUrl: string): string {
  if (href.startsWith('http')) return href;
  try {
    return new URL(href, baseUrl).toString();
  } catch {
    // baseUrl was not a usable absolute URL; fall back to the historical prefix.
    return `${FALLBACK_BASE_URL}${href}`;
  }
}

/**
 * Builds a `Product` from the strings scraped off one menu item.
 *
 * @param raw     link, brand line, product line and full text of the item
 * @param baseUrl URL of the page the item was found on, used for relative links
 */
function parseMenuItem(raw: RawMenuItem, baseUrl: string): Product {
  const { href, brand, productLine, text } = raw;

  // The product line is "<name> <weight>G ..."; the weight token is the variant.
  const name = productLine?.split(WEIGHT_PATTERN)[0]?.trim() || '';
  const variant = productLine?.match(WEIGHT_PATTERN)?.[0] || '';

  const strainType = text?.match(STRAIN_PATTERN)?.[1];

  // 0% is a legitimate reading, so zero is kept rather than treated as missing.
  const thcPercentage = parseNumber(text?.match(THC_PATTERN)?.[1]);
  const cbdPercentage = parseNumber(text?.match(CBD_PATTERN)?.[1]);

  // First dollar amount in the item text. A $0 match is treated as "no price",
  // as before; the same value is used for both regular and sale price.
  const price = parseNumber(text?.match(PRICE_PATTERN)?.[1]) || undefined;

  const pricePerUnit = text?.match(PRICE_PER_UNIT_PATTERN)?.[0];
  const category = href.match(CATEGORY_PATTERN)?.[1] ?? 'unknown';

  return {
    name: name || productLine || '',
    brand: brand?.trim() || '',
    variant: variant.trim(),
    category,
    strain_type: strainType || undefined,
    thc_percentage: thcPercentage,
    cbd_percentage: cbdPercentage,
    regular_price: price,
    sale_price: price,
    price_per_unit: pricePerUnit || undefined,
    dutchie_url: resolveProductUrl(href, baseUrl),
    in_stock: true,
  };
}

/** Lower-cased "brand-name-variant" with non-alphanumeric runs collapsed to single dashes. */
function buildSlug(product: Product): string {
  return `${product.brand}-${product.name}-${product.variant}`
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '');
}

/**
 * Inserts the product, or updates it when a row with the same
 * (dispensary_id, slug) already exists.
 *
 * `category` and `price_per_unit` are parsed but are not written by these statements.
 * NOTE(review): the select-then-write is not atomic; switching to an upsert
 * would need a unique constraint on (dispensary_id, slug) — confirm the schema.
 */
async function saveProduct(product: Product): Promise<void> {
  const slug = buildSlug(product);

  const existing = await pool.query(
    `
      SELECT id FROM products
      WHERE dispensary_id = $1 AND slug = $2
    `,
    [dispensaryId, slug],
  );

  if (existing.rows.length > 0) {
    await pool.query(
      `
        UPDATE products SET
          name = $1,
          brand = $2,
          variant = $3,
          strain_type = $4,
          thc_percentage = $5,
          cbd_percentage = $6,
          regular_price = $7,
          sale_price = $8,
          dutchie_url = $9,
          in_stock = $10,
          updated_at = NOW()
        WHERE dispensary_id = $11 AND slug = $12
      `,
      [
        product.name,
        product.brand,
        product.variant,
        product.strain_type,
        product.thc_percentage,
        product.cbd_percentage,
        product.regular_price,
        product.sale_price,
        product.dutchie_url,
        product.in_stock,
        dispensaryId,
        slug,
      ],
    );
    return;
  }

  await pool.query(
    `
      INSERT INTO products (
        dispensary_id, slug, name, brand, variant, strain_type,
        thc_percentage, cbd_percentage, regular_price, sale_price,
        dutchie_url, in_stock, created_at, updated_at
      ) VALUES (
        $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW(), NOW()
      )
    `,
    [
      dispensaryId,
      slug,
      product.name,
      product.brand,
      product.variant,
      product.strain_type,
      product.thc_percentage,
      product.cbd_percentage,
      product.regular_price,
      product.sale_price,
      product.dutchie_url,
      product.in_stock,
    ],
  );
}

/**
 * Entry point: looks up the dispensary, scrapes its menu and saves the products.
 * Errors are logged rather than rethrown; the browser and the pool are always
 * released, including when setup fails before scraping starts.
 */
async function main(): Promise<void> {
  console.log(`[${workerNum}] Starting Treez scraper for dispensary ${dispensaryId}`);

  try {
    if (Number.isNaN(dispensaryId)) {
      console.error(`[${workerNum}] Invalid dispensary id: ${process.argv[3]}`);
      return;
    }

    // Get dispensary details
    const dispensaryResult = await pool.query(
      `
        SELECT id, name, menu_url
        FROM dispensaries
        WHERE id = $1
      `,
      [dispensaryId],
    );

    if (dispensaryResult.rows.length === 0) {
      console.error(`[${workerNum}] Dispensary ${dispensaryId} not found`);
      return;
    }

    const dispensary = dispensaryResult.rows[0];
    const menuUrl = dispensary.menu_url;

    if (!menuUrl) {
      console.error(`[${workerNum}] No menu URL for dispensary ${dispensary.name}`);
      return;
    }

    console.log(`[${workerNum}] Scraping: ${dispensary.name}`);
    console.log(`[${workerNum}] Menu URL: ${menuUrl}`);

    const browser = await chromium.launch({ headless: true });
    try {
      const context = await browser.newContext({ userAgent: GOOGLE_UA });
      const page = await context.newPage();

      console.log(`[${workerNum}] Loading menu page...`);
      await page.goto(menuUrl, { waitUntil: 'networkidle', timeout: 30000 });
      await page.waitForTimeout(3000);

      // Scroll to the bottom repeatedly to trigger lazy loading; stop once a
      // scroll adds no new items or the attempt cap is reached.
      console.log(`[${workerNum}] Scrolling to load all products...`);
      const maxScrollAttempts = 50;
      let previousCount = 0;
      let currentCount = 0;
      let scrollAttempts = 0;

      do {
        previousCount = currentCount;
        await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
        await page.waitForTimeout(2000);
        currentCount = await page.locator('.menu-item').count();
        scrollAttempts++;
        console.log(`[${workerNum}] Scroll ${scrollAttempts}: ${currentCount} items loaded`);
      } while (currentCount > previousCount && scrollAttempts < maxScrollAttempts);

      // Get all menu items
      const menuItems = await page.locator('.menu-item').all();
      console.log(`[${workerNum}] Found ${menuItems.length} menu items after scrolling`);

      const pageUrl = page.url();
      const products: Product[] = [];

      for (const [i, item] of menuItems.entries()) {
        try {
          const href = await item.locator('a.link').first().getAttribute('href');
          if (!href) continue; // items without a link are skipped

          // Brand lives in .item-text-1, the product line in .item-text-2; the
          // full item text is used for strain, cannabinoid and price extraction.
          const brand = await item.locator('.item-text-1').first().textContent();
          const productLine = await item.locator('.item-text-2').first().textContent();
          const text = await item.textContent();

          products.push(parseMenuItem({ href, brand, productLine, text }, pageUrl));
        } catch (error: unknown) {
          console.error(`[${workerNum}] Error parsing item ${i}:`, errorMessage(error));
        }
      }

      console.log(`[${workerNum}] Parsed ${products.length} products`);

      // Save to database; one failed row does not stop the rest.
      let saved = 0;
      for (const product of products) {
        try {
          await saveProduct(product);
          saved++;
        } catch (error: unknown) {
          console.error(`[${workerNum}] Error saving product:`, errorMessage(error));
        }
      }

      console.log(`[${workerNum}] ✅ Saved ${saved}/${products.length} products`);
    } finally {
      await browser.close();
    }
  } catch (error: unknown) {
    console.error(`[${workerNum}] ❌ Error:`, errorMessage(error));
  } finally {
    await pool.end();
  }
}

main().catch(console.error);