"use strict";
// ============================================================================
// DEPRECATED: This scraper writes to the LEGACY products table.
// DO NOT USE - All Dutchie crawling must use the new dutchie-az pipeline.
//
// New pipeline location: src/dutchie-az/services/product-crawler.ts
//   - Uses fetch-based GraphQL (no Puppeteer needed)
//   - Writes to isolated dutchie_az_* tables with snapshot model
//   - Tracks stockStatus, isPresentInFeed, missing_from_feed
//
// The normalizer functions in this file (normalizeDutchieProduct) may still
// be imported for reference, but do NOT call scrapeDutchieMenu() or upsertProducts().
// ============================================================================
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.normalizeDutchieProduct = normalizeDutchieProduct;
exports.fetchDutchieMenuViaPuppeteer = fetchDutchieMenuViaPuppeteer;
exports.upsertProducts = upsertProducts;
exports.scrapeDutchieMenu = scrapeDutchieMenu;
/**
 * @deprecated DEPRECATED - Use src/dutchie-az/services/product-crawler.ts instead.
 * This scraper writes to the legacy products table, not the new dutchie_az tables.
 *
 * Fetches product data via Puppeteer interception of Dutchie's GraphQL API.
 * This bypasses Cloudflare by using a real browser to load the menu page.
 *
 * GraphQL Operations:
 * - FilteredProducts: Returns paginated product list with full details
 * - GetAddressBasedDispensaryData: Resolves dispensary cName to dispensaryId
 */
const puppeteer_extra_1 = __importDefault(require("puppeteer-extra"));
const puppeteer_extra_plugin_stealth_1 = __importDefault(require("puppeteer-extra-plugin-stealth"));
puppeteer_extra_1.default.use((0, puppeteer_extra_plugin_stealth_1.default)());
// =====================================================
// NORMALIZER: Dutchie GraphQL → DB Schema
// =====================================================
// Matches strings that consist solely of a (possibly fractional) number,
// i.e. candidates for an epoch-milliseconds timestamp such as "1729044510543".
const EPOCH_MS_PATTERN = /^\s*-?\d+(?:\.\d+)?\s*$/;
/**
 * Converts a source timestamp into a valid Date.
 *
 * Two interpretations are attempted: epoch milliseconds (numbers and purely
 * numeric strings) and whatever `new Date(value)` can parse. The first one
 * that yields a valid Date wins, so an Invalid Date is never returned.
 *
 * @param {*} value - Raw timestamp from the feed; falsy values are treated as absent.
 * @param {boolean} [preferEpoch=false] - Try the epoch-milliseconds reading first.
 * @returns {Date|undefined} A valid Date, or undefined when the value is absent or unparseable.
 */
function parseSourceTimestamp(value, preferEpoch = false) {
    if (!value) {
        return undefined;
    }
    const looksNumeric = typeof value === 'number' ||
        (typeof value === 'string' && EPOCH_MS_PATTERN.test(value));
    const epochDate = looksNumeric ? new Date(Number(value)) : undefined;
    const parsedDate = new Date(value);
    const candidates = preferEpoch ? [epochDate, parsedDate] : [parsedDate, epochDate];
    return candidates.find((candidate) => candidate !== undefined && !Number.isNaN(candidate.getTime()));
}
/**
 * Flattens a raw Dutchie GraphQL product into the field set that
 * upsertProducts() binds to its INSERT statement.
 *
 * @param {object} product - Raw product object as captured from a GraphQL response.
 * @returns {object} Flat record; fields missing from the source are left undefined,
 *   boolean flags are strict `=== true` checks, and `raw_data` keeps the untouched input.
 */
function normalizeDutchieProduct(product) {
    // Extract first special if exists
    const saleSpecial = product.specialData?.saleSpecials?.[0];
    // Calculate inventory from POSMetaData children
    const children = product.POSMetaData?.children || [];
    const totalQuantity = children.reduce((sum, c) => sum + (c.quantity || 0), 0);
    const availableQuantity = children.reduce((sum, c) => sum + (c.quantityAvailable || 0), 0);
    // Parse timestamps. createdAt is a timestamp string like "1729044510543",
    // so the epoch reading is tried first; updatedAt is handed to the Date
    // parser first. Either way an unparseable value becomes undefined rather
    // than an Invalid Date that would later be sent to the database.
    const sourceCreatedAt = parseSourceTimestamp(product.createdAt, true);
    const sourceUpdatedAt = parseSourceTimestamp(product.updatedAt, false);
    return {
        // Identity
        external_id: product._id || product.id,
        slug: product.cName,
        name: product.Name,
        enterprise_product_id: product.enterpriseProductId,
        // Brand
        brand: product.brandName || product.brand?.name,
        brand_external_id: product.brandId || product.brand?.id,
        brand_logo_url: product.brandLogo || product.brand?.imageUrl,
        // Category
        subcategory: product.subcategory,
        strain_type: product.strainType,
        canonical_category: product.POSMetaData?.canonicalCategory,
        // Pricing
        price: product.Prices?.[0],
        rec_price: product.recPrices?.[0],
        med_price: product.medicalPrices?.[0],
        rec_special_price: product.recSpecialPrices?.[0],
        med_special_price: product.medicalSpecialPrices?.[0],
        // Specials
        is_on_special: product.special === true,
        special_name: saleSpecial?.specialName,
        // `discount` is only reported when the special is flagged as a percent discount
        discount_percent: saleSpecial?.percentDiscount ? saleSpecial.discount : undefined,
        special_data: product.specialData,
        // Inventory (a total of 0 is stored as undefined, same as "no data")
        sku: product.POSMetaData?.canonicalSKU,
        inventory_quantity: totalQuantity || undefined,
        inventory_available: availableQuantity || undefined,
        is_below_threshold: product.isBelowThreshold === true,
        status: product.Status,
        // Potency
        thc_percentage: product.THCContent?.range?.[0],
        cbd_percentage: product.CBDContent?.range?.[0],
        cannabinoids: product.cannabinoidsV2,
        // Weight/Options
        weight_mg: product.weight,
        net_weight_value: product.measurements?.netWeight?.values?.[0],
        net_weight_unit: product.measurements?.netWeight?.unit,
        options: product.Options,
        raw_options: product.rawOptions,
        // Images
        image_url: product.Image,
        additional_images: product.images?.length ? product.images : undefined,
        // Flags
        is_featured: product.featured === true,
        medical_only: product.medicalOnly === true,
        rec_only: product.recOnly === true,
        // Timestamps
        source_created_at: sourceCreatedAt,
        source_updated_at: sourceUpdatedAt,
        // Description
        description: typeof product.description === 'string' ? product.description : undefined,
        // Raw
        raw_data: product,
    };
}
/**
 * Loads a Dutchie menu in a Puppeteer browser and collects the products that
 * appear in intercepted GraphQL responses while the main page and every
 * linked category page are scrolled.
 *
 * @param {string} menuUrl - URL of the menu page to open.
 * @param {object} [options]
 * @param {boolean|string} [options.headless='new'] - Passed through to puppeteer launch().
 * @param {number} [options.timeout=90000] - Navigation timeout (ms) for the initial page load.
 * @param {number} [options.maxScrolls=30] - Maximum scroll attempts on the main page.
 * @returns {Promise<{products: object[], dispensaryId: string, menuUrl: string}>}
 *   Raw, de-duplicated products plus the dispensary ID ('' when none was found).
 * @throws Propagates launch/navigation errors from the main page; errors on
 *   individual category pages are logged and skipped. The browser is always closed.
 */
async function fetchDutchieMenuViaPuppeteer(menuUrl, options = {}) {
    const {
        headless = 'new',
        timeout = 90000,
        maxScrolls = 30, // Increased for full menu capture
    } = options;
    let browser;
    const capturedProducts = [];
    let dispensaryId = '';
    try {
        browser = await puppeteer_extra_1.default.launch({
            headless,
            args: [
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-blink-features=AutomationControlled',
            ],
        });
        const page = await browser.newPage();
        // Stealth configuration
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
        await page.setViewport({ width: 1920, height: 1080 });
        await page.evaluateOnNewDocument(() => {
            Object.defineProperty(navigator, 'webdriver', { get: () => false });
            window.chrome = { runtime: {} };
        });
        // Track seen product IDs to avoid duplicates
        const seenIds = new Set();
        // Intercept GraphQL responses
        page.on('response', async (response) => {
            const url = response.url();
            if (!url.includes('graphql'))
                return;
            try {
                const contentType = response.headers()['content-type'] || '';
                if (!contentType.includes('application/json'))
                    return;
                const data = await response.json();
                // Capture dispensary ID
                if (data?.data?.getAddressBasedDispensaryData?.dispensaryData?.dispensaryId) {
                    dispensaryId = data.data.getAddressBasedDispensaryData.dispensaryData.dispensaryId;
                }
                // Capture products from FilteredProducts
                const products = data?.data?.filteredProducts?.products;
                if (Array.isArray(products)) {
                    for (const product of products) {
                        if (!product) {
                            // A null entry must not abort the rest of the batch
                            continue;
                        }
                        // Same identity rule as normalizeDutchieProduct(), so products
                        // that only carry `id` are not all collapsed onto `undefined`
                        const productKey = product._id || product.id;
                        if (!seenIds.has(productKey)) {
                            seenIds.add(productKey);
                            capturedProducts.push(product);
                        }
                    }
                }
            }
            catch {
                // Ignore parse errors (deliberate best-effort: not every
                // GraphQL response has a readable JSON body)
            }
        });
        // Navigate to menu
        console.log('[DutchieGraphQL] Loading menu page...');
        await page.goto(menuUrl, {
            waitUntil: 'networkidle2',
            timeout,
        });
        // Get dispensary ID from window.reactEnv if not captured
        if (!dispensaryId) {
            dispensaryId = await page.evaluate(() => {
                const env = window.reactEnv;
                return env?.dispensaryId || env?.retailerId || '';
            });
        }
        // Helper function to scroll through a page until no more products load.
        // Stops after maxScrollAttempts scrolls or after 3 consecutive scrolls
        // that did not add any new product ID.
        async function scrollToLoadAll(maxScrollAttempts = maxScrolls) {
            let scrollCount = 0;
            let previousCount = 0;
            let noNewProductsCount = 0;
            while (scrollCount < maxScrollAttempts && noNewProductsCount < 3) {
                await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
                // Give the page time to issue and answer its next GraphQL request
                await new Promise((r) => setTimeout(r, 1500));
                const currentCount = seenIds.size;
                if (currentCount === previousCount) {
                    noNewProductsCount++;
                }
                else {
                    noNewProductsCount = 0;
                }
                previousCount = currentCount;
                scrollCount++;
            }
        }
        // First, scroll through the main page (all products)
        console.log('[DutchieGraphQL] Scrolling main page...');
        await scrollToLoadAll();
        console.log(`[DutchieGraphQL] After main page: ${seenIds.size} products`);
        // Get category links from the navigation
        const categoryLinks = await page.evaluate(() => {
            const links = [];
            // Look for category navigation links
            const navLinks = document.querySelectorAll('a[href*="/products/"]');
            navLinks.forEach((link) => {
                const href = link.href;
                if (href && !links.includes(href)) {
                    links.push(href);
                }
            });
            return links;
        });
        console.log(`[DutchieGraphQL] Found ${categoryLinks.length} category links`);
        // Visit each category page to capture all products
        for (const categoryUrl of categoryLinks) {
            try {
                console.log(`[DutchieGraphQL] Visiting category: ${categoryUrl.split('/').pop()}`);
                await page.goto(categoryUrl, {
                    waitUntil: 'networkidle2',
                    timeout: 30000,
                });
                await scrollToLoadAll(15); // Fewer scrolls per category
                console.log(`[DutchieGraphQL] Total products: ${seenIds.size}`);
            }
            catch (e) {
                // One failing category must not discard what was already captured
                console.log(`[DutchieGraphQL] Category error: ${e.message}`);
            }
        }
        // Wait for any final responses
        await new Promise((r) => setTimeout(r, 2000));
        return {
            products: capturedProducts,
            dispensaryId,
            menuUrl,
        };
    }
    finally {
        if (browser) {
            await browser.close();
        }
    }
}
// =====================================================
// DATABASE OPERATIONS
// =====================================================
// Upsert keyed on (store_id, slug). `xmax = 0` in RETURNING is used to tell
// a fresh insert from an update of an existing row.
const UPSERT_PRODUCT_SQL = `
        INSERT INTO products (
          store_id, external_id, slug, name, enterprise_product_id,
          brand, brand_external_id, brand_logo_url,
          subcategory, strain_type, canonical_category,
          price, rec_price, med_price, rec_special_price, med_special_price,
          is_on_special, special_name, discount_percent, special_data,
          sku, inventory_quantity, inventory_available, is_below_threshold, status,
          thc_percentage, cbd_percentage, cannabinoids,
          weight_mg, net_weight_value, net_weight_unit, options, raw_options,
          image_url, additional_images,
          is_featured, medical_only, rec_only,
          source_created_at, source_updated_at,
          description, raw_data,
          dutchie_url, last_seen_at, updated_at
        )
        VALUES (
          $1, $2, $3, $4, $5,
          $6, $7, $8,
          $9, $10, $11,
          $12, $13, $14, $15, $16,
          $17, $18, $19, $20,
          $21, $22, $23, $24, $25,
          $26, $27, $28,
          $29, $30, $31, $32, $33,
          $34, $35,
          $36, $37, $38,
          $39, $40,
          $41, $42,
          '', NOW(), NOW()
        )
        ON CONFLICT (store_id, slug) DO UPDATE SET
          name = EXCLUDED.name,
          enterprise_product_id = EXCLUDED.enterprise_product_id,
          brand = EXCLUDED.brand,
          brand_external_id = EXCLUDED.brand_external_id,
          brand_logo_url = EXCLUDED.brand_logo_url,
          subcategory = EXCLUDED.subcategory,
          strain_type = EXCLUDED.strain_type,
          canonical_category = EXCLUDED.canonical_category,
          price = EXCLUDED.price,
          rec_price = EXCLUDED.rec_price,
          med_price = EXCLUDED.med_price,
          rec_special_price = EXCLUDED.rec_special_price,
          med_special_price = EXCLUDED.med_special_price,
          is_on_special = EXCLUDED.is_on_special,
          special_name = EXCLUDED.special_name,
          discount_percent = EXCLUDED.discount_percent,
          special_data = EXCLUDED.special_data,
          sku = EXCLUDED.sku,
          inventory_quantity = EXCLUDED.inventory_quantity,
          inventory_available = EXCLUDED.inventory_available,
          is_below_threshold = EXCLUDED.is_below_threshold,
          status = EXCLUDED.status,
          thc_percentage = EXCLUDED.thc_percentage,
          cbd_percentage = EXCLUDED.cbd_percentage,
          cannabinoids = EXCLUDED.cannabinoids,
          weight_mg = EXCLUDED.weight_mg,
          net_weight_value = EXCLUDED.net_weight_value,
          net_weight_unit = EXCLUDED.net_weight_unit,
          options = EXCLUDED.options,
          raw_options = EXCLUDED.raw_options,
          image_url = EXCLUDED.image_url,
          additional_images = EXCLUDED.additional_images,
          is_featured = EXCLUDED.is_featured,
          medical_only = EXCLUDED.medical_only,
          rec_only = EXCLUDED.rec_only,
          source_created_at = EXCLUDED.source_created_at,
          source_updated_at = EXCLUDED.source_updated_at,
          description = EXCLUDED.description,
          raw_data = EXCLUDED.raw_data,
          last_seen_at = NOW(),
          updated_at = NOW()
        RETURNING (xmax = 0) AS was_inserted
      `;
/**
 * @deprecated Writes to the legacy products table; see the file header.
 *
 * Upserts normalized products for one store inside a single transaction:
 * either every product is written or, on any error, the transaction is
 * rolled back and the error is rethrown.
 *
 * @param {object} pool - Connection pool exposing connect(); the client it
 *   yields must expose query() and release().
 * @param {*} storeId - Value bound to products.store_id.
 * @param {object[]} products - Records shaped like normalizeDutchieProduct() output.
 * @returns {Promise<{inserted: number, updated: number}>} Row counts by outcome.
 * @throws Rethrows the first query error after attempting a ROLLBACK.
 */
async function upsertProducts(pool, storeId, products) {
    const client = await pool.connect();
    let inserted = 0;
    let updated = 0;
    try {
        await client.query('BEGIN');
        for (const product of products) {
            // Upsert product
            const result = await client.query(UPSERT_PRODUCT_SQL, [
                storeId,
                product.external_id,
                product.slug,
                product.name,
                product.enterprise_product_id,
                product.brand,
                product.brand_external_id,
                product.brand_logo_url,
                product.subcategory,
                product.strain_type,
                product.canonical_category,
                product.price,
                product.rec_price,
                product.med_price,
                product.rec_special_price,
                product.med_special_price,
                product.is_on_special,
                product.special_name,
                product.discount_percent,
                product.special_data ? JSON.stringify(product.special_data) : null,
                product.sku,
                product.inventory_quantity,
                product.inventory_available,
                product.is_below_threshold,
                product.status,
                product.thc_percentage,
                product.cbd_percentage,
                product.cannabinoids ? JSON.stringify(product.cannabinoids) : null,
                product.weight_mg,
                product.net_weight_value,
                product.net_weight_unit,
                product.options,
                product.raw_options,
                product.image_url,
                product.additional_images,
                product.is_featured,
                product.medical_only,
                product.rec_only,
                product.source_created_at,
                product.source_updated_at,
                product.description,
                product.raw_data ? JSON.stringify(product.raw_data) : null,
            ]);
            if (result.rows[0]?.was_inserted) {
                inserted++;
            }
            else {
                updated++;
            }
        }
        await client.query('COMMIT');
        return { inserted, updated };
    }
    catch (error) {
        try {
            await client.query('ROLLBACK');
        }
        catch (rollbackError) {
            // A failed ROLLBACK (e.g. dropped connection) must not hide the
            // error that caused it; log it and surface the original below.
            console.error('[DutchieGraphQL] Rollback failed:', rollbackError.message);
        }
        throw error;
    }
    finally {
        client.release();
    }
}
// =====================================================
// MAIN ENTRY POINT
// =====================================================
/**
 * @deprecated DEPRECATED - Use src/dutchie-az/services/product-crawler.ts instead.
 * This function is disabled and will throw an error if called.
 *
 * Because the function is async, the error surfaces as a rejected promise.
 *
 * @param {object} pool - Unused while the function is disabled.
 * @param {*} storeId - Unused while the function is disabled.
 * @param {string} menuUrl - Unused while the function is disabled.
 * @returns {Promise<never>} Always rejects with the deprecation error.
 */
async function scrapeDutchieMenu(pool, storeId, menuUrl) {
    // DEPRECATED: Throw error to prevent accidental use
    throw new Error('DEPRECATED: scrapeDutchieMenu() is deprecated. ' +
        'Use src/dutchie-az/services/product-crawler.ts instead. ' +
        'This scraper writes to the legacy products table.');
    // Original code below is unreachable but kept for reference
    try {
        console.log(`[DutchieGraphQL] Scraping: ${menuUrl}`);
        // Fetch products via Puppeteer
        const { products, dispensaryId } = await fetchDutchieMenuViaPuppeteer(menuUrl);
        console.log(`[DutchieGraphQL] Captured ${products.length} products, dispensaryId: ${dispensaryId}`);
        if (products.length === 0) {
            return {
                success: false,
                productsFound: 0,
                inserted: 0,
                updated: 0,
                error: 'No products captured from GraphQL responses',
            };
        }
        // Normalize products
        const normalized = products.map(normalizeDutchieProduct);
        // Upsert to database
        const { inserted, updated } = await upsertProducts(pool, storeId, normalized);
        console.log(`[DutchieGraphQL] Upsert complete: ${inserted} inserted, ${updated} updated`);
        return {
            success: true,
            productsFound: products.length,
            inserted,
            updated,
        };
    }
    catch (error) {
        console.error(`[DutchieGraphQL] Error:`, error.message);
        return {
            success: false,
            productsFound: 0,
            inserted: 0,
            updated: 0,
            error: error.message,
        };
    }
}