"use strict";
// ============================================================================
// DEPRECATED: This scraper writes to the LEGACY products table.
// DO NOT USE - All Dutchie crawling must use the new dutchie-az pipeline.
//
// New pipeline location: src/dutchie-az/services/product-crawler.ts
// - Uses fetch-based GraphQL (no Puppeteer needed)
// - Writes to isolated dutchie_az_* tables with snapshot model
// - Tracks stockStatus, isPresentInFeed, missing_from_feed
// ============================================================================
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.fetchAllDutchieProducts = fetchAllDutchieProducts;
exports.upsertProductsDirect = upsertProductsDirect;
exports.scrapeAllDutchieProducts = scrapeAllDutchieProducts;
/**
 * @deprecated DEPRECATED - Use src/dutchie-az/services/product-crawler.ts instead.
 * This scraper writes to the legacy products table, not the new dutchie_az tables.
 *
 * Makes direct GraphQL requests from within the browser context to:
 * 1. Bypass Cloudflare (using browser session)
 * 2. Fetch ALL products including out-of-stock (Status: null)
 * 3. Paginate through complete menu
 */
const puppeteer_extra_1 = __importDefault(require("puppeteer-extra"));
const puppeteer_extra_plugin_stealth_1 = __importDefault(require("puppeteer-extra-plugin-stealth"));
// NOTE: normalizeDutchieProduct from this module was only used by the retired
// scrapeAllDutchieProducts() flow; the require is kept so module loading is unchanged.
const dutchie_graphql_1 = require("./dutchie-graphql");
puppeteer_extra_1.default.use((0, puppeteer_extra_plugin_stealth_1.default)());

// GraphQL persisted query hashes
const GRAPHQL_HASHES = {
    FilteredProducts: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
    GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
};

// Safety limit for pagination: page indexes 0..MAX_PAGE_INDEX are requested at most.
const MAX_PAGE_INDEX = 50;

// Upsert statement for the legacy products table, keyed on (store_id, slug).
// Takes 42 positional parameters; dutchie_url is always written as '' on insert
// and is not touched on update. `xmax = 0` is returned as was_inserted so the
// caller can tell inserts from updates.
const UPSERT_PRODUCT_SQL = `
    INSERT INTO products (
        store_id, external_id, slug, name, enterprise_product_id,
        brand, brand_external_id, brand_logo_url,
        subcategory, strain_type, canonical_category,
        price, rec_price, med_price, rec_special_price, med_special_price,
        is_on_special, special_name, discount_percent, special_data,
        sku, inventory_quantity, inventory_available, is_below_threshold, status,
        thc_percentage, cbd_percentage, cannabinoids,
        weight_mg, net_weight_value, net_weight_unit, options, raw_options,
        image_url, additional_images,
        is_featured, medical_only, rec_only,
        source_created_at, source_updated_at,
        description, raw_data,
        dutchie_url, last_seen_at, updated_at
    )
    VALUES (
        $1, $2, $3, $4, $5,
        $6, $7, $8,
        $9, $10, $11,
        $12, $13, $14, $15, $16,
        $17, $18, $19, $20,
        $21, $22, $23, $24, $25,
        $26, $27, $28,
        $29, $30, $31, $32, $33,
        $34, $35,
        $36, $37, $38,
        $39, $40,
        $41, $42,
        '', NOW(), NOW()
    )
    ON CONFLICT (store_id, slug) DO UPDATE SET
        name = EXCLUDED.name,
        enterprise_product_id = EXCLUDED.enterprise_product_id,
        brand = EXCLUDED.brand,
        brand_external_id = EXCLUDED.brand_external_id,
        brand_logo_url = EXCLUDED.brand_logo_url,
        subcategory = EXCLUDED.subcategory,
        strain_type = EXCLUDED.strain_type,
        canonical_category = EXCLUDED.canonical_category,
        price = EXCLUDED.price,
        rec_price = EXCLUDED.rec_price,
        med_price = EXCLUDED.med_price,
        rec_special_price = EXCLUDED.rec_special_price,
        med_special_price = EXCLUDED.med_special_price,
        is_on_special = EXCLUDED.is_on_special,
        special_name = EXCLUDED.special_name,
        discount_percent = EXCLUDED.discount_percent,
        special_data = EXCLUDED.special_data,
        sku = EXCLUDED.sku,
        inventory_quantity = EXCLUDED.inventory_quantity,
        inventory_available = EXCLUDED.inventory_available,
        is_below_threshold = EXCLUDED.is_below_threshold,
        status = EXCLUDED.status,
        thc_percentage = EXCLUDED.thc_percentage,
        cbd_percentage = EXCLUDED.cbd_percentage,
        cannabinoids = EXCLUDED.cannabinoids,
        weight_mg = EXCLUDED.weight_mg,
        net_weight_value = EXCLUDED.net_weight_value,
        net_weight_unit = EXCLUDED.net_weight_unit,
        options = EXCLUDED.options,
        raw_options = EXCLUDED.raw_options,
        image_url = EXCLUDED.image_url,
        additional_images = EXCLUDED.additional_images,
        is_featured = EXCLUDED.is_featured,
        medical_only = EXCLUDED.medical_only,
        rec_only = EXCLUDED.rec_only,
        source_created_at = EXCLUDED.source_created_at,
        source_updated_at = EXCLUDED.source_updated_at,
        description = EXCLUDED.description,
        raw_data = EXCLUDED.raw_data,
        last_seen_at = NOW(),
        updated_at = NOW()
    RETURNING (xmax = 0) AS was_inserted
`;

/**
 * Fetch all products via in-page GraphQL requests
 * This includes both in-stock and out-of-stock items
 *
 * Launches a Puppeteer browser, loads `menuUrl` to establish a session, reads the
 * dispensary id from `window.reactEnv`, then pages through the `FilteredProducts`
 * persisted query using `fetch` inside the page until an empty page, a GraphQL
 * error response, or the page safety limit is reached. The browser is always closed.
 *
 * @param {string} menuUrl - Menu page URL to open before issuing GraphQL requests.
 * @param {object} [options]
 * @param {boolean|string} [options.headless='new'] - Passed to puppeteer launch.
 * @param {number} [options.timeout=90000] - Navigation timeout (ms) for the initial page load.
 * @param {number} [options.perPage=100] - Page size sent to the GraphQL query.
 * @param {boolean} [options.includeOutOfStock=true] - When true the query is sent with
 *   `Status: null`; otherwise `Status: 'Active'`.
 * @returns {Promise<{products: object[], dispensaryId: string, totalProducts: number,
 *   activeCount: number, inactiveCount: number}>}
 * @throws {Error} If no dispensary id can be read from the page, or an in-page request
 *   returns a non-OK HTTP status.
 */
async function fetchAllDutchieProducts(menuUrl, options = {}) {
    const {
        headless = 'new',
        timeout = 90000,
        perPage = 100,
        includeOutOfStock = true,
    } = options;
    let browser;
    try {
        browser = await puppeteer_extra_1.default.launch({
            headless,
            args: [
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-blink-features=AutomationControlled',
            ],
        });
        const page = await browser.newPage();
        // Stealth configuration
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
        await page.setViewport({ width: 1920, height: 1080 });
        await page.evaluateOnNewDocument(() => {
            Object.defineProperty(navigator, 'webdriver', { get: () => false });
            window.chrome = { runtime: {} };
        });
        // Navigate to menu page to establish session
        console.log('[DutchieGraphQL] Loading menu page to establish session...');
        await page.goto(menuUrl, {
            waitUntil: 'networkidle2',
            timeout,
        });
        // Get dispensary ID from page
        const dispensaryId = await page.evaluate(() => {
            const env = window.reactEnv;
            return env?.dispensaryId || env?.retailerId || '';
        });
        if (!dispensaryId) {
            throw new Error('Could not determine dispensaryId from page');
        }
        console.log(`[DutchieGraphQL] Dispensary ID: ${dispensaryId}`);
        // Fetch all products via in-page GraphQL requests
        const allProducts = [];
        let pageNum = 0;
        while (true) {
            console.log(`[DutchieGraphQL] Fetching page ${pageNum} (perPage=${perPage})...`);
            // The callback is serialized and executed in the browser context, so it must
            // not reference anything from this module's scope; all inputs are passed as
            // explicit arguments.
            const result = await page.evaluate(async (dispensaryId, pageIndex, perPage, includeOutOfStock, hash) => {
                const variables = {
                    includeEnterpriseSpecials: false,
                    productsFilter: {
                        dispensaryId,
                        pricingType: 'rec',
                        Status: includeOutOfStock ? null : 'Active', // null = include out-of-stock
                        types: [],
                        useCache: false, // Don't cache to get fresh data
                        isDefaultSort: true,
                        sortBy: 'popularSortIdx',
                        sortDirection: 1,
                        bypassOnlineThresholds: true,
                        isKioskMenu: false,
                        removeProductsBelowOptionThresholds: false,
                    },
                    page: pageIndex,
                    perPage,
                };
                const qs = new URLSearchParams({
                    operationName: 'FilteredProducts',
                    variables: JSON.stringify(variables),
                    extensions: JSON.stringify({
                        persistedQuery: { version: 1, sha256Hash: hash },
                    }),
                });
                const response = await fetch(`https://dutchie.com/graphql?${qs.toString()}`, {
                    method: 'GET',
                    headers: {
                        'content-type': 'application/json',
                        'apollographql-client-name': 'Marketplace (production)',
                    },
                    credentials: 'include', // Include cookies/session
                });
                if (!response.ok) {
                    throw new Error(`HTTP ${response.status}`);
                }
                return response.json();
            }, dispensaryId, pageNum, perPage, includeOutOfStock, GRAPHQL_HASHES.FilteredProducts);
            // Guarded access: a null/undefined JSON body must not crash here, it is
            // treated like an empty page below.
            if (result?.errors) {
                // Stop paging on GraphQL errors; whatever was collected so far is returned.
                console.error('[DutchieGraphQL] GraphQL errors:', result.errors);
                break;
            }
            const products = result?.data?.filteredProducts?.products || [];
            console.log(`[DutchieGraphQL] Page ${pageNum}: ${products.length} products`);
            if (products.length === 0) {
                break;
            }
            allProducts.push(...products);
            pageNum++;
            // Safety limit
            if (pageNum > MAX_PAGE_INDEX) {
                console.log('[DutchieGraphQL] Reached page limit, stopping');
                break;
            }
        }
        // Count active vs inactive (everything that is not 'Active' counts as inactive)
        const activeCount = allProducts.filter((p) => p.Status === 'Active').length;
        const inactiveCount = allProducts.length - activeCount;
        console.log(`[DutchieGraphQL] Total: ${allProducts.length} products (${activeCount} active, ${inactiveCount} inactive)`);
        return {
            products: allProducts,
            dispensaryId,
            totalProducts: allProducts.length,
            activeCount,
            inactiveCount,
        };
    }
    finally {
        if (browser) {
            await browser.close();
        }
    }
}

/**
 * Serialize a value to a JSON string for a query parameter, mapping any falsy
 * value to SQL NULL (same truthiness rule the upsert has always used).
 */
function toJsonOrNull(value) {
    return value ? JSON.stringify(value) : null;
}

/**
 * Upsert products to database
 *
 * Runs every upsert on one client inside a single transaction: either all
 * products are written or none are.
 *
 * @param {object} pool - Connection pool exposing `connect()`; the returned client
 *   must expose `query()` and `release()`.
 * @param {*} storeId - Value written to `store_id` for every row.
 * @param {object[]} products - Normalized product records whose property names match
 *   the column names used in the statement.
 * @returns {Promise<{inserted: number, updated: number}>} Row counts derived from the
 *   `was_inserted` flag returned by each statement.
 * @throws Rethrows the first query error after attempting a ROLLBACK.
 */
async function upsertProductsDirect(pool, storeId, products) {
    const client = await pool.connect();
    let inserted = 0;
    let updated = 0;
    try {
        await client.query('BEGIN');
        for (const product of products) {
            const result = await client.query(UPSERT_PRODUCT_SQL, [
                storeId,
                product.external_id,
                product.slug,
                product.name,
                product.enterprise_product_id,
                product.brand,
                product.brand_external_id,
                product.brand_logo_url,
                product.subcategory,
                product.strain_type,
                product.canonical_category,
                product.price,
                product.rec_price,
                product.med_price,
                product.rec_special_price,
                product.med_special_price,
                product.is_on_special,
                product.special_name,
                product.discount_percent,
                toJsonOrNull(product.special_data),
                product.sku,
                product.inventory_quantity,
                product.inventory_available,
                product.is_below_threshold,
                product.status,
                product.thc_percentage,
                product.cbd_percentage,
                toJsonOrNull(product.cannabinoids),
                product.weight_mg,
                product.net_weight_value,
                product.net_weight_unit,
                product.options,
                product.raw_options,
                product.image_url,
                product.additional_images,
                product.is_featured,
                product.medical_only,
                product.rec_only,
                product.source_created_at,
                product.source_updated_at,
                product.description,
                toJsonOrNull(product.raw_data),
            ]);
            if (result.rows[0]?.was_inserted) {
                inserted++;
            }
            else {
                updated++;
            }
        }
        await client.query('COMMIT');
        return { inserted, updated };
    }
    catch (error) {
        // A failing ROLLBACK (e.g. on a dropped connection) must not replace the
        // error that actually caused the transaction to abort.
        try {
            await client.query('ROLLBACK');
        }
        catch (rollbackError) {
            console.error('[DutchieGraphQL] ROLLBACK failed:', rollbackError?.message);
        }
        throw error;
    }
    finally {
        client.release();
    }
}

/**
 * @deprecated DEPRECATED - Use src/dutchie-az/services/product-crawler.ts instead.
 * This function is disabled and always rejects when called.
 *
 * Former main entry point: it fetched all products (including out-of-stock) with
 * fetchAllDutchieProducts(), normalized them with normalizeDutchieProduct from
 * ./dutchie-graphql, and wrote them with upsertProductsDirect(). That code was
 * unreachable behind the throw below and has been removed.
 *
 * @param {object} pool - Unused; kept for signature compatibility.
 * @param {*} storeId - Unused; kept for signature compatibility.
 * @param {string} menuUrl - Unused; kept for signature compatibility.
 * @returns {Promise<never>}
 * @throws {Error} Always.
 */
async function scrapeAllDutchieProducts(pool, storeId, menuUrl) {
    // DEPRECATED: Throw error to prevent accidental use
    throw new Error('DEPRECATED: scrapeAllDutchieProducts() is deprecated. ' +
        'Use src/dutchie-az/services/product-crawler.ts instead. ' +
        'This scraper writes to the legacy products table.');
}