"use strict"; /** * Dutchie GraphQL Client * * Uses Puppeteer to establish a session (get CF cookies), then makes * SERVER-SIDE fetch calls to api-gw.dutchie.com with those cookies. * * DUTCHIE FETCH RULES: * 1. Server-side only - use axios (never browser fetch with CORS) * 2. Use dispensaryFilter.cNameOrID, NOT dispensaryId directly * 3. Headers must mimic Chrome: User-Agent, Origin, Referer * 4. If 403, extract CF cookies from Puppeteer session and include them * 5. Log status codes, error bodies, and product counts */ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.ARIZONA_CENTERPOINTS = exports.GRAPHQL_HASHES = void 0; exports.resolveDispensaryId = resolveDispensaryId; exports.resolveDispensaryIdWithDetails = resolveDispensaryIdWithDetails; exports.discoverArizonaDispensaries = discoverArizonaDispensaries; exports.fetchAllProducts = fetchAllProducts; exports.fetchAllProductsBothModes = fetchAllProductsBothModes; const axios_1 = __importDefault(require("axios")); const puppeteer_extra_1 = __importDefault(require("puppeteer-extra")); const puppeteer_extra_plugin_stealth_1 = __importDefault(require("puppeteer-extra-plugin-stealth")); const dutchie_1 = require("../config/dutchie"); Object.defineProperty(exports, "GRAPHQL_HASHES", { enumerable: true, get: function () { return dutchie_1.GRAPHQL_HASHES; } }); Object.defineProperty(exports, "ARIZONA_CENTERPOINTS", { enumerable: true, get: function () { return dutchie_1.ARIZONA_CENTERPOINTS; } }); puppeteer_extra_1.default.use((0, puppeteer_extra_plugin_stealth_1.default)()); /** * Create a session by navigating to the embedded menu page * and extracting CF clearance cookies for server-side requests. * Also extracts dispensaryId from window.reactEnv if available. 
/**
 * Open a stealth Puppeteer session on a dispensary's embedded menu page and
 * collect what the server-side GraphQL calls need.
 *
 * Navigation failures are logged and tolerated because cookies may have been
 * set before the failure. Any other failure after the browser has been
 * launched closes the browser before re-throwing, since the caller never
 * receives a session it could close itself.
 *
 * @param {string} cName - Dispensary slug used in the embedded-menu URL.
 * @returns {Promise<{cookies: string, userAgent: string, browser: object, page: object, dispensaryId: (string|null|undefined), httpStatus: (number|undefined)}>}
 *   `cookies` is a ready-to-send Cookie header value; `dispensaryId` is read
 *   from `window.reactEnv` when present; `httpStatus` is the navigation status.
 */
async function createSession(cName) {
    const browser = await puppeteer_extra_1.default.launch({
        headless: 'new',
        args: dutchie_1.dutchieConfig.browserArgs,
    });
    try {
        const page = await browser.newPage();
        const userAgent = dutchie_1.dutchieConfig.userAgent;
        await page.setUserAgent(userAgent);
        await page.setViewport({ width: 1920, height: 1080 });
        await page.evaluateOnNewDocument(() => {
            Object.defineProperty(navigator, 'webdriver', { get: () => false });
            window.chrome = { runtime: {} };
        });
        // Navigate to the embedded menu page for this dispensary
        const embeddedMenuUrl = `https://dutchie.com/embedded-menu/${cName}`;
        console.log(`[GraphQL Client] Loading ${embeddedMenuUrl} to get CF cookies...`);
        let httpStatus;
        let dispensaryId;
        try {
            const response = await page.goto(embeddedMenuUrl, {
                waitUntil: 'networkidle2',
                timeout: dutchie_1.dutchieConfig.navigationTimeout,
            });
            httpStatus = response?.status();
            await new Promise((r) => setTimeout(r, dutchie_1.dutchieConfig.pageLoadDelay));
            // Try to extract dispensaryId from window.reactEnv
            try {
                dispensaryId = await page.evaluate(() => {
                    return window.reactEnv?.dispensaryId || null;
                });
                if (dispensaryId) {
                    console.log(`[GraphQL Client] Extracted dispensaryId from reactEnv: ${dispensaryId}`);
                }
            }
            catch (evalError) {
                console.log(`[GraphQL Client] Could not extract dispensaryId from reactEnv: ${evalError.message}`);
            }
        }
        catch (error) {
            // Continue anyway - we may have gotten cookies
            console.warn(`[GraphQL Client] Navigation warning: ${error.message}`);
        }
        // Extract cookies and flatten them into a single Cookie header value
        const cookies = await page.cookies();
        const cookieString = cookies.map((c) => `${c.name}=${c.value}`).join('; ');
        console.log(`[GraphQL Client] Got ${cookies.length} cookies, HTTP status: ${httpStatus}`);
        if (cookies.length > 0) {
            console.log(`[GraphQL Client] Cookie names: ${cookies.map((c) => c.name).join(', ')}`);
        }
        return { cookies: cookieString, userAgent, browser, page, dispensaryId, httpStatus };
    }
    catch (setupError) {
        // The session object is never handed out on this path, so this is the
        // only place the launched browser can be released.
        try {
            await browser.close();
        }
        catch (closeError) {
            console.warn(`[GraphQL Client] Failed to close browser after setup error: ${closeError.message}`);
        }
        throw setupError;
    }
}
/**
 * Close a session's browser (best effort).
 *
 * Every caller invokes this from a `finally` block, so a failure here must not
 * replace the value or error produced by the `try` body; it is logged instead.
 *
 * @param {{browser: object}} session - Session returned by createSession.
 * @returns {Promise<void>}
 */
async function closeSession(session) {
    try {
        await session.browser.close();
    }
    catch (error) {
        console.warn(`[GraphQL Client] Failed to close browser session: ${error.message}`);
    }
}
// ============================================================
// SERVER-SIDE GRAPHQL FETCH USING AXIOS
// ============================================================
/**
 * Render any payload as a short string for log output.
 *
 * `JSON.stringify` returns `undefined` (not a string) for `undefined` input and
 * throws on cycles/BigInt; both cases fall back to `String(payload)` so that
 * logging can never be the thing that fails.
 *
 * @param {*} payload - Value to preview.
 * @param {number} maxLength - Maximum number of characters to keep.
 * @returns {string}
 */
function previewPayload(payload, maxLength) {
    if (typeof payload === 'string') {
        return payload.slice(0, maxLength);
    }
    let serialized;
    try {
        serialized = JSON.stringify(payload);
    }
    catch {
        serialized = undefined;
    }
    return (serialized ?? String(payload)).slice(0, maxLength);
}
/**
 * Build headers that mimic a real browser request.
 *
 * @param {{userAgent: string, cookies: string}} session - Supplies the
 *   User-Agent and, when non-empty, the Cookie header.
 * @param {string} cName - Dispensary slug used to build the Referer.
 * @returns {Object<string, string>} Header map for the GraphQL POST.
 */
function buildHeaders(session, cName) {
    const embeddedMenuUrl = `https://dutchie.com/embedded-menu/${cName}`;
    return {
        'accept': 'application/json, text/plain, */*',
        'accept-language': 'en-US,en;q=0.9',
        'accept-encoding': 'gzip, deflate, br',
        'content-type': 'application/json',
        'origin': 'https://dutchie.com',
        'referer': embeddedMenuUrl,
        'user-agent': session.userAgent,
        'apollographql-client-name': 'Marketplace (production)',
        'sec-ch-ua': '"Chromium";v="120", "Google Chrome";v="120", "Not-A.Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        // Only send a Cookie header when the session actually captured cookies
        ...(session.cookies ? { 'cookie': session.cookies } : {}),
    };
}
/**
 * Execute a persisted GraphQL query server-side using axios, sending the
 * cookies captured by the browser session.
 *
 * @param {{userAgent: string, cookies: string}} session - Session from createSession.
 * @param {string} operationName - GraphQL operation name.
 * @param {object} variables - GraphQL variables.
 * @param {string} hash - sha256 hash of the persisted query.
 * @param {string} cName - Dispensary slug (used for the Referer header).
 * @returns {Promise<*>} The parsed response body. GraphQL-level `errors` are
 *   logged but still returned to the caller.
 * @throws {Error} `HTTP <status>` for any non-200 response; transport errors
 *   from axios are logged and re-thrown unchanged.
 */
async function executeGraphQL(session, operationName, variables, hash, cName) {
    const endpoint = dutchie_1.dutchieConfig.graphqlEndpoint;
    const headers = buildHeaders(session, cName);
    // Build request body for POST
    const body = {
        operationName,
        variables,
        extensions: {
            persistedQuery: { version: 1, sha256Hash: hash },
        },
    };
    console.log(`[GraphQL Client] POST: ${operationName} -> ${endpoint}`);
    console.log(`[GraphQL Client] Variables: ${previewPayload(variables, 300)}...`);
    let response;
    try {
        response = await axios_1.default.post(endpoint, body, {
            headers,
            timeout: 30000,
            validateStatus: () => true, // Don't throw on non-2xx
        });
    }
    catch (error) {
        // Only transport-level failures land here (timeouts, DNS, resets)
        if (axios_1.default.isAxiosError(error)) {
            console.error(`[GraphQL Client] Axios error: ${error.message}`);
            if (error.response) {
                console.error(`[GraphQL Client] Response status: ${error.response.status}`);
                console.error(`[GraphQL Client] Response data: ${previewPayload(error.response.data, 500)}`);
            }
            if (error.code) {
                console.error(`[GraphQL Client] Error code: ${error.code}`);
            }
        }
        else {
            console.error(`[GraphQL Client] Error: ${error.message}`);
        }
        throw error;
    }
    // Log response details
    console.log(`[GraphQL Client] Response status: ${response.status}`);
    if (response.status !== 200) {
        // The body may be absent entirely; previewPayload copes with that so
        // the caller always sees the HTTP error rather than a TypeError.
        console.error(`[GraphQL Client] HTTP ${response.status}: ${previewPayload(response.data, 500)}`);
        throw new Error(`HTTP ${response.status}`);
    }
    // Check for GraphQL errors
    if (response.data?.errors && response.data.errors.length > 0) {
        console.error(`[GraphQL Client] GraphQL errors: ${JSON.stringify(response.data.errors[0])}`);
    }
    return response.data;
}
/**
 * Resolve a dispensary slug to its internal platform ID.
 *
 * STRATEGY:
 * 1. Navigate to embedded menu page and extract window.reactEnv.dispensaryId (preferred)
 * 2. Fall back to GraphQL GetAddressBasedDispensaryData query if reactEnv fails
 *
 * @param {string} slug - Dispensary slug (cName).
 * @returns {Promise<string|null>} The dispensaryId, or null when it could not
 *   be resolved. A 403/404 page also yields null (it does not throw); use
 *   resolveDispensaryIdWithDetails to learn the HTTP status and reason.
 */
async function resolveDispensaryId(slug) {
    const result = await resolveDispensaryIdWithDetails(slug);
    return result.dispensaryId;
}
/**
 * Resolve a dispensary slug with full details (HTTP status, source, error).
 * Use this when you need to know WHY resolution failed.
 *
 * @param {string} slug - Dispensary slug (cName).
 * @returns {Promise<{dispensaryId: (string|null), httpStatus: (number|undefined), source: (string|undefined), error: (string|undefined)}>}
 *   `source` is 'reactEnv' or 'graphql'; it is absent when both strategies
 *   came back empty.
 * @throws {Error} When the fallback GraphQL request itself fails.
 */
async function resolveDispensaryIdWithDetails(slug) {
    console.log(`[GraphQL Client] Resolving dispensary ID for slug: ${slug}`);
    const session = await createSession(slug);
    try {
        const { httpStatus } = session;
        // Check HTTP status first - if 403/404, the store is not crawlable
        if (httpStatus === 403 || httpStatus === 404) {
            console.log(`[GraphQL Client] Page returned HTTP ${httpStatus} for ${slug} - not crawlable`);
            return {
                dispensaryId: null,
                httpStatus,
                error: `HTTP ${httpStatus}: Store removed or not accessible`,
                source: 'reactEnv',
            };
        }
        // PREFERRED: Use dispensaryId from window.reactEnv (extracted during createSession)
        if (session.dispensaryId) {
            console.log(`[GraphQL Client] Resolved ${slug} -> ${session.dispensaryId} (from reactEnv)`);
            return {
                dispensaryId: session.dispensaryId,
                httpStatus,
                source: 'reactEnv',
            };
        }
        // FALLBACK: Try GraphQL query
        console.log(`[GraphQL Client] reactEnv.dispensaryId not found for ${slug}, trying GraphQL...`);
        const variables = {
            dispensaryFilter: {
                cNameOrID: slug,
            },
        };
        const result = await executeGraphQL(session, 'GetAddressBasedDispensaryData', variables, dutchie_1.GRAPHQL_HASHES.GetAddressBasedDispensaryData, slug);
        // The id has been seen under several response shapes; try each in turn
        const dispensaryId = result?.data?.dispensaryBySlug?.id ||
            result?.data?.dispensary?.id ||
            result?.data?.getAddressBasedDispensaryData?.dispensary?.id;
        if (dispensaryId) {
            console.log(`[GraphQL Client] Resolved ${slug} -> ${dispensaryId} (from GraphQL)`);
            return {
                dispensaryId,
                httpStatus,
                source: 'graphql',
            };
        }
        console.log(`[GraphQL Client] Could not resolve ${slug}, GraphQL response:`, previewPayload(result, 300));
        return {
            dispensaryId: null,
            httpStatus,
            error: 'Could not extract dispensaryId from reactEnv or GraphQL',
        };
    }
    finally {
        await closeSession(session);
    }
}
/**
 * Discover Arizona dispensaries via geo-based query.
 *
 * Runs one ConsumerDispensaries query per entry of ARIZONA_CENTERPOINTS and
 * de-duplicates the results by `id` (or `dispensaryId`). Entries with neither
 * field are skipped. A failure for one centerpoint is logged and does not
 * abort the scan.
 *
 * @returns {Promise<object[]>} Unique dispensary records, in discovery order.
 */
async function discoverArizonaDispensaries() {
    console.log('[GraphQL Client] Discovering Arizona dispensaries...');
    // Any valid store works for obtaining cookies; this slug is only used for
    // the session page and the Referer header, not as the search center.
    const sessionSlug = 'AZ-Deeply-Rooted';
    const session = await createSession(sessionSlug);
    const allDispensaries = [];
    const seenIds = new Set();
    try {
        const centerpoints = Array.from(dutchie_1.ARIZONA_CENTERPOINTS);
        for (const [index, centerpoint] of centerpoints.entries()) {
            // Delay between requests (not after the final one)
            if (index > 0) {
                await new Promise((r) => setTimeout(r, 1000));
            }
            console.log(`[GraphQL Client] Scanning ${centerpoint.name}...`);
            const variables = {
                dispensariesFilter: {
                    latitude: centerpoint.lat,
                    longitude: centerpoint.lng,
                    distance: 100,
                    state: 'AZ',
                },
            };
            try {
                const result = await executeGraphQL(session, 'ConsumerDispensaries', variables, dutchie_1.GRAPHQL_HASHES.ConsumerDispensaries, sessionSlug);
                const dispensaries = result?.data?.consumerDispensaries || [];
                for (const d of dispensaries) {
                    const id = d.id || d.dispensaryId;
                    if (id && !seenIds.has(id)) {
                        seenIds.add(id);
                        allDispensaries.push(d);
                    }
                }
                console.log(`[GraphQL Client] Found ${dispensaries.length} in ${centerpoint.name} (${allDispensaries.length} total unique)`);
            }
            catch (error) {
                console.warn(`[GraphQL Client] Error scanning ${centerpoint.name}: ${error.message}`);
            }
        }
    }
    finally {
        await closeSession(session);
    }
    console.log(`[GraphQL Client] Discovery complete: ${allDispensaries.length} dispensaries`);
    return allDispensaries;
}
// ============================================================
// PRODUCT FILTERING VARIABLES
// ============================================================
/**
 * Build filter variables for the FilteredProducts query.
 *
 * Uses `productsFilter.dispensaryId` directly (the platform id, e.g.
 * "6405ef617056e8014d79101b"), NOT `dispensaryFilter.cNameOrID`.
 * NOTE(review): the file header's fetch rule 2 states the opposite; this
 * function reflects the newer guidance - confirm and update the header.
 *
 * Mode A = UI parity (adds Status: "Active")
 * Mode B = MAX COVERAGE (no Status filter, so OOS/inactive are included)
 *
 * @param {string} platformDispensaryId - Platform dispensary id.
 * @param {string} pricingType - e.g. 'rec'.
 * @param {string} crawlMode - 'mode_a' adds the Status filter; anything else omits it.
 * @param {number} page - Zero-based page index.
 * @param {number} perPage - Page size.
 * @returns {{productsFilter: object, page: number, perPage: number}}
 */
function buildFilterVariables(platformDispensaryId, pricingType, crawlMode, page, perPage) {
    const productsFilter = {
        dispensaryId: platformDispensaryId,
        pricingType: pricingType,
    };
    if (crawlMode === 'mode_a') {
        productsFilter.Status = 'Active';
    }
    return {
        productsFilter,
        page,
        perPage,
    };
}
// ============================================================
// PRODUCT FETCHING WITH PAGINATION
// ============================================================
/**
 * Fetch one FilteredProducts page, retrying with a linearly growing delay.
 *
 * @param {object} session - Session from createSession.
 * @param {object} variables - Variables from buildFilterVariables.
 * @param {string} cName - Dispensary slug.
 * @param {number} pageNum - Page index (for log messages only).
 * @param {number} maxRetries - Retries after the first attempt.
 * @returns {Promise<{result: *, error: (Error|null)}>} `error` is the last
 *   failure when every attempt failed, otherwise null.
 */
async function fetchPageWithRetry(session, variables, cName, pageNum, maxRetries) {
    let lastError = null;
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
        try {
            const result = await executeGraphQL(session, 'FilteredProducts', variables, dutchie_1.GRAPHQL_HASHES.FilteredProducts, cName);
            return { result, error: null };
        }
        catch (error) {
            lastError = error;
            console.warn(`[GraphQL Client] Page ${pageNum} attempt ${attempt + 1} failed: ${error.message}`);
            if (attempt < maxRetries) {
                await new Promise((r) => setTimeout(r, 1000 * (attempt + 1)));
            }
        }
    }
    return { result: null, error: lastError };
}
/**
 * Fetch products for a single mode with pagination.
 *
 * Pagination stops on: a page that failed all retries, a response carrying
 * GraphQL `errors`, a page shorter than `perPage`, two consecutive empty
 * pages, or `maxPages`. Products collected before a failure are still
 * returned (the result may therefore be partial).
 *
 * @param {object} session - Session from createSession.
 * @param {string} platformDispensaryId - Platform dispensary id.
 * @param {string} cName - Dispensary slug.
 * @param {string} pricingType - e.g. 'rec'.
 * @param {string} crawlMode - 'mode_a' or 'mode_b'.
 * @returns {Promise<{products: object[], totalCount: number, crawlMode: string}>}
 *   `totalCount` is the server-reported count, or the number fetched when the
 *   server reported none.
 */
async function fetchProductsForMode(session, platformDispensaryId, cName, pricingType, crawlMode) {
    const { perPage, maxPages, maxRetries, pageDelayMs } = dutchie_1.dutchieConfig;
    const allProducts = [];
    let pageNum = 0;
    let totalCount = 0;
    let consecutiveEmptyPages = 0;
    console.log(`[GraphQL Client] Fetching products for ${cName} (platformId: ${platformDispensaryId}, ${pricingType}, ${crawlMode})...`);
    while (pageNum < maxPages) {
        const variables = buildFilterVariables(platformDispensaryId, pricingType, crawlMode, pageNum, perPage);
        const { result, error } = await fetchPageWithRetry(session, variables, cName, pageNum, maxRetries);
        if (error) {
            console.error(`[GraphQL Client] Page ${pageNum} failed after ${maxRetries + 1} attempts`);
            break;
        }
        if (result?.errors) {
            console.error('[GraphQL Client] GraphQL errors:', JSON.stringify(result.errors));
            break;
        }
        const filtered = result?.data?.filteredProducts;
        // Log response shape on first page
        if (pageNum === 0) {
            console.log(`[GraphQL Client] Response keys: ${Object.keys(result || {}).join(', ')}`);
            if (result?.data) {
                console.log(`[GraphQL Client] data keys: ${Object.keys(result.data || {}).join(', ')}`);
            }
            if (!filtered) {
                console.log(`[GraphQL Client] WARNING: No filteredProducts in response!`);
                console.log(`[GraphQL Client] Full response: ${previewPayload(result, 1000)}`);
            }
        }
        const products = filtered?.products || [];
        if (filtered?.queryInfo?.totalCount) {
            totalCount = filtered.queryInfo.totalCount;
        }
        console.log(`[GraphQL Client] Page ${pageNum}: ${products.length} products (total so far: ${allProducts.length + products.length}/${totalCount})`);
        if (products.length === 0) {
            consecutiveEmptyPages++;
            if (consecutiveEmptyPages >= 2) {
                console.log('[GraphQL Client] Multiple empty pages, stopping pagination');
                break;
            }
        }
        else {
            consecutiveEmptyPages = 0;
            allProducts.push(...products);
        }
        // Stop if incomplete page (last page)
        if (products.length < perPage) {
            console.log(`[GraphQL Client] Incomplete page (${products.length} < ${perPage}), stopping`);
            break;
        }
        pageNum++;
        await new Promise((r) => setTimeout(r, pageDelayMs));
    }
    console.log(`[GraphQL Client] Fetched ${allProducts.length} total products (${crawlMode})`);
    return { products: allProducts, totalCount: totalCount || allProducts.length, crawlMode };
}
// ============================================================
// LEGACY SINGLE-MODE INTERFACE
// ============================================================
/**
 * Fetch all products for a dispensary (single mode).
 *
 * @param {string} platformDispensaryId - Platform dispensary id.
 * @param {string} [pricingType='rec'] - Pricing type passed to the query.
 * @param {{cName?: string, crawlMode?: string}} [options] - `cName` is
 *   REQUIRED (there is deliberately no default, to avoid using another
 *   store's session); `crawlMode` defaults to 'mode_a'.
 * @returns {Promise<{products: object[], totalCount: number, crawlMode: string}>}
 * @throws {Error} When `options.cName` is missing.
 */
async function fetchAllProducts(platformDispensaryId, pricingType = 'rec', options = {}) {
    const { crawlMode = 'mode_a', cName } = options;
    if (!cName) {
        throw new Error('[GraphQL Client] cName is required for fetchAllProducts - cannot use another store\'s session');
    }
    const session = await createSession(cName);
    try {
        return await fetchProductsForMode(session, platformDispensaryId, cName, pricingType, crawlMode);
    }
    finally {
        await closeSession(session);
    }
}
// ============================================================
// MODE A+B MERGING
// ============================================================
/**
 * Merge POSMetaData.children arrays from Mode A and Mode B products.
 *
 * Options are identified by the first non-empty value among canonicalID,
 * canonicalSKU, canonicalPackageId and option. Mode A wins on conflicts;
 * Mode B only contributes options whose key is not present in Mode A.
 *
 * Mode A options that have NO identifying key are kept as-is. Dropping them
 * would make a product lose options merely because a Mode B twin exists.
 * Key-less Mode B options are still skipped, because without a key there is
 * no way to tell whether they duplicate a Mode A option.
 *
 * @param {object} modeAProduct - Product from the Mode A crawl.
 * @param {object} modeBProduct - Same product from the Mode B crawl.
 * @returns {object[]} Merged options, Mode A order first, then new Mode B ones.
 */
function mergeProductOptions(modeAProduct, modeBProduct) {
    const modeAChildren = modeAProduct.POSMetaData?.children || [];
    const modeBChildren = modeBProduct.POSMetaData?.children || [];
    const getOptionKey = (child) => child.canonicalID || child.canonicalSKU || child.canonicalPackageId || child.option || '';
    // Map keeps insertion order; key-less Mode A options get a unique Symbol
    // key so they keep their position and can never collide with anything.
    const mergedMap = new Map();
    for (const child of modeAChildren) {
        mergedMap.set(getOptionKey(child) || Symbol('keyless-option'), child);
    }
    for (const child of modeBChildren) {
        const key = getOptionKey(child);
        if (key && !mergedMap.has(key)) {
            mergedMap.set(key, child);
        }
    }
    return Array.from(mergedMap.values());
}
/**
 * Merge a Mode A product with a Mode B product.
 *
 * @param {object} modeAProduct - Product from Mode A; all of its fields win.
 * @param {object} [modeBProduct] - Matching Mode B product, if any.
 * @returns {object} `modeAProduct` itself when there is no Mode B match,
 *   otherwise a shallow copy whose POSMetaData.children is the merged list.
 */
function mergeProducts(modeAProduct, modeBProduct) {
    if (!modeBProduct) {
        return modeAProduct;
    }
    return {
        ...modeAProduct,
        POSMetaData: {
            ...modeAProduct.POSMetaData,
            children: mergeProductOptions(modeAProduct, modeBProduct),
        },
    };
}
// ============================================================
// MAIN EXPORT: TWO-MODE CRAWL
// ============================================================
/**
 * Combine the product lists of both crawl modes into one de-duplicated list.
 *
 * Products are matched by `_id`. A Mode A product with a Mode B match is
 * passed through mergeProducts; Mode B products with no Mode A match are
 * appended afterwards. Products that have no `_id` cannot be matched, so
 * they are kept individually instead of all sharing the `undefined` key
 * (which would make them overwrite each other and merge with an unrelated
 * product).
 *
 * @param {object[]} modeAProducts - Products from Mode A.
 * @param {object[]} modeBProducts - Products from Mode B.
 * @returns {object[]} Mode A products (merged) followed by Mode B-only ones.
 */
function mergeModeResults(modeAProducts, modeBProducts) {
    const hasId = (product) => product._id !== undefined && product._id !== null;
    const modeBById = new Map();
    for (const product of modeBProducts) {
        if (hasId(product)) {
            modeBById.set(product._id, product);
        }
    }
    // Insertion-ordered; id-less products get a unique Symbol key so they
    // keep their position without colliding.
    const productMap = new Map();
    for (const product of modeAProducts) {
        if (hasId(product)) {
            productMap.set(product._id, mergeProducts(product, modeBById.get(product._id)));
        }
        else {
            productMap.set(Symbol('no-id'), product);
        }
    }
    for (const product of modeBProducts) {
        if (!hasId(product)) {
            productMap.set(Symbol('no-id'), product);
        }
        else if (!productMap.has(product._id)) {
            productMap.set(product._id, product);
        }
    }
    return Array.from(productMap.values());
}
/**
 * Fetch products using BOTH crawl modes with a SINGLE session.
 * Runs Mode A, waits `dutchieConfig.modeDelayMs`, runs Mode B, then merges.
 *
 * @param {string} platformDispensaryId - Platform dispensary id.
 * @param {string} [pricingType='rec'] - Pricing type passed to the query.
 * @param {{cName?: string}} [options] - `cName` is REQUIRED (there is
 *   deliberately no default, to avoid using another store's session).
 * @returns {Promise<{modeA: {products: object[], totalCount: number}, modeB: {products: object[], totalCount: number}, merged: {products: object[], totalCount: number}}>}
 * @throws {Error} When `options.cName` is missing.
 */
async function fetchAllProductsBothModes(platformDispensaryId, pricingType = 'rec', options = {}) {
    const { cName } = options;
    if (!cName) {
        throw new Error('[GraphQL Client] cName is required for fetchAllProductsBothModes - cannot use another store\'s session');
    }
    console.log(`[GraphQL Client] Running two-mode crawl for ${cName} (${pricingType})...`);
    console.log(`[GraphQL Client] Platform ID: ${platformDispensaryId}, cName: ${cName}`);
    const session = await createSession(cName);
    try {
        // Mode A (UI parity)
        const modeAResult = await fetchProductsForMode(session, platformDispensaryId, cName, pricingType, 'mode_a');
        // Delay between modes
        await new Promise((r) => setTimeout(r, dutchie_1.dutchieConfig.modeDelayMs));
        // Mode B (MAX COVERAGE)
        const modeBResult = await fetchProductsForMode(session, platformDispensaryId, cName, pricingType, 'mode_b');
        const mergedProducts = mergeModeResults(modeAResult.products, modeBResult.products);
        console.log(`[GraphQL Client] Merged: ${mergedProducts.length} unique products`);
        console.log(`[GraphQL Client] Mode A: ${modeAResult.products.length}, Mode B: ${modeBResult.products.length}`);
        return {
            modeA: { products: modeAResult.products, totalCount: modeAResult.totalCount },
            modeB: { products: modeBResult.products, totalCount: modeBResult.totalCount },
            merged: { products: mergedProducts, totalCount: mergedProducts.length },
        };
    }
    finally {
        await closeSession(session);
    }
}