"use strict";
/**
 * Menu Provider Detection Service
 *
 * Detects which menu platform a dispensary is using by analyzing:
 * - HTML content patterns (scripts, iframes, classes)
 * - URL patterns (embedded menu paths)
 * - API endpoint signatures
 * - Meta tags and headers
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.detectMenuProvider = detectMenuProvider;
exports.quickDutchieCheck = quickDutchieCheck;
exports.detectProviderChange = detectProviderChange;
const puppeteer_1 = __importDefault(require("puppeteer"));
const logger_1 = require("./logger");

// Category tag passed as the first argument of every logger call in this module.
const LOG_CATEGORY = 'provider-detection';

// User agent string sent by the detection browser.
const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

// Provider detection patterns.
// Each provider maps a signal category to the regexes that indicate it.
// Note: the `urls` lists are not consulted by any function in this file.
const PROVIDER_PATTERNS = {
    dutchie: {
        scripts: [
            /dutchie/i,
            /dutchie-plus/i,
            /dutchie\.com/i,
            /dutchie-embed/i,
        ],
        iframes: [
            /dutchie\.com/i,
            /embed\.dutchie/i,
            /iframe\.dutchie/i,
        ],
        classes: [
            /dutchie-/i,
            /DutchieEmbed/i,
        ],
        urls: [
            /dutchie\.com/i,
            /\.dutchie\./i,
        ],
        meta: [
            /dutchie/i,
        ],
        apiEndpoints: [
            /graphql.*dutchie/i,
            /api\.dutchie/i,
        ],
        htmlPatterns: [
            /data-dutchie/i,
            /__DUTCHIE__/i,
            /dutchie-plus-iframe/i,
        ],
    },
    treez: {
        scripts: [
            /treez/i,
            /treez\.io/i,
            /treezpay/i,
        ],
        iframes: [
            /treez\.io/i,
            /menu\.treez/i,
        ],
        classes: [
            /treez-/i,
        ],
        urls: [
            /treez\.io/i,
            /\.treez\./i,
        ],
        meta: [
            /treez/i,
        ],
        apiEndpoints: [
            /api\.treez/i,
        ],
        htmlPatterns: [
            /data-treez/i,
            /treez-embed/i,
        ],
    },
    jane: {
        scripts: [
            /jane\.co/i,
            /iheartjane/i,
            /jane-embed/i,
            /janetechnologies/i,
        ],
        iframes: [
            /jane\.co/i,
            /iheartjane\.com/i,
            /menu\.jane/i,
        ],
        classes: [
            /jane-/i,
            /iheartjane/i,
        ],
        urls: [
            /jane\.co/i,
            /iheartjane\.com/i,
        ],
        meta: [
            /jane/i,
            /iheartjane/i,
        ],
        apiEndpoints: [
            /api\.iheartjane/i,
            /api\.jane\.co/i,
        ],
        htmlPatterns: [
            /data-jane/i,
            /jane-root/i,
            /jane-embed/i,
        ],
    },
    weedmaps: {
        scripts: [
            /weedmaps/i,
            /wm\.com/i,
        ],
        iframes: [
            /weedmaps\.com/i,
            /menu\.weedmaps/i,
        ],
        classes: [
            /weedmaps-/i,
            /wm-/i,
        ],
        urls: [
            /weedmaps\.com/i,
        ],
        meta: [
            /weedmaps/i,
        ],
        apiEndpoints: [
            /api.*weedmaps/i,
        ],
        htmlPatterns: [
            /data-weedmaps/i,
        ],
    },
    leafly: {
        scripts: [
            /leafly/i,
            /leafly\.com/i,
        ],
        iframes: [
            /leafly\.com/i,
            /menu\.leafly/i,
        ],
        classes: [
            /leafly-/i,
        ],
        urls: [
            /leafly\.com/i,
        ],
        meta: [
            /leafly/i,
        ],
        apiEndpoints: [
            /api\.leafly/i,
        ],
        htmlPatterns: [
            /data-leafly/i,
        ],
    },
    meadow: {
        scripts: [
            /meadow/i,
            /getmeadow/i,
        ],
        iframes: [
            /getmeadow\.com/i,
        ],
        classes: [
            /meadow-/i,
        ],
        urls: [
            /getmeadow\.com/i,
        ],
        meta: [],
        apiEndpoints: [
            /api\.getmeadow/i,
        ],
        htmlPatterns: [],
    },
    greenlight: {
        scripts: [
            /greenlight/i,
            /greenlightmenu/i,
        ],
        iframes: [
            /greenlight/i,
        ],
        classes: [
            /greenlight-/i,
        ],
        urls: [
            /greenlight/i,
        ],
        meta: [],
        apiEndpoints: [],
        htmlPatterns: [],
    },
    blaze: {
        scripts: [
            /blaze\.me/i,
            /blazepos/i,
        ],
        iframes: [
            /blaze\.me/i,
        ],
        classes: [
            /blaze-/i,
        ],
        urls: [
            /blaze\.me/i,
        ],
        meta: [],
        apiEndpoints: [
            /api\.blaze/i,
        ],
        htmlPatterns: [],
    },
    flowhub: {
        scripts: [
            /flowhub/i,
        ],
        iframes: [
            /flowhub\.com/i,
        ],
        classes: [
            /flowhub-/i,
        ],
        urls: [
            /flowhub\.com/i,
        ],
        meta: [],
        apiEndpoints: [],
        htmlPatterns: [],
    },
    dispense: {
        scripts: [
            /dispenseapp/i,
        ],
        iframes: [
            /dispenseapp\.com/i,
        ],
        classes: [
            /dispense-/i,
        ],
        urls: [
            /dispenseapp\.com/i,
        ],
        meta: [],
        apiEndpoints: [],
        htmlPatterns: [],
    },
    cova: {
        scripts: [
            /covasoftware/i,
            /cova\.software/i,
        ],
        iframes: [
            /cova/i,
        ],
        classes: [
            /cova-/i,
        ],
        urls: [
            /cova/i,
        ],
        meta: [],
        apiEndpoints: [],
        htmlPatterns: [],
    },
};

// Common menu URL paths to check (each is appended to the normalized base URL).
const MENU_PATHS = [
    '/menu',
    '/shop',
    '/products',
    '/order',
    '/store',
    '/dispensary-menu',
    '/online-menu',
    '/shop-all',
    '/browse',
    '/catalog',
];

// Patterns tested against the full page HTML by quickDutchieCheck.
// Every entry contains the literal "dutchie", so the first pattern alone
// already matches anything the others match; the rest are kept as a record
// of the specific markers being looked for.
const DUTCHIE_QUICK_PATTERNS = [
    /dutchie/i,
    /dutchie-plus/i,
    /__DUTCHIE__/i,
    /data-dutchie/i,
    /embed\.dutchie/i,
];

/**
 * Collect the page content that provider patterns are matched against.
 * Queried once per page so the per-provider loop does no browser round trips.
 *
 * @param {object} page - Puppeteer page that has already been navigated.
 * @returns {Promise<{html: string, scriptSrcs: string[], inlineScripts: string[], iframeSrcs: string[], metaTags: string[]}>}
 */
async function collectPageContent(page) {
    const html = await page.content();
    const scriptSrcs = await page.$$eval('script[src]', els => els.map(el => el.getAttribute('src') || ''));
    const inlineScripts = await page.$$eval('script:not([src])', els => els.map(el => el.textContent || ''));
    const iframeSrcs = await page.$$eval('iframe', els => els.map(el => el.getAttribute('src') || ''));
    // Missing attributes stringify as "null"; kept as-is because the string is
    // reported verbatim in the signal's `details`.
    const metaTags = await page.$$eval('meta', els => els.map(el => `${el.getAttribute('name')} ${el.getAttribute('content')}`));
    return { html, scriptSrcs, inlineScripts, iframeSrcs, metaTags };
}

/**
 * Analyze a single page for provider signals.
 *
 * Emits one signal per (candidate, pattern) match, with a fixed confidence
 * per source: script_src 90, inline_script 70, iframe_src 95,
 * html_pattern 85, css_class 60, meta_tag 80.
 *
 * @param {object} page - Puppeteer page that has already been navigated.
 * @param {string} url - URL of the page; used only in the error log message.
 * @returns {Promise<Array<{provider: string, confidence: number, source: string, details: string}>>}
 *   Signals found. Errors are logged rather than thrown; if collecting the
 *   page content fails, an empty array is returned.
 */
async function analyzePageForProviders(page, url) {
    const signals = [];
    try {
        const { html, scriptSrcs, inlineScripts, iframeSrcs, metaTags } = await collectPageContent(page);
        // Check each provider's patterns
        for (const [provider, patterns] of Object.entries(PROVIDER_PATTERNS)) {
            // Check script sources
            for (const script of scriptSrcs) {
                for (const pattern of patterns.scripts) {
                    if (pattern.test(script)) {
                        signals.push({
                            provider: provider,
                            confidence: 90,
                            source: 'script_src',
                            details: script,
                        });
                    }
                }
            }
            // Check inline scripts (same patterns as script sources, lower confidence)
            for (const scriptContent of inlineScripts) {
                for (const pattern of patterns.scripts) {
                    if (pattern.test(scriptContent)) {
                        signals.push({
                            provider: provider,
                            confidence: 70,
                            source: 'inline_script',
                            details: `Pattern: ${pattern}`,
                        });
                    }
                }
            }
            // Check iframes
            for (const iframe of iframeSrcs) {
                for (const pattern of patterns.iframes) {
                    if (pattern.test(iframe)) {
                        signals.push({
                            provider: provider,
                            confidence: 95,
                            source: 'iframe_src',
                            details: iframe,
                        });
                    }
                }
            }
            // Check HTML patterns
            for (const pattern of patterns.htmlPatterns) {
                if (pattern.test(html)) {
                    signals.push({
                        provider: provider,
                        confidence: 85,
                        source: 'html_pattern',
                        details: `Pattern: ${pattern}`,
                    });
                }
            }
            // Check CSS classes (matched against the raw HTML, not parsed class attributes)
            for (const pattern of patterns.classes) {
                if (pattern.test(html)) {
                    signals.push({
                        provider: provider,
                        confidence: 60,
                        source: 'css_class',
                        details: `Pattern: ${pattern}`,
                    });
                }
            }
            // Check meta tags
            for (const meta of metaTags) {
                for (const pattern of patterns.meta) {
                    if (pattern.test(meta)) {
                        signals.push({
                            provider: provider,
                            confidence: 80,
                            source: 'meta_tag',
                            details: meta,
                        });
                    }
                }
            }
        }
        // Network requests are not inspected here; detectMenuProvider matches
        // intercepted request URLs against `apiEndpoints` itself.
    }
    catch (error) {
        logger_1.logger.error(LOG_CATEGORY, `Error analyzing page ${url}: ${error}`);
    }
    return signals;
}

/**
 * Aggregate signals into a final detection result.
 *
 * Per provider, the score is the highest signal confidence plus 3 points for
 * each additional signal (bonus capped at 10, score capped at 100). The top
 * scorer wins; if the runner-up is within 20 points, the reported confidence
 * is reduced by 20 with a floor of 50.
 *
 * @param {Array<{provider: string, confidence: number}>} signals
 * @returns {{provider: string, confidence: number}}
 *   `{ provider: 'unknown', confidence: 0 }` when there are no signals.
 */
function aggregateSignals(signals) {
    if (signals.length === 0) {
        return { provider: 'unknown', confidence: 0 };
    }
    // Group signal confidences by provider
    const providerScores = {};
    for (const signal of signals) {
        if (!providerScores[signal.provider]) {
            providerScores[signal.provider] = [];
        }
        providerScores[signal.provider].push(signal.confidence);
    }
    // Calculate weighted score for each provider
    const scores = [];
    for (const [provider, confidences] of Object.entries(providerScores)) {
        // Use max confidence + bonus for multiple signals
        const maxConf = Math.max(...confidences);
        const multiSignalBonus = Math.min(10, (confidences.length - 1) * 3);
        const score = Math.min(100, maxConf + multiSignalBonus);
        scores.push({ provider: provider, score });
    }
    // Sort by score descending
    scores.sort((a, b) => b.score - a.score);
    const best = scores[0];
    // If there's a clear winner (20+ point lead), use it
    if (scores.length === 1 || best.score - scores[1].score >= 20) {
        return { provider: best.provider, confidence: best.score };
    }
    // Multiple contenders - reduce confidence
    return {
        provider: best.provider,
        confidence: Math.max(50, best.score - 20),
    };
}

/**
 * Normalize a user-supplied website address into a base URL.
 * Trims whitespace, prepends `https://` when no http(s) scheme is present,
 * and strips trailing slashes so menu paths can be appended directly.
 *
 * @param {string} websiteUrl
 * @returns {string}
 */
function normalizeBaseUrl(websiteUrl) {
    const trimmed = websiteUrl.trim();
    // Test for a real scheme rather than a bare "http" prefix, so hosts such as
    // "httpbin.org" still get a scheme and "HTTP://..." is left alone.
    const withScheme = /^https?:\/\//i.test(trimmed) ? trimmed : `https://${trimmed}`;
    return withScheme.replace(/\/+$/, '');
}

/**
 * Detect the menu provider for a dispensary.
 *
 * Launches a headless browser, visits the site root and (optionally) each
 * path in MENU_PATHS, collects provider signals from every page plus any
 * intercepted request URLs that match a provider's `apiEndpoints`, and
 * aggregates them into a single provider/confidence pair.
 *
 * @param {string} websiteUrl - Site address, with or without scheme.
 * @param {{checkMenuPaths?: boolean, timeout?: number}} [options]
 *   `checkMenuPaths` (default true) also probes MENU_PATHS;
 *   `timeout` (default 30000) is passed to each navigation.
 * @returns {Promise<object>} Result with `provider`, `confidence`, `signals`,
 *   `urlsTested`, `menuEntryPoints`, `rawSignals`, and `error` when detection
 *   failed as a whole. Never rejects for detection failures.
 */
async function detectMenuProvider(websiteUrl, options = {}) {
    const { checkMenuPaths = true, timeout = 30000 } = options;
    const result = {
        provider: 'unknown',
        confidence: 0,
        signals: [],
        urlsTested: [],
        menuEntryPoints: [],
        rawSignals: {},
    };
    let browser = null;
    try {
        const baseUrl = normalizeBaseUrl(websiteUrl);
        // Launch browser
        browser = await puppeteer_1.default.launch({
            headless: true,
            args: [
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-gpu',
            ],
        });
        const page = await browser.newPage();
        await page.setUserAgent(USER_AGENT);
        // Track network requests for API detection
        const apiRequests = [];
        await page.setRequestInterception(true);
        page.on('request', (request) => {
            const requestUrl = request.url();
            if (requestUrl.includes('api') || requestUrl.includes('graphql')) {
                apiRequests.push(requestUrl);
            }
            // continue() is async; swallow its rejection so a request that can no
            // longer be continued does not surface as an unhandled rejection.
            request.continue().catch(() => { });
        });
        // URLs to check
        const urlsToCheck = [baseUrl];
        if (checkMenuPaths) {
            for (const path of MENU_PATHS) {
                urlsToCheck.push(`${baseUrl}${path}`);
            }
        }
        // Check each URL
        for (const url of urlsToCheck) {
            try {
                result.urlsTested.push(url);
                await page.goto(url, {
                    waitUntil: 'networkidle2',
                    timeout,
                });
                // Wait a bit for dynamic content
                await new Promise(r => setTimeout(r, 2000));
                // Analyze page
                const pageSignals = await analyzePageForProviders(page, url);
                result.signals.push(...pageSignals);
                // Track if this URL has menu content.
                // The callback runs in the browser, so it must be self-contained.
                const hasMenuContent = await page.evaluate(() => {
                    const text = (document.body ? document.body.innerText : '').toLowerCase();
                    const keywords = [
                        'add to cart',
                        'add to bag',
                        'product',
                        'indica',
                        'sativa',
                        'hybrid',
                        'thc',
                        'cbd',
                    ];
                    return keywords.some(keyword => text.includes(keyword));
                });
                if (hasMenuContent && url !== baseUrl) {
                    result.menuEntryPoints.push(url);
                }
            }
            catch (pageError) {
                // 404s are fine, just skip.
                // NOTE(review): page.goto is documented not to reject on HTTP error
                // statuses, so this filter probably only matches navigation errors
                // whose message happens to contain "404" - confirm intent.
                if (!pageError.message?.includes('404')) {
                    logger_1.logger.warn(LOG_CATEGORY, `Could not load ${url}: ${pageError.message}`);
                }
            }
        }
        // Check API requests for provider hints
        for (const apiUrl of apiRequests) {
            for (const [provider, patterns] of Object.entries(PROVIDER_PATTERNS)) {
                for (const pattern of patterns.apiEndpoints) {
                    if (pattern.test(apiUrl)) {
                        result.signals.push({
                            provider: provider,
                            confidence: 95,
                            source: 'api_request',
                            details: apiUrl,
                        });
                    }
                }
            }
        }
        // Record raw signals
        result.rawSignals = {
            apiRequestsFound: apiRequests.length,
            menuEntryPointsFound: result.menuEntryPoints.length,
            totalSignals: result.signals.length,
            uniqueProviders: new Set(result.signals.map(s => s.provider)).size,
        };
        // Aggregate signals into final result
        const aggregated = aggregateSignals(result.signals);
        result.provider = aggregated.provider;
        result.confidence = aggregated.confidence;
    }
    catch (error) {
        result.error = error.message;
        logger_1.logger.error(LOG_CATEGORY, `Detection failed for ${websiteUrl}: ${error.message}`);
    }
    finally {
        if (browser) {
            try {
                await browser.close();
            }
            catch (closeError) {
                // A failed shutdown must not discard the result computed above.
                logger_1.logger.warn(LOG_CATEGORY, `Failed to close browser for ${websiteUrl}: ${closeError}`);
            }
        }
    }
    return result;
}

/**
 * Quick check if a site has Dutchie - used during production crawls.
 *
 * Tests the page HTML against DUTCHIE_QUICK_PATTERNS, then the `src` of every
 * iframe against /dutchie/i.
 *
 * @param {object} page - Puppeteer page that has already been navigated.
 * @returns {Promise<boolean>} true on the first match; false when nothing
 *   matches or when reading the page fails (the failure is logged).
 */
async function quickDutchieCheck(page) {
    try {
        const html = await page.content();
        // Check for Dutchie-specific patterns
        if (DUTCHIE_QUICK_PATTERNS.some(pattern => pattern.test(html))) {
            return true;
        }
        // Check iframes
        const iframes = await page.$$eval('iframe', els => els.map(el => el.getAttribute('src') || ''));
        return iframes.some(iframe => /dutchie/i.test(iframe));
    }
    catch (error) {
        // Best-effort check: report "not found" but leave a trace of the failure.
        logger_1.logger.warn(LOG_CATEGORY, `Quick Dutchie check failed: ${error}`);
        return false;
    }
}

/**
 * Check if provider has changed from expected.
 *
 * Only `expectedProvider === 'dutchie'` can produce a change; any other value
 * yields `{ changed: false }`.
 *
 * @param {object} page - Puppeteer page that has already been navigated.
 * @param {string} expectedProvider - Provider the caller believes is in use.
 * @returns {Promise<{changed: boolean, newProvider?: string, confidence?: number}>}
 *   `{ changed: false }` is also returned when the check itself fails
 *   (the failure is logged).
 */
async function detectProviderChange(page, expectedProvider) {
    // Both change conditions below require Dutchie to be the expected provider,
    // so skip the page analysis entirely for anything else.
    if (expectedProvider !== 'dutchie') {
        return { changed: false };
    }
    try {
        const signals = await analyzePageForProviders(page, page.url());
        const aggregated = aggregateSignals(signals);
        // If we expected Dutchie but found something else with high confidence
        if (aggregated.provider !== 'dutchie' && aggregated.confidence >= 70) {
            return {
                changed: true,
                newProvider: aggregated.provider,
                confidence: aggregated.confidence,
            };
        }
        // If we expected Dutchie and found nothing/low confidence, might have switched
        if (aggregated.confidence < 30) {
            // Check if Dutchie is definitely NOT present
            const hasDutchie = await quickDutchieCheck(page);
            if (!hasDutchie) {
                return {
                    changed: true,
                    newProvider: aggregated.provider !== 'unknown' ? aggregated.provider : 'other',
                    confidence: Math.max(30, aggregated.confidence),
                };
            }
        }
        return { changed: false };
    }
    catch (error) {
        // Best-effort check: report "no change" but leave a trace of the failure.
        logger_1.logger.warn(LOG_CATEGORY, `Provider change check failed: ${error}`);
        return { changed: false };
    }
}