From 3ee09fbe84c950b9db353aae8c1f2219975c126f Mon Sep 17 00:00:00 2001 From: Kelly Date: Tue, 16 Dec 2025 19:22:04 -0700 Subject: [PATCH] feat: Treez SSR support, task improvements, worker geo display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add SSR config extraction for Treez sites (BEST Dispensary) - Increase MAX_RETRIES from 3 to 5 for task failures - Update task list ordering: active > pending > failed > completed - Show detected proxy location in worker dashboard (from fingerprint) - Hardcode 'dutchie' menu_type in promotion.ts (remove deriveMenuType) - Update provider display to show actual provider names 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- backend/src/discovery/promotion.ts | 14 +--- backend/src/platforms/treez/client.ts | 106 ++++++++++++++++++++++++- backend/src/tasks/task-service.ts | 14 +++- backend/src/utils/provider-display.ts | 43 +++++----- cannaiq/src/lib/provider-display.ts | 46 ++++++----- cannaiq/src/pages/WorkersDashboard.tsx | 12 +-- 6 files changed, 173 insertions(+), 62 deletions(-) diff --git a/backend/src/discovery/promotion.ts b/backend/src/discovery/promotion.ts index 5bfedbe7..50c4d16e 100644 --- a/backend/src/discovery/promotion.ts +++ b/backend/src/discovery/promotion.ts @@ -151,18 +151,6 @@ function generateSlug(name: string, city: string, state: string): string { return base; } -/** - * Derive menu_type from platform_menu_url pattern - */ -function deriveMenuType(url: string | null): string { - if (!url) return 'unknown'; - if (url.includes('/dispensary/')) return 'standalone'; - if (url.includes('/embedded-menu/')) return 'embedded'; - if (url.includes('/stores/')) return 'standalone'; - // Custom domain = embedded widget on store's site - if (!url.includes('dutchie.com')) return 'embedded'; - return 'unknown'; -} /** * Log a promotion action to dutchie_promotion_log @@ -415,7 +403,7 @@ async function promoteLocation( 
loc.timezone, // $15 timezone loc.platform_location_id, // $16 platform_dispensary_id loc.platform_menu_url, // $17 menu_url - deriveMenuType(loc.platform_menu_url), // $18 menu_type + 'dutchie', // $18 menu_type loc.description, // $19 description loc.logo_image, // $20 logo_image loc.banner_image, // $21 banner_image diff --git a/backend/src/platforms/treez/client.ts b/backend/src/platforms/treez/client.ts index e85fec89..c6ea3e1d 100644 --- a/backend/src/platforms/treez/client.ts +++ b/backend/src/platforms/treez/client.ts @@ -289,6 +289,102 @@ export function getStoreConfig(): TreezStoreConfig | null { return currentStoreConfig; } +/** + * Extract store config from page HTML for SSR sites. + * + * SSR sites (like BEST Dispensary) pre-render data and don't make client-side + * API requests. The config is embedded in __NEXT_DATA__ or window variables. + * + * Looks for: + * - __NEXT_DATA__.props.pageProps.msoStoreConfig.orgId / entityId + * - window.__SETTINGS__.msoOrgId / msoStoreEntityId + * - treezStores config in page data + */ +async function extractConfigFromPage(page: Page): Promise { + console.log('[Treez Client] Attempting to extract config from page HTML (SSR fallback)...'); + + const config = await page.evaluate(() => { + // Try __NEXT_DATA__ first (Next.js SSR) + const nextDataEl = document.getElementById('__NEXT_DATA__'); + if (nextDataEl) { + try { + const nextData = JSON.parse(nextDataEl.textContent || '{}'); + const pageProps = nextData?.props?.pageProps; + + // Look for MSO config in various locations + const msoConfig = pageProps?.msoStoreConfig || pageProps?.storeConfig || {}; + const settings = pageProps?.settings || {}; + + // Extract org-id and entity-id + let orgId = msoConfig.orgId || msoConfig.msoOrgId || settings.msoOrgId; + let entityId = msoConfig.entityId || msoConfig.msoStoreEntityId || settings.msoStoreEntityId; + + // Also check treezStores array + if (!orgId || !entityId) { + const treezStores = pageProps?.treezStores || 
nextData?.props?.treezStores; + if (treezStores && Array.isArray(treezStores) && treezStores.length > 0) { + const store = treezStores[0]; + orgId = orgId || store.orgId || store.organization_id; + entityId = entityId || store.entityId || store.entity_id || store.storeId; + } + } + + // Check for API settings + const apiSettings = pageProps?.apiSettings || settings.api || {}; + + if (orgId && entityId) { + return { + orgId, + entityId, + esUrl: apiSettings.esUrl || null, + apiKey: apiSettings.apiKey || null, + }; + } + } catch (e) { + console.error('Error parsing __NEXT_DATA__:', e); + } + } + + // Try window variables + const win = window as any; + if (win.__SETTINGS__) { + const s = win.__SETTINGS__; + if (s.msoOrgId && s.msoStoreEntityId) { + return { + orgId: s.msoOrgId, + entityId: s.msoStoreEntityId, + esUrl: s.esUrl || null, + apiKey: s.apiKey || null, + }; + } + } + + return null; + }); + + if (!config || !config.orgId || !config.entityId) { + console.log('[Treez Client] Could not extract config from page'); + return null; + } + + // Build full config with defaults for missing values + const fullConfig: TreezStoreConfig = { + orgId: config.orgId, + entityId: config.entityId, + // Default ES URL pattern - gapcommerce is the common tenant + esUrl: config.esUrl || 'https://search-gapcommerce.gapcommerceapi.com/product/search', + // Use default API key from config + apiKey: config.apiKey || TREEZ_CONFIG.esApiKey, + }; + + console.log('[Treez Client] Extracted config from page (SSR):'); + console.log(` ES URL: ${fullConfig.esUrl}`); + console.log(` Org ID: ${fullConfig.orgId}`); + console.log(` Entity ID: ${fullConfig.entityId}`); + + return fullConfig; +} + // ============================================================ // PRODUCT FETCHING (Direct API Approach) // ============================================================ @@ -343,9 +439,15 @@ export async function fetchAllProducts( // Wait for initial page load to trigger first API request await sleep(3000); 
- // Check if we captured the store config + // Check if we captured the store config from network requests if (!currentStoreConfig) { - console.error('[Treez Client] Failed to capture store config from browser requests'); + console.log('[Treez Client] No API requests captured - trying SSR fallback...'); + // For SSR sites, extract config from page HTML + currentStoreConfig = await extractConfigFromPage(page); + } + + if (!currentStoreConfig) { + console.error('[Treez Client] Failed to capture store config from browser requests or page HTML'); throw new Error('Failed to capture Treez store config'); } diff --git a/backend/src/tasks/task-service.ts b/backend/src/tasks/task-service.ts index 7222a344..0076f9a4 100644 --- a/backend/src/tasks/task-service.ts +++ b/backend/src/tasks/task-service.ts @@ -267,7 +267,7 @@ class TaskService { async completeTask(taskId: number, result?: Record): Promise { await pool.query( `UPDATE worker_tasks - SET status = 'completed', completed_at = NOW(), result = $2 + SET status = 'completed', completed_at = NOW(), result = $2, error_message = NULL WHERE id = $1`, [taskId, result ? 
JSON.stringify(result) : null] ); @@ -351,7 +351,7 @@ class TaskService { * Hard failures: Auto-retry up to MAX_RETRIES with exponential backoff */ async failTask(taskId: number, errorMessage: string): Promise { - const MAX_RETRIES = 3; + const MAX_RETRIES = 5; const isSoft = this.isSoftFailure(errorMessage); // Get current retry count @@ -490,7 +490,15 @@ class TaskService { ${poolJoin} LEFT JOIN worker_registry w ON w.worker_id = t.worker_id ${whereClause} - ORDER BY t.created_at DESC + ORDER BY + CASE t.status + WHEN 'active' THEN 1 + WHEN 'pending' THEN 2 + WHEN 'failed' THEN 3 + WHEN 'completed' THEN 4 + ELSE 5 + END, + t.created_at DESC LIMIT ${limit} OFFSET ${offset}`, params ); diff --git a/backend/src/utils/provider-display.ts b/backend/src/utils/provider-display.ts index ba3f1da5..69090298 100644 --- a/backend/src/utils/provider-display.ts +++ b/backend/src/utils/provider-display.ts @@ -1,29 +1,36 @@ /** * Provider Display Names * - * Maps internal provider identifiers to safe display labels. - * Internal identifiers (menu_type, product_provider, crawler_type) remain unchanged. - * Only the display label shown to users is transformed. + * Maps internal menu_type values to display labels. 
+ * - standalone/embedded → dutchie (both are Dutchie platform) + * - treez → treez + * - jane/iheartjane → jane */ export const ProviderDisplayNames: Record = { - // All menu providers map to anonymous "Menu Feed" label - dutchie: 'Menu Feed', - treez: 'Menu Feed', - jane: 'Menu Feed', - iheartjane: 'Menu Feed', - blaze: 'Menu Feed', - flowhub: 'Menu Feed', - weedmaps: 'Menu Feed', - leafly: 'Menu Feed', - leaflogix: 'Menu Feed', - tymber: 'Menu Feed', - dispense: 'Menu Feed', + // Dutchie (standalone and embedded are both Dutchie) + dutchie: 'dutchie', + standalone: 'dutchie', + embedded: 'dutchie', + + // Other platforms + treez: 'treez', + jane: 'jane', + iheartjane: 'jane', + + // Future platforms + blaze: 'blaze', + flowhub: 'flowhub', + weedmaps: 'weedmaps', + leafly: 'leafly', + leaflogix: 'leaflogix', + tymber: 'tymber', + dispense: 'dispense', // Catch-all - unknown: 'Menu Feed', - default: 'Menu Feed', - '': 'Menu Feed', + unknown: 'unknown', + default: 'unknown', + '': 'unknown', }; /** diff --git a/cannaiq/src/lib/provider-display.ts b/cannaiq/src/lib/provider-display.ts index 6c6e567d..a3f93288 100644 --- a/cannaiq/src/lib/provider-display.ts +++ b/cannaiq/src/lib/provider-display.ts @@ -1,32 +1,36 @@ /** * Provider Display Names * - * Maps internal provider identifiers to safe display labels. - * Internal identifiers (menu_type, product_provider, crawler_type) remain unchanged. - * Only the display label shown to users is transformed. - * - * IMPORTANT: Raw provider names (dutchie, treez, jane, etc.) must NEVER - * be displayed directly in the UI. Always use this utility. + * Maps internal menu_type values to display labels. 
+ * - standalone/embedded → dutchie (both are Dutchie platform) + * - treez → treez + * - jane/iheartjane → jane */ export const ProviderDisplayNames: Record = { - // All menu providers map to anonymous "Menu Feed" label - dutchie: 'Menu Feed', - treez: 'Menu Feed', - jane: 'Menu Feed', - iheartjane: 'Menu Feed', - blaze: 'Menu Feed', - flowhub: 'Menu Feed', - weedmaps: 'Menu Feed', - leafly: 'Menu Feed', - leaflogix: 'Menu Feed', - tymber: 'Menu Feed', - dispense: 'Menu Feed', + // Dutchie (standalone and embedded are both Dutchie) + dutchie: 'dutchie', + standalone: 'dutchie', + embedded: 'dutchie', + + // Other platforms + treez: 'treez', + jane: 'jane', + iheartjane: 'jane', + + // Future platforms + blaze: 'blaze', + flowhub: 'flowhub', + weedmaps: 'weedmaps', + leafly: 'leafly', + leaflogix: 'leaflogix', + tymber: 'tymber', + dispense: 'dispense', // Catch-all - unknown: 'Menu Feed', - default: 'Menu Feed', - '': 'Menu Feed', + unknown: 'unknown', + default: 'unknown', + '': 'unknown', }; /** diff --git a/cannaiq/src/pages/WorkersDashboard.tsx b/cannaiq/src/pages/WorkersDashboard.tsx index 0329585c..f899409a 100644 --- a/cannaiq/src/pages/WorkersDashboard.tsx +++ b/cannaiq/src/pages/WorkersDashboard.tsx @@ -383,9 +383,10 @@ function PreflightSummary({ worker, poolOpen = true }: { worker: Worker; poolOpe const fingerprint = worker.fingerprint_data; const httpError = worker.preflight_http_error; const httpMs = worker.preflight_http_ms; - // Geo from current_city/state columns, or fallback to fingerprint detected location - const geoState = worker.current_state || fingerprint?.detectedLocation?.region; - const geoCity = worker.current_city || fingerprint?.detectedLocation?.city; + // Prefer the DETECTED proxy location (from fingerprint); fall back to the assigned state/city + // This lets us verify the proxy is geo-targeted correctly + const geoState = fingerprint?.detectedLocation?.region || worker.current_state; + const geoCity = fingerprint?.detectedLocation?.city || 
worker.current_city; // Worker is ONLY qualified if http preflight passed AND has geo assigned const hasGeo = Boolean(geoState); const isQualified = (worker.is_qualified || httpStatus === 'passed') && hasGeo; @@ -702,8 +703,9 @@ function WorkerSlot({ const httpIp = worker?.http_ip; const fingerprint = worker?.fingerprint_data; - const geoState = worker?.current_state || (fingerprint as any)?.detectedLocation?.region; - const geoCity = worker?.current_city || (fingerprint as any)?.detectedLocation?.city; + // Prefer the DETECTED proxy location (from fingerprint); fall back to the assigned state/city + const geoState = (fingerprint as any)?.detectedLocation?.region || worker?.current_state; + const geoCity = (fingerprint as any)?.detectedLocation?.city || worker?.current_city; const isQualified = worker?.is_qualified; // Build fingerprint tooltip