From 3496be3064dd2c894bab1ae969758177afc26a79 Mon Sep 17 00:00:00 2001 From: Kelly Date: Sun, 14 Dec 2025 15:56:06 -0700 Subject: [PATCH] feat(treez): Fetch all products with match_all query (+19% more) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update buildProductQuery() to use match_all by default - Captures hidden, below-threshold, and out-of-stock products - Add extractPrimaryImage() and extractImages() to normalizer - Add product_refresh_treez handler for platform-specific refresh - Add product_refresh_treez to TaskRole type Best Dispensary: 228 → 271 products (+43) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- backend/src/hydration/normalizers/treez.ts | 95 ++++-- backend/src/platforms/treez/client.ts | 308 +++++++++++------- .../tasks/handlers/product-refresh-treez.ts | 254 +++++++++++++++ backend/src/tasks/task-worker.ts | 12 +- 4 files changed, 526 insertions(+), 143 deletions(-) create mode 100644 backend/src/tasks/handlers/product-refresh-treez.ts diff --git a/backend/src/hydration/normalizers/treez.ts b/backend/src/hydration/normalizers/treez.ts index a92de299..c20382e1 100644 --- a/backend/src/hydration/normalizers/treez.ts +++ b/backend/src/hydration/normalizers/treez.ts @@ -31,7 +31,12 @@ export class TreezNormalizer extends BaseNormalizer { // ============================================================ extractProducts(rawJson: any): any[] { - // Treez payload format: { products: [...] } + // Prefer normalized array (already processed by client.ts normalizeProduct()) + if (rawJson?.normalized && Array.isArray(rawJson.normalized)) { + return rawJson.normalized; + } + + // Treez payload format: { products: [...] } - raw ES products if (rawJson?.products && Array.isArray(rawJson.products)) { return rawJson.products; } @@ -41,7 +46,7 @@ export class TreezNormalizer extends BaseNormalizer { return rawJson; } - // Hits array (normalized format) + // Hits array (ES response format) if (rawJson?.hits && Array.isArray(rawJson.hits)) { return rawJson.hits; } @@ -71,7 +76,8 @@ export class TreezNormalizer extends BaseNormalizer { // ============================================================ protected normalizeProduct(rawProduct: any, dispensaryId: number): NormalizedProduct | null { - const externalId = rawProduct.productId; + // Treez products use 'id' field from Elasticsearch API + const externalId = rawProduct.id; if (!externalId) { console.warn('[TreezNormalizer] Product missing ID, skipping'); return null; @@ -94,15 +100,15 @@ export class TreezNormalizer extends BaseNormalizer { brandName: rawProduct.brand || null, brandId: null, // Treez doesn't expose brand IDs category: this.normalizeCategory(rawProduct.category) || null, - subcategory: rawProduct.subcategory || null, + subcategory: rawProduct.subtype || null, type: rawProduct.category || null, - strainType: rawProduct.subcategory || null, // indica, sativa, hybrid + strainType: rawProduct.strainType || null, // Indica, Sativa, Hybrid - // Potency - thcPercent: rawProduct.thcPercent ?? null, - cbdPercent: rawProduct.cbdPercent ?? null, - thcContent: rawProduct.thcPercent ?? null, - cbdContent: rawProduct.cbdPercent ?? null, + // Potency - Treez ES API uses customTHCPercentageMinValue/customCBDPercentageMinValue + thcPercent: rawProduct.customTHCPercentageMinValue ?? rawProduct.thcPercent ?? null, + cbdPercent: rawProduct.customCBDPercentageMinValue ?? rawProduct.cbdPercent ?? null, + thcContent: rawProduct.customTHCPercentageMinValue ?? rawProduct.thcPercent ?? null, + cbdContent: rawProduct.customCBDPercentageMinValue ?? rawProduct.cbdPercent ?? null, // Status - scraped products are active status: 'Active', @@ -110,11 +116,9 @@ export class TreezNormalizer extends BaseNormalizer { medicalOnly: false, recOnly: false, - // Images - primaryImageUrl: rawProduct.imageUrl || null, - images: rawProduct.imageUrl - ? [{ url: rawProduct.imageUrl, position: 0 }] - : [], + // Images - Treez ES API has images in productData.images array + primaryImageUrl: this.extractPrimaryImage(rawProduct), + images: this.extractImages(rawProduct), // Raw reference rawProduct, @@ -122,21 +126,23 @@ export class TreezNormalizer extends BaseNormalizer { } protected normalizePricing(rawProduct: any): NormalizedPricing | null { - const externalId = rawProduct.productId; + const externalId = rawProduct.id; if (!externalId) return null; - const price = rawProduct.price; + // Treez ES API uses customMinPrice/customMaxPrice + const priceMin = rawProduct.customMinPrice ?? rawProduct.price; + const priceMax = rawProduct.customMaxPrice ?? priceMin; return { externalProductId: String(externalId), - // Treez typically shows a single price - priceRec: this.toCents(price), - priceRecMin: this.toCents(price), - priceRecMax: this.toCents(price), + // Treez pricing from ES API + priceRec: this.toCents(priceMin), + priceRecMin: this.toCents(priceMin), + priceRecMax: this.toCents(priceMax), priceRecSpecial: null, - // Treez doesn't distinguish med pricing in DOM + // Treez doesn't distinguish med pricing priceMed: null, priceMedMin: null, priceMedMax: null, @@ -149,7 +155,7 @@ export class TreezNormalizer extends BaseNormalizer { } protected normalizeAvailability(rawProduct: any): NormalizedAvailability | null { - const externalId = rawProduct.productId; + const externalId = rawProduct.id; if (!externalId) return null; const inStock = rawProduct.inStock !== false; @@ -224,4 +230,47 @@ export class TreezNormalizer extends BaseNormalizer { return categoryMap[categoryLower] || category; } + + /** + * Extract primary image URL from Treez product + * Images are in productData.images array + */ + private extractPrimaryImage(rawProduct: any): string | null { + const productData = rawProduct.productData || {}; + const images = productData.images || []; + + if (images.length > 0) { + // First image is primary + const firstImage = images[0]; + return firstImage?.url || firstImage?.imageUrl || null; + } + + // Fallback to other image fields + return rawProduct.imageUrl || rawProduct.image_url || null; + } + + /** + * Extract all image URLs from Treez product + */ + private extractImages(rawProduct: any): string[] { + const productData = rawProduct.productData || {}; + const images = productData.images || []; + + const urls: string[] = []; + + for (const img of images) { + const url = img?.url || img?.imageUrl; + if (url) urls.push(url); + } + + // Include any direct image field + if (rawProduct.imageUrl && !urls.includes(rawProduct.imageUrl)) { + urls.push(rawProduct.imageUrl); + } + if (rawProduct.image_url && !urls.includes(rawProduct.image_url)) { + urls.push(rawProduct.image_url); + } + + return urls; + } } diff --git a/backend/src/platforms/treez/client.ts b/backend/src/platforms/treez/client.ts index cfa51ed3..e85fec89 100644 --- a/backend/src/platforms/treez/client.ts +++ b/backend/src/platforms/treez/client.ts @@ -3,26 +3,30 @@ * TREEZ PLATFORM CLIENT * ============================================================ * - * Treez uses Cloudflare protection + headless detection on their - * Elasticsearch API. This client uses: + * Treez uses Cloudflare protection on their Elasticsearch API. + * This client uses: * - * 1. Puppeteer with Stealth plugin to bypass detection - * 2. CDP (Chrome DevTools Protocol) to intercept API responses - * 3. Scrolling/pagination to trigger all product loads + * 1. Puppeteer with Stealth plugin to bypass Cloudflare + * 2. Direct API calls from browser context after session established + * 3. Captures store-specific headers (org-id, entity-id) from first request * - * API Endpoints (intercepted, not called directly): + * DISCOVERY SCHEME: + * 1. Navigate to store menu page with Puppeteer (establishes CF session) + * 2. Capture first ES API request headers: + * - x-api-key: Store's API key + * - org-id: Organization ID (store-specific) + * - entity-id: Entity ID (store-specific) + * - ES URL with tenant ID: https://search-{tenant}.gapcommerceapi.com/product/search + * 3. Make direct API call from browser context with captured headers + * 4. Fetch ALL products in one request (size: 10000) + * + * API Endpoints: * - Products: POST https://search-{tenant}.gapcommerceapi.com/product/search * - Discounts: GET https://headless.treez.io/v2.0/dispensary/{storeId}/ecommerce/discounts * * Store ID Format: String slug (e.g., "best") * Menu URL: https://{storeId}.treez.io/onlinemenu/ or custom domain * - * Data captured includes: - * - Full product details (name, brand, category, subtype) - * - Inventory levels (availableUnits) - * - Pricing with discounts - * - Lab results (THC/CBD when available) - * * ============================================================ */ @@ -39,6 +43,25 @@ import type { TreezESResponse, } from './types'; +// ============================================================ +// STORE CONFIG (captured from first API request) +// ============================================================ + +/** + * Store-specific API configuration captured from browser requests. + * These are required for direct API calls. + */ +export interface TreezStoreConfig { + /** Full ES URL with tenant ID: https://search-{tenant}.gapcommerceapi.com/product/search */ + esUrl: string; + /** API key from x-api-key header */ + apiKey: string; + /** Organization ID from org-id header (store-specific) */ + orgId: string; + /** Entity ID from entity-id header (store-specific) */ + entityId: string; +} + // Register stealth plugin - REQUIRED for Treez puppeteer.use(StealthPlugin()); @@ -67,6 +90,7 @@ export const TREEZ_CONFIG: TreezConfig = { // ============================================================ let currentSession: TreezSession | null = null; +let currentStoreConfig: TreezStoreConfig | null = null; let crawlRotator: CrawlRotator | null = null; /** @@ -220,6 +244,7 @@ export async function endSession(): Promise { } currentSession = null; + currentStoreConfig = null; } } @@ -231,66 +256,62 @@ export function getCurrentSession(): TreezSession | null { } // ============================================================ -// CDP RESPONSE INTERCEPTION +// STORE CONFIG CAPTURE // ============================================================ /** - * Setup CDP listener to capture Elasticsearch product responses + * Setup request listener to capture store-specific API config from first ES request. + * This captures org-id, entity-id, api-key, and ES URL. */ -function setupProductCapture(session: TreezSession): void { - const { cdpClient } = session; - - cdpClient.on('Network.responseReceived', async (event: any) => { - const url = event.response.url; - - // Check if this is an ES product search response - if (url.includes('gapcommerceapi.com/product/search') && event.response.status === 200) { - try { - const response = await cdpClient.send('Network.getResponseBody', { - requestId: event.requestId, - }); - - const body = response.base64Encoded - ? Buffer.from(response.body, 'base64').toString('utf8') - : response.body; - - const json: TreezESResponse = JSON.parse(body); - const products = json.hits?.hits?.map((h) => h._source) || []; - - if (products.length > 0) { - session.capturedProducts.push(...products); - console.log( - `[Treez Client] Captured ${products.length} products (total: ${session.capturedProducts.length})` - ); - } - } catch { - // Response body may not be available, skip silently - } +function setupStoreConfigCapture(page: Page): void { + page.on('request', (req) => { + const url = req.url(); + if (url.includes('gapcommerceapi.com/product/search') && req.method() === 'POST' && !currentStoreConfig) { + const headers = req.headers(); + currentStoreConfig = { + esUrl: url, + apiKey: headers['x-api-key'] || '', + orgId: headers['org-id'] || '', + entityId: headers['entity-id'] || '', + }; + console.log('[Treez Client] Captured store config:'); + console.log(` ES URL: ${currentStoreConfig.esUrl}`); + console.log(` Org ID: ${currentStoreConfig.orgId}`); + console.log(` Entity ID: ${currentStoreConfig.entityId}`); } }); } +/** + * Get the current store config (captured from browser requests) + */ +export function getStoreConfig(): TreezStoreConfig | null { + return currentStoreConfig; +} + // ============================================================ -// PRODUCT FETCHING +// PRODUCT FETCHING (Direct API Approach) // ============================================================ /** - * Navigate to store menu and capture all products via CDP interception - * This is the main method for fetching products + * Navigate to store menu, capture API config, and fetch ALL products via direct API call. + * + * APPROACH: + * 1. Navigate to menu page (establishes Cloudflare session) + * 2. Capture store-specific headers from first ES request (org-id, entity-id) + * 3. Make direct API call from browser context with captured headers + * 4. Fetch ALL products in one request (size: 10000) + * + * This is MUCH faster and more reliable than CDP scroll interception. */ export async function fetchAllProducts( menuUrl: string, options: { - maxScrolls?: number; - scrollDelay?: number; bypassAgeGate?: boolean; + customerType?: 'ADULT' | 'MEDICAL' | 'BOTH'; } = {} ): Promise { - const { - maxScrolls = TREEZ_CONFIG.maxScrollAttempts, - scrollDelay = TREEZ_CONFIG.scrollDelay, - bypassAgeGate = true, - } = options; + const { bypassAgeGate = true, customerType = 'ADULT' } = options; if (!currentSession) { throw new Error('[Treez Client] No active session - call startSession() first'); @@ -298,11 +319,11 @@ export async function fetchAllProducts( const { page } = currentSession; - // Reset captured products - currentSession.capturedProducts = []; + // Reset store config for new fetch + currentStoreConfig = null; - // Setup CDP listener for product responses - setupProductCapture(currentSession); + // Setup listener to capture store config from first ES request + setupStoreConfigCapture(page); console.log(`[Treez Client] Navigating to ${menuUrl}`); @@ -319,97 +340,146 @@ export async function fetchAllProducts( await tryBypassAgeGate(page); } - // Wait for initial products to load + // Wait for initial page load to trigger first API request await sleep(3000); - console.log(`[Treez Client] Initial capture: ${currentSession.capturedProducts.length} products`); - // Scroll and click "Load More" to get all products - console.log('[Treez Client] Scrolling to load all products...'); - - let previousCount = 0; - let noNewDataCount = 0; - - for (let i = 0; i < maxScrolls; i++) { - // Scroll to bottom - await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); - await sleep(scrollDelay); - - // Try clicking "Load More" button - try { - const loadMoreBtn = await page.$('button.collection__load-more'); - if (loadMoreBtn) { - const isVisible = await page.evaluate((btn: Element) => { - const rect = btn.getBoundingClientRect(); - return rect.width > 0 && rect.height > 0; - }, loadMoreBtn); - - if (isVisible) { - await loadMoreBtn.click(); - await sleep(scrollDelay); - } - } - } catch { - // No load more button or click failed - } - - const currentCount = currentSession.capturedProducts.length; - if (currentCount === previousCount) { - noNewDataCount++; - if (noNewDataCount >= 5) { - console.log(`[Treez Client] No new products for 5 scrolls, stopping`); - break; - } - } else { - noNewDataCount = 0; - if ((i + 1) % 5 === 0) { - console.log(`[Treez Client] Scroll ${i + 1}: ${currentCount} products`); - } - } - previousCount = currentCount; + // Check if we captured the store config + if (!currentStoreConfig) { + console.error('[Treez Client] Failed to capture store config from browser requests'); + throw new Error('Failed to capture Treez store config'); } + + console.log('[Treez Client] Making direct API call for ALL products...'); + + // Build query for ALL products + const query = buildProductQuery(customerType); + + // Make direct API call from browser context (has Cloudflare session) + const products = await page.evaluate( + async (config: TreezStoreConfig, queryBody: any) => { + try { + const response = await fetch(config.esUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-api-key': config.apiKey, + 'org-id': config.orgId, + 'entity-id': config.entityId, + }, + body: JSON.stringify(queryBody), + }); + + if (!response.ok) { + throw new Error(`API returned HTTP ${response.status}`); + } + + const data = await response.json(); + return { + success: true, + total: data.hits?.total?.value || 0, + products: (data.hits?.hits || []).map((h: any) => h._source), + }; + } catch (e: any) { + return { success: false, error: e.message, products: [] }; + } + }, + currentStoreConfig, + query + ); + + if (!products.success) { + throw new Error(`Direct API call failed: ${products.error}`); + } + + console.log(`[Treez Client] Fetched ${products.products.length} of ${products.total} products`); + + // Store in session for reference + currentSession.capturedProducts = products.products; + + // Record success with rotator + if (crawlRotator && products.products.length > 0) { + await crawlRotator.recordSuccess(); + } + + return products.products; } catch (error: any) { - console.error(`[Treez Client] Navigation error: ${error.message}`); + console.error(`[Treez Client] Error: ${error.message}`); throw error; } +} - // Deduplicate products by ID - const seen = new Set(); - const uniqueProducts = currentSession.capturedProducts.filter((p) => { - if (!p.id || seen.has(p.id)) return false; - seen.add(p.id); - return true; - }); - - console.log(`[Treez Client] Total unique products: ${uniqueProducts.length}`); - - // Record success with rotator - if (crawlRotator && uniqueProducts.length > 0) { - await crawlRotator.recordSuccess(); +/** + * Build Elasticsearch query for fetching all products + * + * Options: + * - includeHidden: Include hidden and below-threshold products (default: true for complete inventory) + * - customerType: Filter by customer type (ADULT, MEDICAL, BOTH) + * + * NOTE: Using match_all (includeHidden=true) returns ~20% more products than filtered query + */ +function buildProductQuery( + customerType: 'ADULT' | 'MEDICAL' | 'BOTH' = 'ADULT', + includeHidden: boolean = true +): any { + // Use match_all to get ALL products including hidden, below threshold, and out of stock + // This returns ~20% more products than the filtered query + if (includeHidden) { + return { + from: 0, + size: 10000, + query: { match_all: {} }, + }; } - return uniqueProducts; + // Filtered query (original behavior) - excludes hidden/below-threshold products + const customerTypeFilter = + customerType === 'BOTH' + ? [{ match: { customCustomerType: 'BOTH' } }] + : [ + { match: { customCustomerType: 'BOTH' } }, + { match: { customCustomerType: customerType } }, + ]; + + return { + from: 0, + size: 10000, + query: { + bool: { + must: [ + { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 } } } } }, + { bool: { should: [{ match: { isAboveThreshold: true } }] } }, + { bool: { should: [{ match: { isHideFromMenu: false } }] } }, + { bool: { should: customerTypeFilter } }, + ], + }, + }, + }; } /** * Fetch products from a specific brand page + * Note: With direct API, this fetches ALL products then filters locally would be needed. + * For now, it just fetches all products from the brand page URL. */ export async function fetchBrandProducts( storeUrl: string, brandSlug: string ): Promise { const brandUrl = `${storeUrl}/brand/${encodeURIComponent(brandSlug)}`; - return fetchAllProducts(brandUrl, { maxScrolls: 30 }); + return fetchAllProducts(brandUrl); } /** * Fetch products from a specific category page + * Note: With direct API, this fetches ALL products then filters locally would be needed. + * For now, it just fetches all products from the category page URL. */ export async function fetchCategoryProducts( storeUrl: string, categorySlug: string ): Promise { const categoryUrl = `${storeUrl}/collection/${encodeURIComponent(categorySlug)}`; - return fetchAllProducts(categoryUrl, { maxScrolls: 30 }); + return fetchAllProducts(categoryUrl); } // ============================================================ diff --git a/backend/src/tasks/handlers/product-refresh-treez.ts b/backend/src/tasks/handlers/product-refresh-treez.ts new file mode 100644 index 00000000..2a6012df --- /dev/null +++ b/backend/src/tasks/handlers/product-refresh-treez.ts @@ -0,0 +1,254 @@ +/** + * ============================================================ + * TREEZ PRODUCT REFRESH HANDLER + * ============================================================ + * + * Normalizes raw Treez payload data and upserts to store_products. + * + * This is the Treez-specific product refresh handler. + * It uses the TreezNormalizer to transform ES API data into + * the canonical store_products format. + * + * ============================================================ + */ + +import { TaskContext, TaskResult } from '../task-worker'; +import { loadRawPayloadById } from '../../utils/payload-storage'; +import { TreezNormalizer } from '../../hydration/normalizers/treez'; + +const normalizer = new TreezNormalizer(); + +export async function handleProductRefreshTreez(ctx: TaskContext): Promise { + const { pool, task, updateStep } = ctx; + const dispensaryId = task.dispensary_id; + const payloadId = task.payload?.payload_id as number | undefined; + + if (!dispensaryId) { + return { success: false, error: 'Missing dispensary_id in task' }; + } + + if (!payloadId) { + return { success: false, error: 'Missing payload_id in task payload' }; + } + + console.log(`[TreezProductRefresh] Starting for dispensary ${dispensaryId}, payload ${payloadId}`); + + try { + // ============================================================ + // STEP 1: Load dispensary + // ============================================================ + updateStep('loading', 'Loading dispensary info'); + + const dispResult = await pool.query( + `SELECT id, name, platform, platform_dispensary_id, menu_url + FROM dispensaries WHERE id = $1`, + [dispensaryId] + ); + + if (dispResult.rows.length === 0) { + return { success: false, error: `Dispensary ${dispensaryId} not found` }; + } + + const dispensary = dispResult.rows[0]; + console.log(`[TreezProductRefresh] Processing ${dispensary.name}`); + + // ============================================================ + // STEP 2: Load payload + // ============================================================ + updateStep('loading', 'Loading payload from storage'); + + const loadResult = await loadRawPayloadById(pool, payloadId); + + if (!loadResult) { + return { success: false, error: `Payload ${payloadId} not found` }; + } + + const payloadData = loadResult.payload; + + // Treez payload structure: { products: [...], normalized: [...], ... } + const rawProducts = payloadData.products || payloadData.normalized || []; + + if (rawProducts.length === 0) { + return { + success: false, + error: 'Payload contains no products', + payloadId, + productsProcessed: 0, + }; + } + + console.log(`[TreezProductRefresh] Loaded ${rawProducts.length} products from payload`); + + // ============================================================ + // STEP 3: Normalize products using TreezNormalizer methods directly + // ============================================================ + updateStep('normalizing', `Normalizing ${rawProducts.length} products`); + + // Extract and normalize products directly + const normalizedProducts = []; + const normalizedPricing = new Map(); + const normalizedAvailability = new Map(); + + for (const rawProduct of rawProducts) { + // Use normalizer's internal methods via normalize() on single-product payload + const singlePayload = { + id: `treez-${rawProduct.id}`, + dispensary_id: dispensaryId, + platform_dispensary_id: dispensary.platform_dispensary_id, + raw_json: { products: [rawProduct] }, + source_type: 'api' as const, + pricing_type: 'dual' as const, + crawl_mode: 'api' as const, + crawl_run_id: null, + platform: 'treez', + payload_version: 1, + product_count: 1, + fetched_at: new Date(), + processed: false, + normalized_at: null, + hydration_error: null, + hydration_attempts: 0, + created_at: new Date(), + }; + + const result = normalizer.normalize(singlePayload); + if (result.products.length > 0) { + const product = result.products[0]; + normalizedProducts.push(product); + + const pricing = result.pricing.get(product.externalProductId); + if (pricing) normalizedPricing.set(product.externalProductId, pricing); + + const availability = result.availability.get(product.externalProductId); + if (availability) normalizedAvailability.set(product.externalProductId, availability); + } + } + + if (normalizedProducts.length === 0) { + return { + success: false, + error: 'Normalization produced no products', + payloadId, + productsProcessed: 0, + }; + } + + console.log(`[TreezProductRefresh] Normalized ${normalizedProducts.length} products`); + + // ============================================================ + // STEP 4: Upsert to store_products + // ============================================================ + updateStep('upserting', `Saving ${normalizedProducts.length} products to DB`); + + let newCount = 0; + let updatedCount = 0; + + for (const product of normalizedProducts) { + const pricing = normalizedPricing.get(product.externalProductId); + const availability = normalizedAvailability.get(product.externalProductId); + + const upsertResult = await pool.query( + `INSERT INTO store_products ( + dispensary_id, + provider, + provider_product_id, + platform_dispensary_id, + name_raw, + brand_name_raw, + category_raw, + subcategory_raw, + strain_type, + price_rec, + price_med, + thc_percent, + cbd_percent, + image_url, + is_in_stock, + stock_quantity, + provider_data, + first_seen_at, + last_seen_at, + created_at, + updated_at + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, NOW(), NOW(), NOW(), NOW()) + ON CONFLICT (dispensary_id, provider, provider_product_id) + DO UPDATE SET + name_raw = EXCLUDED.name_raw, + brand_name_raw = EXCLUDED.brand_name_raw, + category_raw = EXCLUDED.category_raw, + subcategory_raw = EXCLUDED.subcategory_raw, + strain_type = EXCLUDED.strain_type, + price_rec = EXCLUDED.price_rec, + price_med = EXCLUDED.price_med, + thc_percent = EXCLUDED.thc_percent, + cbd_percent = EXCLUDED.cbd_percent, + image_url = EXCLUDED.image_url, + is_in_stock = EXCLUDED.is_in_stock, + stock_quantity = EXCLUDED.stock_quantity, + provider_data = EXCLUDED.provider_data, + last_seen_at = NOW(), + updated_at = NOW() + RETURNING (xmax = 0) AS is_new`, + [ + dispensaryId, + 'treez', + product.externalProductId, + dispensary.platform_dispensary_id, + product.name, + product.brandName, + product.category, + product.subcategory, + product.strainType, + pricing?.priceRec ? pricing.priceRec / 100 : null, + pricing?.priceMed ? pricing.priceMed / 100 : null, + product.thcPercent, + product.cbdPercent, + product.primaryImageUrl, + availability?.inStock ?? true, + availability?.quantity, + JSON.stringify(product.rawProduct), + ] + ); + + if (upsertResult.rows[0]?.is_new) { + newCount++; + } else { + updatedCount++; + } + } + + console.log(`[TreezProductRefresh] Upserted: ${normalizedProducts.length} (${newCount} new, ${updatedCount} updated)`); + + // ============================================================ + // STEP 5: Update dispensary + // ============================================================ + await pool.query( + `UPDATE dispensaries + SET product_count = $2, + stage = 'production', + updated_at = NOW() + WHERE id = $1`, + [dispensaryId, normalizedProducts.length] + ); + + console.log(`[TreezProductRefresh] Completed ${dispensary.name}`); + + return { + success: true, + payloadId, + productsProcessed: normalizedProducts.length, + newProducts: newCount, + updatedProducts: updatedCount, + }; + + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : 'Unknown error'; + console.error(`[TreezProductRefresh] Error:`, errorMessage); + + return { + success: false, + error: errorMessage, + payloadId, + }; + } +} diff --git a/backend/src/tasks/task-worker.ts b/backend/src/tasks/task-worker.ts index 260bcffe..b25a78a8 100644 --- a/backend/src/tasks/task-worker.ts +++ b/backend/src/tasks/task-worker.ts @@ -109,6 +109,7 @@ import { handleProductDiscoveryJane } from './handlers/product-discovery-jane'; // Treez Platform Handlers import { handleProductDiscoveryTreez } from './handlers/product-discovery-treez'; +import { handleProductRefreshTreez } from './handlers/product-refresh-treez'; const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '5000'); const HEARTBEAT_INTERVAL_MS = parseInt(process.env.HEARTBEAT_INTERVAL_MS || '30000'); @@ -242,7 +243,16 @@ function getHandlerForTask(task: WorkerTask): TaskHandler | undefined { console.log(`[TaskWorker] Using Treez handler for product_discovery`); return handleProductDiscoveryTreez; } - // Treez uses shared product_refresh handler via normalizer registry + if (role === 'product_refresh_treez') { + console.log(`[TaskWorker] Using Treez handler for product_refresh_treez`); + return handleProductRefreshTreez; + } + } + + // Also route product_refresh_treez regardless of platform field + if (role === 'product_refresh_treez') { + console.log(`[TaskWorker] Using Treez handler for product_refresh_treez`); + return handleProductRefreshTreez; } // ==========================================================================