/**
 * Payload Fetch Handler
 *
 * Per TASK_WORKFLOW_2024-12-10.md: Separates API fetch from data processing.
 *
 * This handler ONLY:
 * 1. Hits Dutchie GraphQL API
 * 2. Saves raw payload to filesystem (gzipped)
 * 3. Records metadata in raw_crawl_payloads table
 * 4. Queues a product_refresh task to process the payload
 *
 * Benefits of separation:
 * - Retry-friendly: If normalize fails, re-run refresh without re-crawling
 * - Faster refreshes: Local file read vs network call
 * - Replay-able: Run refresh against any historical payload
 * - Less API pressure: Only this role hits Dutchie
 */

import { TaskContext, TaskResult } from '../task-worker';
import {
  executeGraphQL,
  startSession,
  endSession,
  GRAPHQL_HASHES,
  DUTCHIE_CONFIG,
} from '../../platforms/dutchie';
import { saveRawPayload } from '../../utils/payload-storage';
import { taskService } from '../task-service';

/**
 * Fetches the full product list for one dispensary and stores it as a raw payload.
 *
 * Steps, in order:
 * 1. Load the dispensary row (must exist with `crawl_enabled = true` and have a
 *    `platform_dispensary_id`).
 * 2. Start a session and page through the `FilteredProducts` GraphQL query
 *    (`Status: 'All'`) until the reported total is reached, a short page is
 *    returned, or `DUTCHIE_CONFIG.maxPages` is hit.
 * 3. Persist the collected products via `saveRawPayload`.
 * 4. Stamp `dispensaries.last_fetch_at`.
 * 5. Queue a `product_refresh` task that references the saved payload id.
 *
 * @param ctx - Task context; this handler uses `ctx.pool`, `ctx.task` and `ctx.heartbeat()`.
 * @returns A result with `success: true` plus `payloadId`, `productCount` and
 *   `sizeBytes` on success. Every failure, including errors thrown by any step,
 *   is reported as `{ success: false, error }` rather than a rejected promise.
 */
export async function handlePayloadFetch(ctx: TaskContext): Promise<TaskResult> {
  const { pool, task } = ctx;
  const dispensaryId = task.dispensary_id;

  if (!dispensaryId) {
    return { success: false, error: 'No dispensary_id specified for payload_fetch task' };
  }

  try {
    // ============================================================
    // STEP 1: Load dispensary info
    // ============================================================
    const dispResult = await pool.query(`
      SELECT id, name, platform_dispensary_id, menu_url, menu_type, city, state
      FROM dispensaries
      WHERE id = $1 AND crawl_enabled = true
    `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      return { success: false, error: `Dispensary ${dispensaryId} not found or not crawl_enabled` };
    }

    const dispensary = dispResult.rows[0];
    const platformId = dispensary.platform_dispensary_id;

    if (!platformId) {
      return { success: false, error: `Dispensary ${dispensaryId} has no platform_dispensary_id` };
    }

    // Extract cName from menu_url; fall back to 'dispensary' when the URL is
    // missing or does not contain an /embedded-menu/ or /dispensary/ segment.
    const cNameMatch = dispensary.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch?.[1] ?? 'dispensary';

    console.log(`[PayloadFetch] Starting fetch for ${dispensary.name} (ID: ${dispensaryId})`);
    console.log(`[PayloadFetch] Platform ID: ${platformId}, cName: ${cName}`);

    // ============================================================
    // STEP 2: Start stealth session
    // ============================================================
    const session = startSession();

    // ============================================================
    // STEP 3: Fetch products via GraphQL (Status: 'All')
    // ============================================================
    const allProducts: any[] = [];
    let page = 0;
    let pagesFetched = 0; // pages that actually returned product data
    let totalCount = 0;
    const perPage = DUTCHIE_CONFIG.perPage;
    const maxPages = DUTCHIE_CONFIG.maxPages;

    // Everything after startSession() runs inside try/finally so the session is
    // always ended, including when the first heartbeat rejects.
    try {
      console.log(`[PayloadFetch] Session started: ${session.sessionId}`);

      await ctx.heartbeat();

      while (page < maxPages) {
        const variables = {
          includeEnterpriseSpecials: false,
          productsFilter: {
            dispensaryId: platformId,
            pricingType: 'rec',
            Status: 'All',
            types: [],
            useCache: false,
            isDefaultSort: true,
            sortBy: 'popularSortIdx',
            sortDirection: 1,
            bypassOnlineThresholds: true,
            isKioskMenu: false,
            removeProductsBelowOptionThresholds: false,
          },
          page,
          perPage,
        };

        console.log(`[PayloadFetch] Fetching page ${page + 1}...`);

        const result = await executeGraphQL(
          'FilteredProducts',
          variables,
          GRAPHQL_HASHES.FilteredProducts,
          { cName, maxRetries: 3 }
        );

        const data = result?.data?.filteredProducts;

        if (!data || !data.products) {
          // An empty first page is a hard failure; on later pages it just ends pagination.
          if (page === 0) {
            throw new Error('No product data returned from GraphQL');
          }
          break;
        }

        const products = data.products;
        allProducts.push(...products);
        pagesFetched++;

        if (page === 0) {
          // `||` is deliberate: a missing or zero totalCount falls back to the
          // size of the first page.
          totalCount = data.queryInfo?.totalCount || products.length;
          console.log(`[PayloadFetch] Total products reported: ${totalCount}`);
        }

        // Stop once everything reported has been collected or a short page arrives.
        if (allProducts.length >= totalCount || products.length < perPage) {
          break;
        }

        page++;

        if (page < maxPages) {
          await new Promise(r => setTimeout(r, DUTCHIE_CONFIG.pageDelayMs));
        }

        // Heartbeat every fifth page during long fetches.
        if (page % 5 === 0) {
          await ctx.heartbeat();
        }
      }

      console.log(`[PayloadFetch] Fetched ${allProducts.length} products in ${pagesFetched} pages`);

      // `page` only reaches maxPages when the loop ran out of pages (every
      // `break` leaves it below the limit), so this flags a truncated fetch.
      if (page >= maxPages && allProducts.length < totalCount) {
        console.warn(
          `[PayloadFetch] Page limit (${maxPages}) reached with ${allProducts.length} of ${totalCount} reported products; payload may be incomplete`
        );
      }
    } finally {
      endSession();
    }

    if (allProducts.length === 0) {
      return {
        success: false,
        error: 'No products returned from GraphQL',
        productsProcessed: 0,
      };
    }

    await ctx.heartbeat();

    // ============================================================
    // STEP 4: Save raw payload to filesystem
    // Per TASK_WORKFLOW_2024-12-10.md: Metadata/Payload separation
    // ============================================================
    const rawPayload = {
      dispensaryId,
      platformId,
      cName,
      fetchedAt: new Date().toISOString(),
      productCount: allProducts.length,
      products: allProducts,
    };

    const payloadResult = await saveRawPayload(
      pool,
      dispensaryId,
      rawPayload,
      null, // crawl_run_id - not using crawl_runs in new system
      allProducts.length
    );

    console.log(`[PayloadFetch] Saved payload #${payloadResult.id} (${(payloadResult.sizeBytes / 1024).toFixed(1)}KB)`);

    // ============================================================
    // STEP 5: Update dispensary last_fetch_at
    // ============================================================
    await pool.query(`
      UPDATE dispensaries
      SET last_fetch_at = NOW()
      WHERE id = $1
    `, [dispensaryId]);

    // ============================================================
    // STEP 6: Queue product_refresh task to process the payload
    // Per TASK_WORKFLOW_2024-12-10.md: Task chaining
    // ============================================================
    await taskService.createTask({
      role: 'product_refresh',
      dispensary_id: dispensaryId,
      priority: task.priority || 0,
      payload: { payload_id: payloadResult.id },
    });

    console.log(`[PayloadFetch] Queued product_refresh task for payload #${payloadResult.id}`);

    return {
      success: true,
      payloadId: payloadResult.id,
      productCount: allProducts.length,
      sizeBytes: payloadResult.sizeBytes,
    };
  } catch (error: unknown) {
    // Any failure is converted into a failed TaskResult instead of a rejection.
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    console.error(`[PayloadFetch] Error for dispensary ${dispensaryId}:`, errorMessage);
    return {
      success: false,
      error: errorMessage,
    };
  }
}