/**
 * Jane Product Discovery Handler
 *
 * Fetches all products from a Jane store via Puppeteer + network interception.
 *
 * Flow:
 * 1. Load dispensary with platform_dispensary_id
 * 2. Navigate to menu URL and capture Algolia product responses
 * 3. Save raw payload to filesystem
 * 4. Queue product_refresh task for normalization
 *
 * NOTE(review): this handler calls `fetchProductsByStoreIdDirect`; whether that
 * helper still uses Puppeteer / network interception is not visible from this
 * file — confirm against the platform module.
 */

import { TaskContext, TaskResult } from '../task-worker';
import {
  setCrawlRotator,
  fetchProductsByStoreIdDirect,
} from '../../platforms/jane';
import { saveRawPayload } from '../../utils/payload-storage';
import { taskService } from '../task-service';

/**
 * Runs product discovery for the dispensary referenced by `ctx.task`.
 *
 * On success the raw product hits are persisted through `saveRawPayload`, the
 * dispensary row is moved to the `hydrating` stage with its failure counter
 * reset, and a `product_refresh` task is queued for the saved payload.
 *
 * Failures are reported through the returned result rather than thrown:
 * missing task/dispensary data, an empty product capture, and any exception
 * raised inside the handler all resolve to `{ success: false, error }`.
 *
 * @param ctx - Task context providing the database pool, the task being
 *   processed, and an optional crawl rotator.
 * @returns The task outcome; on success it also carries product count,
 *   payload id/size, and store info when the fetch returned a store.
 */
export async function handleProductDiscoveryJane(ctx: TaskContext): Promise<TaskResult> {
  const { pool, task, crawlRotator } = ctx;
  const dispensaryId = task.dispensary_id;

  if (!dispensaryId) {
    return {
      success: false,
      error: 'Missing dispensary_id in task',
    };
  }

  console.log(`[JaneProductDiscovery] Starting for dispensary ${dispensaryId}`);

  try {
    // Load dispensary
    const dispResult = await pool.query(
      `SELECT id, name, menu_url, platform_dispensary_id, menu_type
       FROM dispensaries
       WHERE id = $1`,
      [dispensaryId]
    );

    if (dispResult.rows.length === 0) {
      return {
        success: false,
        error: `Dispensary ${dispensaryId} not found`,
      };
    }

    const dispensary = dispResult.rows[0];

    if (!dispensary.platform_dispensary_id) {
      return {
        success: false,
        error: `Dispensary ${dispensaryId} has no platform_dispensary_id (Jane store ID)`,
      };
    }

    console.log(
      `[JaneProductDiscovery] Fetching products for Jane store ${dispensary.platform_dispensary_id}`
    );

    // Attach crawl rotator (only when the worker supplied one)
    if (crawlRotator) {
      setCrawlRotator(crawlRotator);
    }

    // Fetch products directly via Algolia
    const result = await fetchProductsByStoreIdDirect(dispensary.platform_dispensary_id);

    if (result.products.length === 0) {
      console.warn(`[JaneProductDiscovery] No products captured for dispensary ${dispensaryId}`);

      // Update dispensary with failure. Unlike the exception path below, an
      // empty capture only bumps the counter and leaves `stage` untouched.
      await pool.query(
        `UPDATE dispensaries
         SET consecutive_failures = consecutive_failures + 1,
             updated_at = NOW()
         WHERE id = $1`,
        [dispensaryId]
      );

      return {
        success: false,
        error: 'No products captured from Jane menu page',
        productCount: 0,
      };
    }

    const productCount = result.products.length;
    console.log(`[JaneProductDiscovery] Captured ${productCount} products`);

    // Build payload for storage.
    // Store the raw Algolia hits for the normalizer.
    const rawPayload = {
      hits: result.products.map((p) => p.raw), // Use raw product data
      store: result.store?.raw || null,
      capturedAt: new Date().toISOString(),
      platform: 'jane',
      dispensaryId,
      storeId: dispensary.platform_dispensary_id,
    };

    // Save raw payload to filesystem (platform = 'jane')
    const { id: payloadId, sizeBytes } = await saveRawPayload(
      pool,
      dispensaryId,
      rawPayload,
      null, // crawl_run_id
      productCount,
      'jane', // platform
      task.id // task ID for traceability
    );

    const payloadSizeKB = Math.round(sizeBytes / 1024);
    console.log(`[JaneProductDiscovery] Saved payload ${payloadId} (${payloadSizeKB}KB)`);

    // Update dispensary stage and timestamps
    await pool.query(
      `UPDATE dispensaries
       SET stage = 'hydrating',
           last_fetch_at = NOW(),
           consecutive_successes = consecutive_successes + 1,
           consecutive_failures = 0,
           updated_at = NOW()
       WHERE id = $1`,
      [dispensaryId]
    );

    // Queue product_refresh task for normalization
    console.log(`[JaneProductDiscovery] Queuing product_refresh for payload ${payloadId}`);
    await taskService.createTask({
      role: 'product_refresh',
      dispensary_id: dispensaryId,
      platform: 'jane',
      // method undefined = any worker can process (product_refresh is local)
      priority: task.priority ?? 0,
      payload: { payload_id: payloadId },
    });

    return {
      success: true,
      productCount,
      payloadId,
      payloadSizeKB,
      storeInfo: result.store
        ? {
            id: result.store.id,
            name: result.store.name,
            productCount: result.store.product_count,
          }
        : null,
      queuedProductRefresh: true,
    };
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    console.error(`[JaneProductDiscovery] Error:`, errorMessage);

    // Update dispensary with failure. The CASE reads the pre-update counter,
    // so the stage flips to 'failing' once two failures were already recorded.
    // This bookkeeping is best-effort: a failure here must not mask the
    // original error, so it is logged and otherwise ignored.
    try {
      await pool.query(
        `UPDATE dispensaries
         SET consecutive_failures = consecutive_failures + 1,
             stage = CASE WHEN consecutive_failures >= 2 THEN 'failing' ELSE stage END,
             updated_at = NOW()
         WHERE id = $1`,
        [dispensaryId]
      );
    } catch (updateError: unknown) {
      console.warn(
        `[JaneProductDiscovery] Failed to record failure for dispensary ${dispensaryId}:`,
        updateError instanceof Error ? updateError.message : updateError
      );
    }

    return {
      success: false,
      error: errorMessage,
    };
  }
}