/**
 * Entry Point Discovery Handler
 *
 * Resolves platform IDs for a discovered store using Dutchie GraphQL.
 * This is the step between store_discovery and product_discovery.
 *
 * Flow:
 * 1. Load dispensary info from database
 * 2. Extract slug from menu_url
 * 3. Start stealth session (fingerprint + optional proxy)
 * 4. Query Dutchie GraphQL to resolve slug → platform_dispensary_id
 * 5. Update dispensary record with resolved ID
 * 6. Queue product_discovery task if successful
 */
import { TaskContext, TaskResult } from '../task-worker';
import { startSession, endSession } from '../../platforms/dutchie';
import { resolveDispensaryIdWithDetails } from '../../platforms/dutchie/queries';

/**
 * Pulls the store slug out of a menu URL.
 *
 * Recognizes `/embedded-menu/<slug>` first and `/dispensary/<slug>` second.
 * The slug ends at the next `/`, `?`, or `#`, so a query string or URL
 * fragment never leaks into the slug.
 *
 * @param menuUrl - The dispensary's menu URL.
 * @returns The slug, or `null` when neither path pattern is present.
 */
function extractMenuSlug(menuUrl: string): string | null {
  const embeddedMatch = menuUrl.match(/\/embedded-menu\/([^/?#]+)/);
  if (embeddedMatch) {
    return embeddedMatch[1] ?? null;
  }

  const dispensaryMatch = menuUrl.match(/\/dispensary\/([^/?#]+)/);
  return dispensaryMatch?.[1] ?? null;
}

/**
 * Handles an `entry_point_discovery` task: resolves the platform dispensary ID
 * for the task's dispensary and, on success, stores it and queues a
 * `product_discovery` task.
 *
 * Errors thrown while running are caught, logged, and reported as a failed
 * result carrying the error message; the handler itself does not rethrow.
 *
 * @param ctx - Task context; `pool`, `task`, and `heartbeat` are used.
 * @returns A result with `success: true` when the ID was already present or was
 *   resolved and stored; otherwise `success: false` with an `error` message.
 */
export async function handleEntryPointDiscovery(ctx: TaskContext): Promise<TaskResult> {
  const { pool, task } = ctx;
  const dispensaryId = task.dispensary_id;

  if (!dispensaryId) {
    return { success: false, error: 'No dispensary_id specified for entry_point_discovery task' };
  }

  try {
    // ============================================================
    // STEP 1: Load dispensary info
    // ============================================================
    const dispResult = await pool.query(` SELECT id, name, menu_url, platform_dispensary_id, menu_type, state FROM dispensaries WHERE id = $1 `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      return { success: false, error: `Dispensary ${dispensaryId} not found` };
    }

    const dispensary = dispResult.rows[0];

    // If already has platform_dispensary_id, we're done
    if (dispensary.platform_dispensary_id) {
      console.log(`[EntryPointDiscovery] Dispensary ${dispensaryId} already has platform ID: ${dispensary.platform_dispensary_id}`);
      return {
        success: true,
        alreadyResolved: true,
        platformId: dispensary.platform_dispensary_id,
      };
    }

    const menuUrl = dispensary.menu_url;
    if (!menuUrl) {
      return { success: false, error: `Dispensary ${dispensaryId} has no menu_url` };
    }

    console.log(`[EntryPointDiscovery] Resolving platform ID for ${dispensary.name}`);
    // The line break after the prefix is part of the emitted message.
    console.log(`[EntryPointDiscovery] \nMenu URL: ${menuUrl}`);

    // ============================================================
    // STEP 2: Extract slug from menu URL
    // ============================================================
    const slug = extractMenuSlug(menuUrl);

    if (!slug) {
      // Neither known URL pattern matched: record the menu type as unknown
      await pool.query(` UPDATE dispensaries SET menu_type = 'unknown', updated_at = NOW() WHERE id = $1 `, [dispensaryId]);

      return {
        success: false,
        error: `Could not extract slug from menu_url: ${menuUrl}`,
      };
    }

    console.log(`[EntryPointDiscovery] Extracted slug: ${slug}`);

    await ctx.heartbeat();

    // ============================================================
    // STEP 3: Start stealth session
    // ============================================================
    // Per workflow-12102025.md: session identity comes from proxy location, not task params
    const session = startSession();
    console.log(`[EntryPointDiscovery] Session started: ${session.sessionId}`);

    try {
      // ============================================================
      // STEP 4: Resolve platform ID via GraphQL
      // ============================================================
      console.log(`[EntryPointDiscovery] Querying Dutchie GraphQL for slug: ${slug}`);

      const result = await resolveDispensaryIdWithDetails(slug);

      if (!result.dispensaryId) {
        // Resolution failed - could be 403, 404, or invalid response
        const reason = result.httpStatus
          ? `HTTP ${result.httpStatus}`
          : result.error || 'Unknown error';

        console.log(`[EntryPointDiscovery] Failed to resolve ${slug}: ${reason}`);

        // Record the outcome: 404 -> 'removed', 403 -> 'blocked',
        // anything else keeps menu_type as 'dutchie'
        await pool.query(` UPDATE dispensaries SET menu_type = CASE WHEN $2 = 404 THEN 'removed' WHEN $2 = 403 THEN 'blocked' ELSE 'dutchie' END, updated_at = NOW() WHERE id = $1 `, [dispensaryId, result.httpStatus ?? 0]);

        return {
          success: false,
          error: `Could not resolve platform ID: ${reason}`,
          slug,
          httpStatus: result.httpStatus,
        };
      }

      const platformId = result.dispensaryId;
      console.log(`[EntryPointDiscovery] Resolved ${slug} -> ${platformId}`);

      await ctx.heartbeat();

      // ============================================================
      // STEP 5: Update dispensary with resolved ID
      // ============================================================
      await pool.query(` UPDATE dispensaries SET platform_dispensary_id = $2, menu_type = 'dutchie', crawl_enabled = true, updated_at = NOW() WHERE id = $1 `, [dispensaryId, platformId]);

      console.log(`[EntryPointDiscovery] Updated dispensary ${dispensaryId} with platform ID`);

      // ============================================================
      // STEP 6: Queue product_discovery task
      // ============================================================
      await pool.query(` INSERT INTO worker_tasks (role, dispensary_id, priority, scheduled_for) VALUES ('product_discovery', $1, 5, NOW()) ON CONFLICT DO NOTHING `, [dispensaryId]);

      console.log(`[EntryPointDiscovery] Queued product_discovery task for dispensary ${dispensaryId}`);

      return {
        success: true,
        platformId,
        slug,
        queuedProductDiscovery: true,
      };
    } finally {
      // Always end session, including on early return or thrown error
      endSession();
    }
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    console.error(`[EntryPointDiscovery] Error for dispensary ${dispensaryId}:`, errorMessage);
    return {
      success: false,
      error: errorMessage,
    };
  }
}