"use strict";
/**
 * Worker Service
 *
 * Polls the job queue and processes crawl jobs.
 * Each worker instance runs independently, claiming jobs atomically.
 */
// Module-interop helpers. NOTE(review): their shape matches the helpers the
// TypeScript compiler emits for `import * as` / dynamic import; they are kept
// unchanged and are only used by the lazy `require('./menu-detection')` calls.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.startWorker = startWorker;
exports.stopWorker = stopWorker;
exports.getWorkerStatus = getWorkerStatus;
const job_queue_1 = require("./job-queue");
const product_crawler_1 = require("./product-crawler");
const discovery_1 = require("./discovery");
const connection_1 = require("../db/connection");
// Explicit column list for dispensaries table (avoids SELECT * issues with schema differences)
// NOTE: failed_at is included for worker compatibility checks
const DISPENSARY_COLUMNS = `
  id, name, slug, city, state, zip, address, latitude, longitude, menu_type, menu_url,
  platform_dispensary_id, website, provider_detection_data, created_at, updated_at, failed_at
`;
// ============================================================
// WORKER CONFIG
// ============================================================
const POLL_INTERVAL_MS = 5000; // Check for jobs every 5 seconds
const HEARTBEAT_INTERVAL_MS = 60000; // Send heartbeat every 60 seconds
const STALE_CHECK_INTERVAL_MS = 300000; // Check for stale jobs every 5 minutes
const SHUTDOWN_GRACE_PERIOD_MS = 30000; // Wait 30s for job to complete on shutdown
// ============================================================
// WORKER STATE
// ============================================================
let isRunning = false;
let currentJob = null;
let pollTimer = null;
let heartbeatTimer = null;
let staleCheckTimer = null;
let shutdownPromise = null;
// True from the moment a poll starts claiming a job until that poll has fully
// finished. `currentJob` alone cannot guard re-entrancy because it is only set
// after the asynchronous claim resolves.
let isPolling = false;
// Signal handlers must be registered once per process, not once per start.
let shutdownHandlersInstalled = false;
/**
 * Extract a message from any thrown value.
 *
 * Caught values are not guaranteed to be Error objects; reading `.message`
 * on `null`/`undefined` would itself throw inside a catch handler.
 *
 * @param {unknown} error - The caught value.
 * @returns {string} `error.message` when present, otherwise `String(error)`.
 */
function getErrorMessage(error) {
    return error?.message ?? String(error);
}
// ============================================================
// WORKER LIFECYCLE
// ============================================================
/**
 * Start the worker.
 *
 * Installs the shutdown handlers, starts the poll and stale-job-recovery
 * intervals and then runs one poll immediately. The returned promise settles
 * only after that first poll (including any job it picked up) has finished.
 * Calling this while the worker is already running is a no-op.
 *
 * @returns {Promise<void>}
 */
async function startWorker() {
    if (isRunning) {
        console.log('[Worker] Already running');
        return;
    }
    const workerId = (0, job_queue_1.getWorkerId)();
    const hostname = (0, job_queue_1.getWorkerHostname)();
    console.log(`[Worker] Starting worker ${workerId} on ${hostname}`);
    isRunning = true;
    // Set up graceful shutdown
    setupShutdownHandlers();
    // Start polling for jobs
    pollTimer = setInterval(pollForJobs, POLL_INTERVAL_MS);
    // Start stale job recovery (only one worker should do this, but it's idempotent)
    staleCheckTimer = setInterval(async () => {
        try {
            // NOTE(review): 15 is presumably a staleness threshold in minutes - confirm against job-queue.
            await (0, job_queue_1.recoverStaleJobs)(15);
        }
        catch (error) {
            console.error('[Worker] Error recovering stale jobs:', error);
        }
    }, STALE_CHECK_INTERVAL_MS);
    // Immediately poll for a job
    await pollForJobs();
    console.log(`[Worker] Worker ${workerId} started, polling every ${POLL_INTERVAL_MS}ms`);
}
/**
 * Stop the worker gracefully.
 *
 * Clears all timers, then waits up to SHUTDOWN_GRACE_PERIOD_MS for the job in
 * progress (if any) to finish. A job still running after the grace period is
 * reported to the queue via failJob with the reason 'Worker shutdown'.
 *
 * @returns {Promise<void>}
 */
async function stopWorker() {
    if (!isRunning)
        return;
    console.log('[Worker] Stopping worker...');
    isRunning = false;
    // Clear timers
    if (pollTimer) {
        clearInterval(pollTimer);
        pollTimer = null;
    }
    if (heartbeatTimer) {
        clearInterval(heartbeatTimer);
        heartbeatTimer = null;
    }
    if (staleCheckTimer) {
        clearInterval(staleCheckTimer);
        staleCheckTimer = null;
    }
    // Wait for current job to complete
    if (currentJob) {
        console.log(`[Worker] Waiting for job ${currentJob.id} to complete...`);
        const startWait = Date.now();
        // pollForJobs resets currentJob to null when the job finishes.
        while (currentJob && Date.now() - startWait < SHUTDOWN_GRACE_PERIOD_MS) {
            await new Promise(r => setTimeout(r, 1000));
        }
        if (currentJob) {
            console.log(`[Worker] Job ${currentJob.id} did not complete in time, marking for retry`);
            await (0, job_queue_1.failJob)(currentJob.id, 'Worker shutdown');
        }
    }
    console.log('[Worker] Worker stopped');
}
/**
 * Get worker status.
 *
 * @returns {{isRunning: boolean, workerId: *, hostname: *, currentJob: *}}
 *   Snapshot of the running flag, this worker's id and hostname, and the job
 *   currently being processed (null when idle).
 */
function getWorkerStatus() {
    return {
        isRunning,
        workerId: (0, job_queue_1.getWorkerId)(),
        hostname: (0, job_queue_1.getWorkerHostname)(),
        currentJob,
    };
}
// ============================================================
// JOB PROCESSING
// ============================================================
/**
 * Poll for and process the next available job.
 *
 * At most one poll runs at a time: a tick that fires while a previous poll is
 * still claiming or processing returns immediately. Without this guard an
 * overlapping poll could claim a second job, overwrite (and leak) the
 * heartbeat interval, and its `finally` would clear the state of the job that
 * is still running.
 *
 * Never rejects: errors are logged and, if a job was claimed, reported to the
 * queue via failJob.
 *
 * @returns {Promise<void>}
 */
async function pollForJobs() {
    if (!isRunning || currentJob || isPolling) {
        return; // Stopped, or already claiming/processing a job
    }
    isPolling = true;
    try {
        const workerId = (0, job_queue_1.getWorkerId)();
        // Try to claim a job
        const job = await (0, job_queue_1.claimNextJob)({
            workerId,
            jobTypes: ['dutchie_product_crawl', 'menu_detection', 'menu_detection_single'],
            lockDurationMinutes: 30,
        });
        if (!job) {
            return; // No jobs available
        }
        currentJob = job;
        console.log(`[Worker] Processing job ${job.id} (type=${job.jobType}, dispensary=${job.dispensaryId})`);
        // Start heartbeat for this job
        heartbeatTimer = setInterval(async () => {
            if (currentJob) {
                try {
                    await (0, job_queue_1.heartbeat)(currentJob.id);
                }
                catch (error) {
                    console.error('[Worker] Heartbeat error:', error);
                }
            }
        }, HEARTBEAT_INTERVAL_MS);
        // Process the job
        await processJob(job);
    }
    catch (error) {
        console.error('[Worker] Error polling for jobs:', error);
        if (currentJob) {
            try {
                await (0, job_queue_1.failJob)(currentJob.id, getErrorMessage(error));
            }
            catch (failError) {
                console.error('[Worker] Error failing job:', failError);
            }
        }
    }
    finally {
        // Clear heartbeat timer
        if (heartbeatTimer) {
            clearInterval(heartbeatTimer);
            heartbeatTimer = null;
        }
        currentJob = null;
        isPolling = false;
    }
}
/**
 * Process a single job by dispatching on its jobType.
 *
 * Any error thrown by the handler (including an unknown job type) is logged
 * and reported to the queue via failJob. An error thrown by failJob itself
 * propagates to the caller.
 *
 * @param {object} job - Claimed job; `id` and `jobType` are read here.
 * @returns {Promise<void>}
 */
async function processJob(job) {
    try {
        switch (job.jobType) {
            case 'dutchie_product_crawl':
                await processProductCrawlJob(job);
                break;
            case 'menu_detection':
                await processMenuDetectionJob(job);
                break;
            case 'menu_detection_single':
                await processSingleDetectionJob(job);
                break;
            default:
                throw new Error(`Unknown job type: ${job.jobType}`);
        }
    }
    catch (error) {
        console.error(`[Worker] Job ${job.id} failed:`, error);
        await (0, job_queue_1.failJob)(job.id, getErrorMessage(error));
    }
}
// Maximum consecutive failures before flagging a dispensary
const MAX_CONSECUTIVE_FAILURES = 3;
/**
 * Record a successful crawl - resets failure counter.
 *
 * @param {*} dispensaryId - Primary key of the dispensaries row to update.
 * @returns {Promise<void>}
 */
async function recordCrawlSuccess(dispensaryId) {
    await (0, connection_1.query)(`UPDATE dispensaries
     SET consecutive_failures = 0,
         last_crawl_at = NOW(),
         updated_at = NOW()
     WHERE id = $1`, [dispensaryId]);
}
/**
 * Record a crawl failure - increments counter and may flag dispensary.
 *
 * Once the counter reaches MAX_CONSECUTIVE_FAILURES the row gets `failed_at`
 * set and its `menu_type` / `platform_dispensary_id` cleared.
 *
 * @param {*} dispensaryId - Primary key of the dispensaries row to update.
 * @param {string} errorMessage - Stored as the last failure reason.
 * @returns {Promise<boolean>} true if dispensary was flagged as failed.
 */
async function recordCrawlFailure(dispensaryId, errorMessage) {
    // Increment failure counter
    const { rows } = await (0, connection_1.query)(`UPDATE dispensaries
     SET consecutive_failures = consecutive_failures + 1,
         last_failure_at = NOW(),
         last_failure_reason = $2,
         updated_at = NOW()
     WHERE id = $1
     RETURNING consecutive_failures`, [dispensaryId, errorMessage]);
    // No row (unknown id) is treated as zero failures.
    const failures = rows[0]?.consecutive_failures || 0;
    // If we've hit the threshold, flag the dispensary as failed
    if (failures >= MAX_CONSECUTIVE_FAILURES) {
        await (0, connection_1.query)(`UPDATE dispensaries
       SET failed_at = NOW(),
           menu_type = NULL,
           platform_dispensary_id = NULL,
           failure_notes = $2,
           updated_at = NOW()
       WHERE id = $1`, [dispensaryId, `Auto-flagged after ${failures} consecutive failures. Last error: ${errorMessage}`]);
        console.log(`[Worker] Dispensary ${dispensaryId} flagged as FAILED after ${failures} consecutive failures`);
        return true;
    }
    console.log(`[Worker] Dispensary ${dispensaryId} failure recorded (${failures}/${MAX_CONSECUTIVE_FAILURES})`);
    return false;
}
/**
 * Process a product crawl job for a single dispensary.
 *
 * Each failed crawl is recorded exactly once. When recording the failure
 * flags the dispensary, the job is completed with zero counts instead of
 * being failed; otherwise the error is rethrown for processJob to report.
 *
 * @param {object} job - Claimed job; reads `id`, `dispensaryId` and the
 *   optional `metadata.pricingType` / `metadata.useBothModes`.
 * @returns {Promise<void>}
 * @throws {Error} If the job has no dispensary id, the dispensary does not
 *   exist or lacks a platform id, or the crawl fails without the dispensary
 *   becoming flagged.
 */
async function processProductCrawlJob(job) {
    if (!job.dispensaryId) {
        throw new Error('Product crawl job requires dispensary_id');
    }
    // Get dispensary details
    const { rows } = await (0, connection_1.query)(`SELECT ${DISPENSARY_COLUMNS} FROM dispensaries WHERE id = $1`, [job.dispensaryId]);
    if (rows.length === 0) {
        throw new Error(`Dispensary ${job.dispensaryId} not found`);
    }
    const dispensary = (0, discovery_1.mapDbRowToDispensary)(rows[0]);
    // Check if dispensary is already flagged as failed
    if (rows[0].failed_at) {
        console.log(`[Worker] Skipping dispensary ${job.dispensaryId} - already flagged as failed`);
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
        return;
    }
    if (!dispensary.platformDispensaryId) {
        // Record failure and potentially flag
        await recordCrawlFailure(job.dispensaryId, 'Missing platform_dispensary_id');
        throw new Error(`Dispensary ${job.dispensaryId} has no platform_dispensary_id`);
    }
    // Get crawl options from job metadata
    const pricingType = job.metadata?.pricingType || 'rec';
    const useBothModes = job.metadata?.useBothModes !== false;
    try {
        // Crawl the dispensary
        const result = await (0, product_crawler_1.crawlDispensaryProducts)(dispensary, pricingType, {
            useBothModes,
            onProgress: async (progress) => {
                // Update progress for live monitoring
                await (0, job_queue_1.updateJobProgress)(job.id, {
                    productsFound: progress.productsFound,
                    productsUpserted: progress.productsUpserted,
                    snapshotsCreated: progress.snapshotsCreated,
                    currentPage: progress.currentPage,
                    totalPages: progress.totalPages,
                });
            },
        });
        if (!result.success) {
            // Crawl returned failure - hand it to the catch below, which is the
            // single place where failures are recorded (recording here as well
            // would count one failed crawl twice).
            throw new Error(result.errorMessage || 'Crawl failed');
        }
        // Success! Reset failure counter
        await recordCrawlSuccess(job.dispensaryId);
        await (0, job_queue_1.completeJob)(job.id, {
            productsFound: result.productsFetched,
            productsUpserted: result.productsUpserted,
            snapshotsCreated: result.snapshotsCreated,
        });
    }
    catch (error) {
        // Record the failure
        const wasFlagged = await recordCrawlFailure(job.dispensaryId, getErrorMessage(error));
        if (wasFlagged) {
            // Dispensary is now flagged - complete the job rather than fail it
            await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
        }
        else {
            throw error;
        }
    }
}
/**
 * Process a menu detection job (bulk).
 *
 * Loads './menu-detection' lazily, runs executeMenuDetectionJob with the job
 * metadata as its config and completes the job with the processed/succeeded
 * item counts.
 *
 * @param {object} job - Claimed job; reads `id` and `metadata`.
 * @returns {Promise<void>}
 * @throws {Error} If the detection run reports status 'error'.
 */
async function processMenuDetectionJob(job) {
    const { executeMenuDetectionJob } = await Promise.resolve().then(() => __importStar(require('./menu-detection')));
    const config = job.metadata || {};
    const result = await executeMenuDetectionJob(config);
    if (result.status === 'error') {
        throw new Error(result.errorMessage || 'Menu detection failed');
    }
    await (0, job_queue_1.completeJob)(job.id, {
        productsFound: result.itemsProcessed,
        productsUpserted: result.itemsSucceeded,
    });
}
/**
 * Process a single dispensary menu detection job.
 * This is the parallelizable version - each worker can detect one dispensary at a time.
 *
 * Dispensaries that are already flagged as failed, or that already have a
 * menu_type other than 'unknown', are skipped and the job is completed.
 * Each failed detection is recorded exactly once; when that flags the
 * dispensary the job is completed instead of failed.
 *
 * @param {object} job - Claimed job; reads `id` and `dispensaryId`.
 * @returns {Promise<void>}
 * @throws {Error} If the job has no dispensary id, the dispensary does not
 *   exist, or detection fails without the dispensary becoming flagged.
 */
async function processSingleDetectionJob(job) {
    if (!job.dispensaryId) {
        throw new Error('Single detection job requires dispensary_id');
    }
    const { detectAndResolveDispensary } = await Promise.resolve().then(() => __importStar(require('./menu-detection')));
    // Get dispensary details
    const { rows } = await (0, connection_1.query)(`SELECT ${DISPENSARY_COLUMNS} FROM dispensaries WHERE id = $1`, [job.dispensaryId]);
    if (rows.length === 0) {
        throw new Error(`Dispensary ${job.dispensaryId} not found`);
    }
    // Raw database row (snake_case columns), not the mapped dispensary object.
    const dispensary = rows[0];
    // Skip if already detected or failed
    if (dispensary.failed_at) {
        console.log(`[Worker] Skipping dispensary ${job.dispensaryId} - already flagged as failed`);
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
        return;
    }
    if (dispensary.menu_type && dispensary.menu_type !== 'unknown') {
        console.log(`[Worker] Skipping dispensary ${job.dispensaryId} - already detected as ${dispensary.menu_type}`);
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 1 });
        return;
    }
    console.log(`[Worker] Detecting menu for dispensary ${job.dispensaryId} (${dispensary.name})...`);
    try {
        const result = await detectAndResolveDispensary(job.dispensaryId);
        if (!result.success) {
            // Detection failed - hand it to the catch below, which is the single
            // place where failures are recorded (recording here as well would
            // count one failed detection twice).
            throw new Error(result.error || 'Detection failed');
        }
        console.log(`[Worker] Dispensary ${job.dispensaryId}: detected ${result.detectedProvider}, platformId=${result.platformDispensaryId || 'none'}`);
        await (0, job_queue_1.completeJob)(job.id, {
            productsFound: 1,
            productsUpserted: result.platformDispensaryId ? 1 : 0,
        });
    }
    catch (error) {
        // Record the failure
        const wasFlagged = await recordCrawlFailure(job.dispensaryId, getErrorMessage(error));
        if (wasFlagged) {
            // Dispensary is now flagged - complete the job rather than fail it
            await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
        }
        else {
            throw error;
        }
    }
}
// ============================================================
// SHUTDOWN HANDLING
// ============================================================
/**
 * Register SIGTERM/SIGINT handlers that stop the worker and exit the process.
 *
 * Handlers are installed at most once per process, so restarting the worker
 * does not stack additional listeners. The process exits with code 0 after a
 * clean stop and with code 1 if stopping the worker throws.
 *
 * @returns {void}
 */
function setupShutdownHandlers() {
    if (shutdownHandlersInstalled) {
        return;
    }
    shutdownHandlersInstalled = true;
    const shutdown = async (signal) => {
        if (shutdownPromise) {
            return; // Shutdown already in progress; the first handler will exit
        }
        console.log(`\n[Worker] Received ${signal}, shutting down...`);
        shutdownPromise = stopWorker();
        try {
            await shutdownPromise;
        }
        catch (error) {
            // Without this, a failing stopWorker would surface as an unhandled
            // rejection and the explicit exit below would never run.
            console.error('[Worker] Error during shutdown:', error);
            process.exit(1);
        }
        process.exit(0);
    };
    process.on('SIGTERM', () => shutdown('SIGTERM'));
    process.on('SIGINT', () => shutdown('SIGINT'));
}
// ============================================================
// STANDALONE WORKER ENTRY POINT
// ============================================================
if (require.main === module) {
    // Run as standalone worker
    startWorker().catch((error) => {
        console.error('[Worker] Fatal error:', error);
        process.exit(1);
    });
}