Files
cannaiq/backend/dist/dutchie-az/services/worker.js
Kelly 66e07b2009 fix(monitor): remove non-existent worker columns from job_run_logs query
The job_run_logs table tracks scheduled job orchestration, not individual
worker jobs. Worker info (worker_id, worker_hostname) belongs on
dispensary_crawl_jobs, not job_run_logs.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-03 18:45:05 -07:00

441 lines
17 KiB
JavaScript

"use strict";
/**
* Worker Service
*
* Polls the job queue and processes crawl jobs.
* Each worker instance runs independently, claiming jobs atomically.
*/
// TypeScript emit helper: re-exports property `key` of `source` on `target`
// (optionally under the name `alias`). Where Object.create exists the export is
// defined through a property descriptor; otherwise the value is simply copied.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exportName = alias === undefined ? key : alias;
        let descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Fall back to a forwarding getter when the source has no own descriptor,
        // when it is an accessor on a non-ES module, or when it is a data
        // property that is writable or configurable.
        const needsGetter = !descriptor
            || ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        target[alias === undefined ? key : alias] = source[key];
    });
// TypeScript emit helper: stores `defaultValue` as the `default` export of
// `target`. With Object.create available it is defined as an enumerable data
// property; otherwise it is assigned directly.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, defaultValue) {
        const descriptor = { enumerable: true, value: defaultValue };
        Object.defineProperty(target, "default", descriptor);
    }
    : function (target, defaultValue) {
        target["default"] = defaultValue;
    });
// TypeScript emit helper used for `import * as ns` / dynamic `import()`.
// Given a loaded module object it returns a namespace object:
//  - a module already marked `__esModule` is returned unchanged;
//  - otherwise a fresh object is built that re-exports every own key except
//    "default" via __createBinding, and exposes the module itself as `default`.
var __importStar = (this && this.__importStar) || (function () {
// Lazily selects the key-enumeration strategy: on first call `ownKeys` replaces
// itself with Object.getOwnPropertyNames, or with a for-in/hasOwnProperty
// fallback when that function is unavailable.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
// Already an ES module namespace: nothing to wrap.
if (mod && mod.__esModule) return mod;
var result = {};
// Copy every own key except "default"; null/undefined modules contribute no keys.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
// The original module object becomes the namespace's default export.
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.startWorker = startWorker;
exports.stopWorker = stopWorker;
exports.getWorkerStatus = getWorkerStatus;
const job_queue_1 = require("./job-queue");
const product_crawler_1 = require("./product-crawler");
const discovery_1 = require("./discovery");
const connection_1 = require("../db/connection");
// Column list interpolated into every `SELECT ... FROM dispensaries` in this
// file, used instead of SELECT * so schema differences do not leak into the
// worker. failed_at is part of the list because both the product-crawl and
// single-detection handlers read it to skip already-flagged dispensaries.
const DISPENSARY_COLUMNS = [
    '',
    'id, name, slug, city, state, zip, address, latitude, longitude,',
    'menu_type, menu_url, platform_dispensary_id, website,',
    'provider_detection_data, created_at, updated_at, failed_at',
    '',
].join('\n');
// ============================================================
// WORKER CONFIG
// ============================================================
// All durations are in milliseconds.
const POLL_INTERVAL_MS = 5 * 1000; // how often pollForJobs() is scheduled (5 seconds)
const HEARTBEAT_INTERVAL_MS = 60 * 1000; // heartbeat period while a job is running (60 seconds)
const STALE_CHECK_INTERVAL_MS = 5 * 60 * 1000; // period of the stale-job recovery timer (5 minutes)
const SHUTDOWN_GRACE_PERIOD_MS = 30 * 1000; // how long stopWorker() waits for the running job (30 seconds)
// ============================================================
// WORKER STATE
// ============================================================
// True from startWorker() until stopWorker(); pollForJobs() returns immediately while false.
let isRunning = false;
// The job pollForJobs() is currently processing, or null when the worker is idle.
let currentJob = null;
// Handle of the interval that schedules pollForJobs(); null when not scheduled.
let pollTimer = null;
// Handle of the per-job heartbeat interval; created when a job is claimed and cleared when it ends.
let heartbeatTimer = null;
// Handle of the interval that periodically calls recoverStaleJobs(); null when not scheduled.
let staleCheckTimer = null;
// Promise of the stopWorker() call started by a signal handler; lets a second signal reuse it.
let shutdownPromise = null;
// ============================================================
// WORKER LIFECYCLE
// ============================================================
/**
 * Start the worker.
 *
 * No-op (apart from a log line) when the worker is already running. Otherwise
 * marks the worker as running, installs the signal handlers, schedules the
 * job-poll and stale-job-recovery intervals, and runs one poll cycle right away.
 *
 * @returns {Promise<void>} Resolves once the initial poll cycle has finished.
 */
async function startWorker() {
    if (isRunning) {
        console.log('[Worker] Already running');
        return;
    }
    const id = (0, job_queue_1.getWorkerId)();
    const host = (0, job_queue_1.getWorkerHostname)();
    console.log(`[Worker] Starting worker ${id} on ${host}`);
    isRunning = true;
    // Graceful shutdown on SIGTERM / SIGINT
    setupShutdownHandlers();
    // Recurring job polling
    pollTimer = setInterval(pollForJobs, POLL_INTERVAL_MS);
    // Recurring stale-job recovery; errors are logged so the timer keeps firing.
    // (Every worker runs this; the original author notes the operation is idempotent.)
    async function runStaleJobRecovery() {
        try {
            await (0, job_queue_1.recoverStaleJobs)(15);
        }
        catch (recoveryError) {
            console.error('[Worker] Error recovering stale jobs:', recoveryError);
        }
    }
    staleCheckTimer = setInterval(runStaleJobRecovery, STALE_CHECK_INTERVAL_MS);
    // Do not wait for the first interval tick before looking for work
    await pollForJobs();
    console.log(`[Worker] Worker ${id} started, polling every ${POLL_INTERVAL_MS}ms`);
}
/**
 * Stop the worker gracefully.
 *
 * Clears the poll, heartbeat and stale-check timers, then waits up to
 * SHUTDOWN_GRACE_PERIOD_MS (checking once per second) for the in-flight job to
 * finish. A job still running after the grace period is reported through
 * failJob() with the reason 'Worker shutdown'.
 *
 * A failure of that final failJob() call is logged rather than thrown, as
 * pollForJobs() does for its own failJob() call. Otherwise the rejection would
 * stop the signal handler from ever reaching process.exit().
 *
 * @returns {Promise<void>} Resolves when shutdown has finished; a no-op if the
 *   worker is not running.
 */
async function stopWorker() {
    if (!isRunning)
        return;
    console.log('[Worker] Stopping worker...');
    isRunning = false;
    // Clear timers
    if (pollTimer) {
        clearInterval(pollTimer);
        pollTimer = null;
    }
    if (heartbeatTimer) {
        clearInterval(heartbeatTimer);
        heartbeatTimer = null;
    }
    if (staleCheckTimer) {
        clearInterval(staleCheckTimer);
        staleCheckTimer = null;
    }
    // Wait for current job to complete
    if (currentJob) {
        console.log(`[Worker] Waiting for job ${currentJob.id} to complete...`);
        const deadline = Date.now() + SHUTDOWN_GRACE_PERIOD_MS;
        while (currentJob && Date.now() < deadline) {
            await new Promise((resolve) => setTimeout(resolve, 1000));
        }
        // Snapshot the job: pollForJobs() may null currentJob while we await below.
        const unfinishedJob = currentJob;
        if (unfinishedJob) {
            console.log(`[Worker] Job ${unfinishedJob.id} did not complete in time, marking for retry`);
            try {
                await (0, job_queue_1.failJob)(unfinishedJob.id, 'Worker shutdown');
            }
            catch (failError) {
                // Best effort: shutdown must still complete if the queue is unreachable.
                console.error('[Worker] Error failing job:', failError);
            }
        }
    }
    console.log('[Worker] Worker stopped');
}
/**
 * Report a snapshot of this worker's state.
 *
 * @returns {{isRunning: boolean, workerId: *, hostname: *, currentJob: *}}
 *   The running flag, the id and hostname reported by the job-queue module,
 *   and the job currently being processed (null when idle).
 */
function getWorkerStatus() {
    const workerId = (0, job_queue_1.getWorkerId)();
    const hostname = (0, job_queue_1.getWorkerHostname)();
    return { isRunning, workerId, hostname, currentJob };
}
// ============================================================
// JOB PROCESSING
// ============================================================
// True while a poll cycle is in flight (claiming or processing). currentJob is
// only assigned after the awaited claim returns, so on its own it cannot stop
// a second timer tick from claiming another job during that await.
let isPolling = false;
/**
 * Poll for and process the next available job.
 *
 * Returns immediately when the worker is stopped, a job is already being
 * processed, or another poll cycle is still in flight. Otherwise claims one job
 * from the queue, sends a heartbeat for it every HEARTBEAT_INTERVAL_MS while
 * processJob() runs, and always clears the heartbeat timer and currentJob
 * afterwards.
 *
 * Never rejects: errors are logged, and if a job had been claimed a best-effort
 * failJob() is attempted for it.
 *
 * @returns {Promise<void>}
 */
async function pollForJobs() {
    if (!isRunning || currentJob || isPolling) {
        return; // Stopped, busy, or a claim is already in progress
    }
    isPolling = true;
    try {
        const workerId = (0, job_queue_1.getWorkerId)();
        // Try to claim a job
        const job = await (0, job_queue_1.claimNextJob)({
            workerId,
            jobTypes: ['dutchie_product_crawl', 'menu_detection', 'menu_detection_single'],
            lockDurationMinutes: 30,
        });
        if (!job) {
            return; // No jobs available
        }
        currentJob = job;
        console.log(`[Worker] Processing job ${job.id} (type=${job.jobType}, dispensary=${job.dispensaryId})`);
        // Start heartbeat for this job
        heartbeatTimer = setInterval(async () => {
            if (currentJob) {
                try {
                    await (0, job_queue_1.heartbeat)(currentJob.id);
                }
                catch (error) {
                    console.error('[Worker] Heartbeat error:', error);
                }
            }
        }, HEARTBEAT_INTERVAL_MS);
        // Process the job
        await processJob(job);
    }
    catch (error) {
        console.error('[Worker] Error polling for jobs:', error);
        if (currentJob) {
            // processJob() reports ordinary job failures itself; reaching here with a
            // claimed job means that reporting (or something around it) threw.
            const reason = error instanceof Error ? error.message : String(error);
            try {
                await (0, job_queue_1.failJob)(currentJob.id, reason);
            }
            catch (failError) {
                console.error('[Worker] Error failing job:', failError);
            }
        }
    }
    finally {
        // Clear heartbeat timer
        if (heartbeatTimer) {
            clearInterval(heartbeatTimer);
            heartbeatTimer = null;
        }
        currentJob = null;
        isPolling = false;
    }
}
/**
 * Dispatch a claimed job to the handler for its jobType.
 *
 * Any error thrown by the handler (or an unrecognised jobType) is logged and
 * reported through failJob(); it is not rethrown unless failJob() itself fails.
 *
 * @param {object} job - Claimed job; `id` and `jobType` are read here.
 * @returns {Promise<void>}
 */
async function processJob(job) {
    const handlersByType = new Map([
        ['dutchie_product_crawl', processProductCrawlJob],
        ['menu_detection', processMenuDetectionJob],
        ['menu_detection_single', processSingleDetectionJob],
    ]);
    try {
        const handler = handlersByType.get(job.jobType);
        if (handler === undefined) {
            throw new Error(`Unknown job type: ${job.jobType}`);
        }
        await handler(job);
    }
    catch (jobError) {
        console.error(`[Worker] Job ${job.id} failed:`, jobError);
        await (0, job_queue_1.failJob)(job.id, jobError.message);
    }
}
// Consecutive-failure count at which recordCrawlFailure() flags a dispensary:
// once the stored counter reaches this value, failed_at is set and menu_type /
// platform_dispensary_id are cleared.
const MAX_CONSECUTIVE_FAILURES = 3;
/**
 * Record a successful crawl for a dispensary: zeroes consecutive_failures and
 * stamps last_crawl_at / updated_at with the database's NOW().
 *
 * @param {*} dispensaryId - Primary key of the dispensaries row to update.
 * @returns {Promise<void>}
 */
async function recordCrawlSuccess(dispensaryId) {
    const resetFailuresSql = `UPDATE dispensaries
SET consecutive_failures = 0,
last_crawl_at = NOW(),
updated_at = NOW()
WHERE id = $1`;
    const params = [dispensaryId];
    await (0, connection_1.query)(resetFailuresSql, params);
}
/**
 * Record one failure for a dispensary and flag it once the threshold is hit.
 *
 * Increments consecutive_failures and stores the failure time and reason. When
 * the returned counter is at least MAX_CONSECUTIVE_FAILURES, a second UPDATE
 * sets failed_at, clears menu_type and platform_dispensary_id, and writes a
 * failure note containing the count and the last error.
 *
 * @param {*} dispensaryId - Primary key of the dispensaries row.
 * @param {string} errorMessage - Reason stored in last_failure_reason.
 * @returns {Promise<boolean>} true if this call flagged the dispensary as
 *   failed, false if the failure was only counted. A missing row counts as 0.
 */
async function recordCrawlFailure(dispensaryId, errorMessage) {
    const runQuery = (sql, params) => (0, connection_1.query)(sql, params);
    // Step 1: bump the counter and read back its new value
    const incrementSql = `UPDATE dispensaries
SET consecutive_failures = consecutive_failures + 1,
last_failure_at = NOW(),
last_failure_reason = $2,
updated_at = NOW()
WHERE id = $1
RETURNING consecutive_failures`;
    const updated = await runQuery(incrementSql, [dispensaryId, errorMessage]);
    const failures = updated.rows[0]?.consecutive_failures || 0;
    const thresholdReached = failures >= MAX_CONSECUTIVE_FAILURES;
    if (!thresholdReached) {
        console.log(`[Worker] Dispensary ${dispensaryId} failure recorded (${failures}/${MAX_CONSECUTIVE_FAILURES})`);
        return false;
    }
    // Step 2: threshold reached, so flag the dispensary as failed
    const flagSql = `UPDATE dispensaries
SET failed_at = NOW(),
menu_type = NULL,
platform_dispensary_id = NULL,
failure_notes = $2,
updated_at = NOW()
WHERE id = $1`;
    const note = `Auto-flagged after ${failures} consecutive failures. Last error: ${errorMessage}`;
    await runQuery(flagSql, [dispensaryId, note]);
    console.log(`[Worker] Dispensary ${dispensaryId} flagged as FAILED after ${failures} consecutive failures`);
    return true;
}
/**
 * Process a product crawl job for a single dispensary.
 *
 * Flow:
 *  1. Load the dispensary row; a dispensary already flagged as failed is
 *     skipped and the job completed with zero counts.
 *  2. A dispensary without a platform dispensary id records one failure and
 *     the job throws.
 *  3. Otherwise run the crawl, forwarding progress to updateJobProgress().
 *     On success the failure counter is reset and the job completed with the
 *     crawl's counts.
 *  4. On failure, whether the crawler returned `success: false` or something
 *     threw inside the crawl/success path, the failure is recorded exactly
 *     once. If that flags the dispensary the job is completed with zero
 *     counts; otherwise the error is thrown so the caller fails the job.
 *
 * Recording once matters because reaching MAX_CONSECUTIVE_FAILURES clears the
 * dispensary's menu_type and platform_dispensary_id.
 *
 * @param {object} job - Claimed job; reads `id`, `dispensaryId` and optional
 *   `metadata.pricingType` (default 'rec') / `metadata.useBothModes`
 *   (default true).
 * @returns {Promise<void>}
 * @throws {Error} When dispensaryId is missing, the dispensary is not found,
 *   it has no platform dispensary id, or the crawl failed without flagging it.
 */
async function processProductCrawlJob(job) {
    if (!job.dispensaryId) {
        throw new Error('Product crawl job requires dispensary_id');
    }
    // Get dispensary details
    const { rows } = await (0, connection_1.query)(`SELECT ${DISPENSARY_COLUMNS} FROM dispensaries WHERE id = $1`, [job.dispensaryId]);
    if (rows.length === 0) {
        throw new Error(`Dispensary ${job.dispensaryId} not found`);
    }
    const dispensary = (0, discovery_1.mapDbRowToDispensary)(rows[0]);
    // Check if dispensary is already flagged as failed
    if (rows[0].failed_at) {
        console.log(`[Worker] Skipping dispensary ${job.dispensaryId} - already flagged as failed`);
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
        return;
    }
    if (!dispensary.platformDispensaryId) {
        // Record failure and potentially flag
        await recordCrawlFailure(job.dispensaryId, 'Missing platform_dispensary_id');
        throw new Error(`Dispensary ${job.dispensaryId} has no platform_dispensary_id`);
    }
    // Get crawl options from job metadata
    const pricingType = job.metadata?.pricingType || 'rec';
    const useBothModes = job.metadata?.useBothModes !== false;
    // Both failure kinds (reported and thrown) are funnelled into `failure` so the
    // bookkeeping below runs once, outside the try block.
    let failure;
    try {
        // Crawl the dispensary
        const result = await (0, product_crawler_1.crawlDispensaryProducts)(dispensary, pricingType, {
            useBothModes,
            onProgress: async (progress) => {
                // Update progress for live monitoring
                await (0, job_queue_1.updateJobProgress)(job.id, {
                    productsFound: progress.productsFound,
                    productsUpserted: progress.productsUpserted,
                    snapshotsCreated: progress.snapshotsCreated,
                    currentPage: progress.currentPage,
                    totalPages: progress.totalPages,
                });
            },
        });
        if (result.success) {
            // Success! Reset failure counter
            await recordCrawlSuccess(job.dispensaryId);
            await (0, job_queue_1.completeJob)(job.id, {
                productsFound: result.productsFetched,
                productsUpserted: result.productsUpserted,
                snapshotsCreated: result.snapshotsCreated,
            });
            return;
        }
        failure = new Error(result.errorMessage || 'Crawl failed');
    }
    catch (error) {
        failure = error;
    }
    const reason = failure instanceof Error ? failure.message : String(failure);
    const wasFlagged = await recordCrawlFailure(job.dispensaryId, reason);
    if (!wasFlagged) {
        throw failure;
    }
    // Dispensary is now flagged - complete the job rather than fail it
    await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
}
/**
 * Process a bulk menu detection job.
 *
 * Lazily loads ./menu-detection, runs executeMenuDetectionJob() with the job's
 * metadata as its config (an empty object when there is none), and completes
 * the job with the processed / succeeded item counts.
 *
 * @param {object} job - Claimed job; reads `id` and optional `metadata`.
 * @returns {Promise<void>}
 * @throws {Error} When the detection run reports status 'error'.
 */
async function processMenuDetectionJob(job) {
    const menuDetection = await Promise.resolve().then(() => __importStar(require('./menu-detection')));
    const { executeMenuDetectionJob: runBulkDetection } = menuDetection;
    const outcome = await runBulkDetection(job.metadata || {});
    if (outcome.status === 'error') {
        throw new Error(outcome.errorMessage || 'Menu detection failed');
    }
    const { itemsProcessed, itemsSucceeded } = outcome;
    await (0, job_queue_1.completeJob)(job.id, {
        productsFound: itemsProcessed,
        productsUpserted: itemsSucceeded,
    });
}
/**
 * Process a menu detection job for a single dispensary.
 * This is the parallelizable version - each worker detects one dispensary at a time.
 *
 * Flow:
 *  1. Load the dispensary row. Dispensaries already flagged as failed, or whose
 *     menu_type is set to something other than 'unknown', are skipped and the
 *     job is completed.
 *  2. Otherwise run detectAndResolveDispensary(). On success the job is
 *     completed (productsUpserted is 1 only when a platform id was resolved).
 *  3. On failure, whether detection returned `success: false` or something
 *     threw, the failure is recorded exactly once. If that flags the
 *     dispensary the job is completed with zero counts; otherwise the error is
 *     thrown so the caller fails the job.
 *
 * Recording once matters because reaching MAX_CONSECUTIVE_FAILURES flags the
 * dispensary as failed.
 *
 * @param {object} job - Claimed job; reads `id` and `dispensaryId`.
 * @returns {Promise<void>}
 * @throws {Error} When dispensaryId is missing, the dispensary is not found, or
 *   detection failed without flagging the dispensary.
 */
async function processSingleDetectionJob(job) {
    if (!job.dispensaryId) {
        throw new Error('Single detection job requires dispensary_id');
    }
    const { detectAndResolveDispensary } = await Promise.resolve().then(() => __importStar(require('./menu-detection')));
    // Get dispensary details
    const { rows } = await (0, connection_1.query)(`SELECT ${DISPENSARY_COLUMNS} FROM dispensaries WHERE id = $1`, [job.dispensaryId]);
    if (rows.length === 0) {
        throw new Error(`Dispensary ${job.dispensaryId} not found`);
    }
    const dispensary = rows[0];
    // Skip if already detected or failed
    if (dispensary.failed_at) {
        console.log(`[Worker] Skipping dispensary ${job.dispensaryId} - already flagged as failed`);
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
        return;
    }
    if (dispensary.menu_type && dispensary.menu_type !== 'unknown') {
        console.log(`[Worker] Skipping dispensary ${job.dispensaryId} - already detected as ${dispensary.menu_type}`);
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 1 });
        return;
    }
    console.log(`[Worker] Detecting menu for dispensary ${job.dispensaryId} (${dispensary.name})...`);
    // Both failure kinds (reported and thrown) are funnelled into `failure` so the
    // bookkeeping below runs once, outside the try block.
    let failure;
    try {
        const result = await detectAndResolveDispensary(job.dispensaryId);
        if (result.success) {
            console.log(`[Worker] Dispensary ${job.dispensaryId}: detected ${result.detectedProvider}, platformId=${result.platformDispensaryId || 'none'}`);
            await (0, job_queue_1.completeJob)(job.id, {
                productsFound: 1,
                productsUpserted: result.platformDispensaryId ? 1 : 0,
            });
            return;
        }
        failure = new Error(result.error || 'Detection failed');
    }
    catch (error) {
        failure = error;
    }
    const reason = failure instanceof Error ? failure.message : String(failure);
    const wasFlagged = await recordCrawlFailure(job.dispensaryId, reason);
    if (!wasFlagged) {
        throw failure;
    }
    // Dispensary is now flagged - complete the job rather than fail it
    await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
}
// ============================================================
// SHUTDOWN HANDLING
// ============================================================
/**
 * Register SIGTERM and SIGINT listeners that stop the worker and then exit the
 * process with code 0. The first signal starts stopWorker() and stores its
 * promise in shutdownPromise; a signal arriving while that promise is set
 * returns it instead of starting a second shutdown.
 */
function setupShutdownHandlers() {
    async function handleSignal(signalName) {
        if (shutdownPromise) {
            return shutdownPromise;
        }
        console.log(`\n[Worker] Received ${signalName}, shutting down...`);
        shutdownPromise = stopWorker();
        await shutdownPromise;
        process.exit(0);
    }
    for (const signalName of ['SIGTERM', 'SIGINT']) {
        process.on(signalName, () => handleSignal(signalName));
    }
}
// ============================================================
// STANDALONE WORKER ENTRY POINT
// ============================================================
if (require.main === module) {
    // Executed directly (not require()'d by another module): start the worker
    // and exit with status 1 if startup rejects.
    const exitOnFatalError = (error) => {
        console.error('[Worker] Fatal error:', error);
        process.exit(1);
    };
    startWorker().catch(exitOnFatalError);
}