fix(monitor): remove non-existent worker columns from job_run_logs query
The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
440
backend/dist/dutchie-az/services/worker.js
vendored
Normal file
440
backend/dist/dutchie-az/services/worker.js
vendored
Normal file
@@ -0,0 +1,440 @@
|
||||
"use strict";
|
||||
/**
 * Worker Service
 *
 * Polls the job queue and processes crawl jobs.
 * Each worker instance runs independently, claiming jobs atomically.
 */
|
||||
/**
 * TypeScript-emitted interop helper.
 *
 * Exposes property `key` of `source` on `target` under the name `alias`
 * (which defaults to `key`). When Object.create is available the property is
 * installed through a descriptor: the source's own descriptor is reused unless
 * it is missing, is a writable/configurable data property, or is an accessor on
 * a non-ES module, in which case an enumerable getter that reads
 * `source[key]` lazily is installed instead. Without Object.create the value is
 * simply copied.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter = !descriptor ||
            ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    }
    : function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        target[alias] = source[key];
    });
|
||||
/**
 * TypeScript-emitted interop helper.
 *
 * Attaches `value` to `target` as its "default" property. With Object.create
 * available the property is defined as enumerable (and, by defineProperty
 * defaults, non-writable); otherwise it is assigned directly.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    : function (target, value) {
        target["default"] = value;
    });
|
||||
/**
 * TypeScript-emitted interop helper for `import * as x` / dynamic `import()`.
 *
 * Modules already flagged `__esModule` are returned untouched. Anything else is
 * wrapped in a fresh namespace object: every own property except "default" is
 * re-bound via __createBinding, and the module itself becomes the namespace's
 * "default" export.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolves to Object.getOwnPropertyNames (or a for-in fallback) on first
    // use, then replaces itself with the resolved implementation.
    var listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                if (keys[idx] !== "default") {
                    __createBinding(namespace, mod, keys[idx]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.startWorker = startWorker;
|
||||
exports.stopWorker = stopWorker;
|
||||
exports.getWorkerStatus = getWorkerStatus;
|
||||
const job_queue_1 = require("./job-queue");
|
||||
const product_crawler_1 = require("./product-crawler");
|
||||
const discovery_1 = require("./discovery");
|
||||
const connection_1 = require("../db/connection");
|
||||
// Columns read from the dispensaries table. They are listed explicitly instead
// of using SELECT * so queries do not trip over schema differences.
// failed_at is part of the list because the job processors in this file check
// it before doing any work on a dispensary.
const DISPENSARY_COLUMNS = `
  id, name, slug, city, state, zip, address, latitude, longitude,
  menu_type, menu_url, platform_dispensary_id, website,
  provider_detection_data, created_at, updated_at, failed_at
`;
// ============================================================
// WORKER CONFIG
// ============================================================
const POLL_INTERVAL_MS = 5 * 1000; // look for a claimable job every 5 seconds
const HEARTBEAT_INTERVAL_MS = 60 * 1000; // heartbeat the active job every 60 seconds
const STALE_CHECK_INTERVAL_MS = 5 * 60 * 1000; // run stale-job recovery every 5 minutes
const SHUTDOWN_GRACE_PERIOD_MS = 30 * 1000; // on shutdown, give the active job 30s to finish
// ============================================================
// WORKER STATE
// ============================================================
let isRunning = false; // true between startWorker() and stopWorker()
let currentJob = null; // the job being processed right now, if any
let pollTimer = null; // interval handle for pollForJobs
let heartbeatTimer = null; // interval handle for the active job's heartbeat
let staleCheckTimer = null; // interval handle for stale-job recovery
let shutdownPromise = null; // set once a signal-triggered shutdown has begun
|
||||
// ============================================================
|
||||
// WORKER LIFECYCLE
|
||||
// ============================================================
|
||||
/**
 * Start the worker.
 *
 * No-op (apart from a log line) when the worker is already running. Otherwise
 * it marks the worker as running, installs the signal handlers, schedules the
 * recurring job poll and the recurring stale-job recovery, and then performs
 * one poll right away. Note that the returned promise only settles after that
 * first poll - including any job it picked up - has finished.
 *
 * @returns {Promise<void>}
 */
async function startWorker() {
    if (isRunning) {
        console.log('[Worker] Already running');
        return;
    }
    const id = (0, job_queue_1.getWorkerId)();
    const host = (0, job_queue_1.getWorkerHostname)();
    console.log(`[Worker] Starting worker ${id} on ${host}`);
    isRunning = true;
    // Graceful shutdown on SIGTERM / SIGINT.
    setupShutdownHandlers();
    // Recurring poll for claimable jobs.
    pollTimer = setInterval(pollForJobs, POLL_INTERVAL_MS);
    // Recurring stale-job recovery. Every worker schedules it; the original
    // author notes the recovery is idempotent. The argument 15 is presumably a
    // staleness threshold in minutes - confirm against job-queue.
    const recoverStale = async () => {
        try {
            await (0, job_queue_1.recoverStaleJobs)(15);
        }
        catch (err) {
            console.error('[Worker] Error recovering stale jobs:', err);
        }
    };
    staleCheckTimer = setInterval(recoverStale, STALE_CHECK_INTERVAL_MS);
    // Do not wait for the first interval tick before looking for work.
    await pollForJobs();
    console.log(`[Worker] Worker ${id} started, polling every ${POLL_INTERVAL_MS}ms`);
}
|
||||
/**
 * Stop the worker gracefully.
 *
 * Does nothing when the worker is not running. Otherwise it flips the running
 * flag, cancels all three timers, and - if a job is in progress - waits (checking
 * once per second) up to SHUTDOWN_GRACE_PERIOD_MS for it to finish. A job that
 * is still active after the grace period is reported to the queue via failJob
 * with the reason 'Worker shutdown'.
 *
 * @returns {Promise<void>}
 */
async function stopWorker() {
    if (!isRunning) {
        return;
    }
    console.log('[Worker] Stopping worker...');
    isRunning = false;
    // Cancel every scheduled activity.
    if (pollTimer) {
        clearInterval(pollTimer);
        pollTimer = null;
    }
    if (heartbeatTimer) {
        clearInterval(heartbeatTimer);
        heartbeatTimer = null;
    }
    if (staleCheckTimer) {
        clearInterval(staleCheckTimer);
        staleCheckTimer = null;
    }
    if (currentJob) {
        console.log(`[Worker] Waiting for job ${currentJob.id} to complete...`);
        // pollForJobs resets currentJob to null once the job is done, which
        // ends this wait early.
        const deadline = Date.now() + SHUTDOWN_GRACE_PERIOD_MS;
        while (currentJob && Date.now() < deadline) {
            await new Promise((resolve) => setTimeout(resolve, 1000));
        }
        if (currentJob) {
            console.log(`[Worker] Job ${currentJob.id} did not complete in time, marking for retry`);
            await (0, job_queue_1.failJob)(currentJob.id, 'Worker shutdown');
        }
    }
    console.log('[Worker] Worker stopped');
}
|
||||
/**
 * Snapshot of this worker's state.
 *
 * @returns {{isRunning: boolean, workerId: *, hostname: *, currentJob: *}}
 *   the running flag, the id and hostname reported by the job-queue module,
 *   and the job currently being processed (null when idle).
 */
function getWorkerStatus() {
    const workerId = (0, job_queue_1.getWorkerId)();
    const hostname = (0, job_queue_1.getWorkerHostname)();
    return { isRunning, workerId, hostname, currentJob };
}
|
||||
// ============================================================
|
||||
// JOB PROCESSING
|
||||
// ============================================================
|
||||
// True from the moment a poll passes its guard until its `finally` block has
// run. `currentJob` alone is not a sufficient guard: it is only assigned after
// the awaited claimNextJob call, so an interval tick arriving during that await
// would start a second, overlapping poll whose `finally` block would then clear
// the heartbeat timer and `currentJob` belonging to the job that is running.
let pollInFlight = false;
/**
 * Poll for and process the next available job.
 *
 * Returns immediately when the worker is stopped, a job is already being
 * processed, or another poll is still in flight. Otherwise it tries to claim a
 * job of one of the supported types (with a 30 minute lock), starts a periodic
 * heartbeat for it, and processes it. Any error escaping the claim/process
 * step is logged and, if a job had been claimed, reported through failJob.
 * The heartbeat timer, `currentJob` and the in-flight flag are always reset
 * afterwards.
 *
 * @returns {Promise<void>} never rejects; failures are logged.
 */
async function pollForJobs() {
    if (!isRunning || currentJob || pollInFlight) {
        return; // Stopped, busy, or a previous poll has not finished yet
    }
    pollInFlight = true;
    try {
        const workerId = (0, job_queue_1.getWorkerId)();
        // Try to claim a job
        const job = await (0, job_queue_1.claimNextJob)({
            workerId,
            jobTypes: ['dutchie_product_crawl', 'menu_detection', 'menu_detection_single'],
            lockDurationMinutes: 30,
        });
        if (!job) {
            return; // No jobs available
        }
        currentJob = job;
        console.log(`[Worker] Processing job ${job.id} (type=${job.jobType}, dispensary=${job.dispensaryId})`);
        // Heartbeat for as long as this job is the current one
        heartbeatTimer = setInterval(async () => {
            if (currentJob) {
                try {
                    await (0, job_queue_1.heartbeat)(currentJob.id);
                }
                catch (error) {
                    console.error('[Worker] Heartbeat error:', error);
                }
            }
        }, HEARTBEAT_INTERVAL_MS);
        // Process the job
        await processJob(job);
    }
    catch (error) {
        console.error('[Worker] Error polling for jobs:', error);
        if (currentJob) {
            try {
                await (0, job_queue_1.failJob)(currentJob.id, error.message);
            }
            catch (failError) {
                console.error('[Worker] Error failing job:', failError);
            }
        }
    }
    finally {
        // Safe to reset unconditionally: the in-flight flag guarantees this
        // invocation is the only one that can own the timer and currentJob.
        if (heartbeatTimer) {
            clearInterval(heartbeatTimer);
            heartbeatTimer = null;
        }
        currentJob = null;
        pollInFlight = false;
    }
}
|
||||
/**
 * Run a single claimed job by dispatching on its `jobType`.
 *
 * Errors raised by the handler - or an unrecognised job type - are logged and
 * reported to the queue via failJob rather than propagated. An error thrown by
 * failJob itself does propagate to the caller.
 *
 * @param {object} job - claimed job; `id` and `jobType` are read here.
 * @returns {Promise<void>}
 */
async function processJob(job) {
    // Job type -> handler. A Map keeps the strict string matching a switch has.
    const handlersByType = new Map([
        ['dutchie_product_crawl', processProductCrawlJob],
        ['menu_detection', processMenuDetectionJob],
        ['menu_detection_single', processSingleDetectionJob],
    ]);
    try {
        const handler = handlersByType.get(job.jobType);
        if (!handler) {
            throw new Error(`Unknown job type: ${job.jobType}`);
        }
        await handler(job);
    }
    catch (err) {
        console.error(`[Worker] Job ${job.id} failed:`, err);
        await (0, job_queue_1.failJob)(job.id, err.message);
    }
}
|
||||
// Number of consecutive failures at which a dispensary gets flagged as failed
const MAX_CONSECUTIVE_FAILURES = 3;
/**
 * Record a successful crawl for a dispensary: zeroes its consecutive failure
 * counter and stamps last_crawl_at / updated_at with the current time.
 *
 * @param {*} dispensaryId - id of the dispensaries row to update.
 * @returns {Promise<void>}
 */
async function recordCrawlSuccess(dispensaryId) {
    const resetSql = `UPDATE dispensaries
     SET consecutive_failures = 0,
         last_crawl_at = NOW(),
         updated_at = NOW()
     WHERE id = $1`;
    await (0, connection_1.query)(resetSql, [dispensaryId]);
}
|
||||
/**
 * Record a crawl failure for a dispensary.
 *
 * Increments the dispensary's consecutive failure counter and stores the
 * failure time and reason. Once the counter reaches MAX_CONSECUTIVE_FAILURES
 * the dispensary is flagged: failed_at is set, menu_type and
 * platform_dispensary_id are cleared, and an explanatory failure note is saved.
 *
 * @param {*} dispensaryId - id of the dispensaries row to update.
 * @param {string} errorMessage - reason stored with the failure.
 * @returns {Promise<boolean>} true when this call flagged the dispensary as
 *   failed, false when the failure was only counted.
 */
async function recordCrawlFailure(dispensaryId, errorMessage) {
    // Bump the counter and read the new value back in the same statement.
    const bumped = await (0, connection_1.query)(`UPDATE dispensaries
     SET consecutive_failures = consecutive_failures + 1,
         last_failure_at = NOW(),
         last_failure_reason = $2,
         updated_at = NOW()
     WHERE id = $1
     RETURNING consecutive_failures`, [dispensaryId, errorMessage]);
    // No row returned (or an empty counter) is treated as zero failures.
    const failures = bumped.rows[0]?.consecutive_failures || 0;
    const thresholdReached = failures >= MAX_CONSECUTIVE_FAILURES;
    if (!thresholdReached) {
        console.log(`[Worker] Dispensary ${dispensaryId} failure recorded (${failures}/${MAX_CONSECUTIVE_FAILURES})`);
        return false;
    }
    // Threshold reached: flag the dispensary as failed.
    const note = `Auto-flagged after ${failures} consecutive failures. Last error: ${errorMessage}`;
    await (0, connection_1.query)(`UPDATE dispensaries
       SET failed_at = NOW(),
           menu_type = NULL,
           platform_dispensary_id = NULL,
           failure_notes = $2,
           updated_at = NOW()
       WHERE id = $1`, [dispensaryId, note]);
    console.log(`[Worker] Dispensary ${dispensaryId} flagged as FAILED after ${failures} consecutive failures`);
    return true;
}
|
||||
/**
 * Process a product crawl job for a single dispensary.
 *
 * Flow:
 *  - loads the dispensary row; a dispensary already flagged as failed is
 *    skipped and the job completed with zero counts;
 *  - a dispensary without a platform dispensary id records one failure and the
 *    job fails;
 *  - otherwise the dispensary is crawled, with progress forwarded to the job
 *    queue. Success resets the failure counter and completes the job.
 *
 * Each failed attempt is recorded exactly once, in the catch block below,
 * whether the crawler threw or returned `success: false`. (Previously an
 * unsuccessful result was recorded in the try block and then again in the
 * catch block, so a single failure advanced the counter by two.) If recording
 * the failure flags the dispensary, the job is completed with zero counts
 * instead of being failed.
 *
 * @param {object} job - claimed job; reads `id`, `dispensaryId`, `metadata`.
 * @returns {Promise<void>}
 * @throws {Error} when the job has no dispensaryId, the dispensary is missing
 *   or has no platform id, or the crawl failed without flagging the dispensary.
 */
async function processProductCrawlJob(job) {
    if (!job.dispensaryId) {
        throw new Error('Product crawl job requires dispensary_id');
    }
    // Get dispensary details
    const { rows } = await (0, connection_1.query)(`SELECT ${DISPENSARY_COLUMNS} FROM dispensaries WHERE id = $1`, [job.dispensaryId]);
    if (rows.length === 0) {
        throw new Error(`Dispensary ${job.dispensaryId} not found`);
    }
    const dispensary = (0, discovery_1.mapDbRowToDispensary)(rows[0]);
    // Check if dispensary is already flagged as failed
    if (rows[0].failed_at) {
        console.log(`[Worker] Skipping dispensary ${job.dispensaryId} - already flagged as failed`);
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
        return;
    }
    if (!dispensary.platformDispensaryId) {
        // Record failure and potentially flag
        await recordCrawlFailure(job.dispensaryId, 'Missing platform_dispensary_id');
        throw new Error(`Dispensary ${job.dispensaryId} has no platform_dispensary_id`);
    }
    // Crawl options from job metadata: pricing type defaults to 'rec', and both
    // modes are used unless metadata explicitly sets useBothModes to false.
    const pricingType = job.metadata?.pricingType || 'rec';
    const useBothModes = job.metadata?.useBothModes !== false;
    try {
        // Crawl the dispensary
        const result = await (0, product_crawler_1.crawlDispensaryProducts)(dispensary, pricingType, {
            useBothModes,
            onProgress: async (progress) => {
                // Update progress for live monitoring
                await (0, job_queue_1.updateJobProgress)(job.id, {
                    productsFound: progress.productsFound,
                    productsUpserted: progress.productsUpserted,
                    snapshotsCreated: progress.snapshotsCreated,
                    currentPage: progress.currentPage,
                    totalPages: progress.totalPages,
                });
            },
        });
        if (!result.success) {
            // Hand over to the catch block so the failure is recorded once.
            throw new Error(result.errorMessage || 'Crawl failed');
        }
        // Success! Reset failure counter
        await recordCrawlSuccess(job.dispensaryId);
        await (0, job_queue_1.completeJob)(job.id, {
            productsFound: result.productsFetched,
            productsUpserted: result.productsUpserted,
            snapshotsCreated: result.snapshotsCreated,
        });
    }
    catch (error) {
        // Single place where a failed attempt is counted
        const wasFlagged = await recordCrawlFailure(job.dispensaryId, error.message);
        if (wasFlagged) {
            // Dispensary is now flagged - complete the job rather than fail it
            await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
        }
        else {
            throw error;
        }
    }
}
|
||||
/**
 * Process a bulk menu detection job.
 *
 * Lazily loads the menu-detection module, runs its bulk job with the job's
 * metadata as configuration (an empty object when there is none), and
 * completes the job with the processed / succeeded item counts.
 *
 * @param {object} job - claimed job; reads `id` and `metadata`.
 * @returns {Promise<void>}
 * @throws {Error} when the detection run reports status 'error'.
 */
async function processMenuDetectionJob(job) {
    const { executeMenuDetectionJob: runBulkDetection } = await Promise.resolve().then(() => __importStar(require('./menu-detection')));
    const outcome = await runBulkDetection(job.metadata || {});
    if (outcome.status === 'error') {
        throw new Error(outcome.errorMessage || 'Menu detection failed');
    }
    // Item counts are reported through the queue's product count fields.
    const summary = {
        productsFound: outcome.itemsProcessed,
        productsUpserted: outcome.itemsSucceeded,
    };
    await (0, job_queue_1.completeJob)(job.id, summary);
}
|
||||
/**
 * Process a menu detection job for a single dispensary (the per-dispensary
 * counterpart of the bulk detection job).
 *
 * Flow:
 *  - loads the dispensary row; a dispensary already flagged as failed, or one
 *    whose menu_type is already set to something other than 'unknown', is
 *    skipped and the job completed;
 *  - otherwise detection runs and, on success, the job is completed.
 *
 * Each failed attempt is recorded exactly once, in the catch block below,
 * whether detection threw or returned `success: false`. (Previously an
 * unsuccessful result was recorded in the try block and then again in the
 * catch block, so a single failure advanced the counter by two.) If recording
 * the failure flags the dispensary, the job is completed with zero counts
 * instead of being failed.
 *
 * @param {object} job - claimed job; reads `id` and `dispensaryId`.
 * @returns {Promise<void>}
 * @throws {Error} when the job has no dispensaryId, the dispensary is missing,
 *   or detection failed without flagging the dispensary.
 */
async function processSingleDetectionJob(job) {
    if (!job.dispensaryId) {
        throw new Error('Single detection job requires dispensary_id');
    }
    const { detectAndResolveDispensary } = await Promise.resolve().then(() => __importStar(require('./menu-detection')));
    // Get dispensary details
    const { rows } = await (0, connection_1.query)(`SELECT ${DISPENSARY_COLUMNS} FROM dispensaries WHERE id = $1`, [job.dispensaryId]);
    if (rows.length === 0) {
        throw new Error(`Dispensary ${job.dispensaryId} not found`);
    }
    const dispensary = rows[0];
    // Skip if already flagged as failed
    if (dispensary.failed_at) {
        console.log(`[Worker] Skipping dispensary ${job.dispensaryId} - already flagged as failed`);
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
        return;
    }
    // Skip if a concrete menu type has already been detected
    if (dispensary.menu_type && dispensary.menu_type !== 'unknown') {
        console.log(`[Worker] Skipping dispensary ${job.dispensaryId} - already detected as ${dispensary.menu_type}`);
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 1 });
        return;
    }
    console.log(`[Worker] Detecting menu for dispensary ${job.dispensaryId} (${dispensary.name})...`);
    try {
        const result = await detectAndResolveDispensary(job.dispensaryId);
        if (!result.success) {
            // Hand over to the catch block so the failure is recorded once.
            throw new Error(result.error || 'Detection failed');
        }
        console.log(`[Worker] Dispensary ${job.dispensaryId}: detected ${result.detectedProvider}, platformId=${result.platformDispensaryId || 'none'}`);
        await (0, job_queue_1.completeJob)(job.id, {
            productsFound: 1,
            productsUpserted: result.platformDispensaryId ? 1 : 0,
        });
    }
    catch (error) {
        // Single place where a failed attempt is counted
        const wasFlagged = await recordCrawlFailure(job.dispensaryId, error.message);
        if (wasFlagged) {
            // Dispensary is now flagged - complete the job rather than fail it
            await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
        }
        else {
            throw error;
        }
    }
}
|
||||
// ============================================================
|
||||
// SHUTDOWN HANDLING
|
||||
// ============================================================
|
||||
/**
 * Install SIGTERM / SIGINT handlers that stop the worker and exit the process.
 *
 * The first signal starts stopWorker() and remembers its promise; later signals
 * reuse that promise instead of starting a second shutdown. The process exits
 * with code 0 after a clean stop. If stopWorker() rejects, the error is logged
 * and the process exits with code 1, so a failed shutdown neither surfaces as
 * an unhandled rejection nor leaves the process running.
 */
function setupShutdownHandlers() {
    const shutdown = async (signal) => {
        if (shutdownPromise)
            return shutdownPromise;
        console.log(`\n[Worker] Received ${signal}, shutting down...`);
        shutdownPromise = stopWorker();
        try {
            await shutdownPromise;
        }
        catch (error) {
            console.error('[Worker] Error during shutdown:', error);
            process.exit(1);
        }
        process.exit(0);
    };
    process.on('SIGTERM', () => shutdown('SIGTERM'));
    process.on('SIGINT', () => shutdown('SIGINT'));
}
|
||||
// ============================================================
|
||||
// STANDALONE WORKER ENTRY POINT
|
||||
// ============================================================
|
||||
// When this file is executed directly (rather than required by another
// module), start a worker; a startup failure is logged and ends the process
// with exit code 1.
if (require.main === module) {
    const exitOnFatal = (fatal) => {
        console.error('[Worker] Fatal error:', fatal);
        process.exit(1);
    };
    startWorker().catch(exitOnFatal);
}
|
||||
Reference in New Issue
Block a user