The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
441 lines
17 KiB
JavaScript
441 lines
17 KiB
JavaScript
"use strict";
|
|
/**
|
|
* Worker Service
|
|
*
|
|
* Polls the job queue and processes crawl jobs.
|
|
* Each worker instance runs independently, claiming jobs atomically.
|
|
*/
|
|
// TypeScript interop helper: exposes property `key` of `source` on `target`
// under the name `alias` (defaults to `key`). A pre-existing helper found on
// `this` is reused when present.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exportedName = alias === undefined ? key : alias;
        const existing = Object.getOwnPropertyDescriptor(source, key);
        let needsGetter;
        if (!existing) {
            needsGetter = true;
        }
        else if ("get" in existing) {
            // Accessor on the source: only wrap it when the source is not an ES module.
            needsGetter = !source.__esModule;
        }
        else {
            // Data property that can still change: forward reads to the source.
            needsGetter = existing.writable || existing.configurable;
        }
        const descriptor = needsGetter
            ? { enumerable: true, get: function () { return source[key]; } }
            : existing;
        Object.defineProperty(target, exportedName, descriptor);
    }
    : function (target, source, key, alias) {
        // Fallback used when Object.create is unavailable: plain value copy.
        target[alias === undefined ? key : alias] = source[key];
    });
|
|
// TypeScript interop helper: attaches `value` to `target` as its "default"
// export. A pre-existing helper found on `this` is reused when present.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        const descriptor = { enumerable: true, value: value };
        Object.defineProperty(target, "default", descriptor);
    }
    : function (target, value) {
        // Fallback used when Object.create is unavailable: plain assignment.
        target["default"] = value;
    });
|
|
// TypeScript interop helper: turns a CommonJS module into a namespace-style
// object. Modules already marked `__esModule` are returned untouched; otherwise
// every own key except "default" is bound onto a fresh object and the module
// itself becomes that object's "default".
var __importStar = (this && this.__importStar) || (function () {
    // Own property names of `obj`, with a for-in fallback for environments
    // that lack Object.getOwnPropertyNames.
    function listOwnKeys(obj) {
        if (Object.getOwnPropertyNames) {
            return Object.getOwnPropertyNames(obj);
        }
        const names = [];
        for (const name in obj) {
            if (Object.prototype.hasOwnProperty.call(obj, name)) {
                names.push(name);
            }
        }
        return names;
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        const namespace = {};
        if (mod != null) {
            for (const name of listOwnKeys(mod)) {
                if (name !== "default") {
                    __createBinding(namespace, mod, name);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.startWorker = startWorker;
|
|
exports.stopWorker = stopWorker;
|
|
exports.getWorkerStatus = getWorkerStatus;
|
|
const job_queue_1 = require("./job-queue");
|
|
const product_crawler_1 = require("./product-crawler");
|
|
const discovery_1 = require("./discovery");
|
|
const connection_1 = require("../db/connection");
|
|
// Explicit column list for the dispensaries table (avoids SELECT * issues with schema differences).
// NOTE: failed_at is included for worker compatibility checks.
// The literal is interpolated verbatim into SELECT statements, so it must hold
// nothing but a comma-separated list of column names.
const DISPENSARY_COLUMNS = `
  id, name, slug, city, state, zip, address, latitude, longitude,
  menu_type, menu_url, platform_dispensary_id, website,
  provider_detection_data, created_at, updated_at, failed_at
`;
|
|
// ============================================================
|
|
// WORKER CONFIG
|
|
// ============================================================
|
|
/** Delay between job-queue polls: 5 seconds. */
const POLL_INTERVAL_MS = 5 * 1000;
/** Delay between heartbeats for the job in progress: 60 seconds. */
const HEARTBEAT_INTERVAL_MS = 60 * 1000;
/** Delay between stale-job recovery sweeps: 5 minutes. */
const STALE_CHECK_INTERVAL_MS = 5 * 60 * 1000;
/** How long shutdown waits for the in-flight job to finish: 30 seconds. */
const SHUTDOWN_GRACE_PERIOD_MS = 30 * 1000;
|
|
// ============================================================
|
|
// WORKER STATE
|
|
// ============================================================
|
|
/** Set to true by startWorker() and back to false by stopWorker(). */
let isRunning = false;
/** The job claimed by pollForJobs() that is currently being processed, or null. */
let currentJob = null;
/** Interval handle for the recurring pollForJobs() call, or null. */
let pollTimer = null;
/** Interval handle for the heartbeat of the job in progress, or null. */
let heartbeatTimer = null;
/** Interval handle for the recurring stale-job recovery sweep, or null. */
let staleCheckTimer = null;
/** Promise of the stopWorker() call started by a shutdown signal, or null. */
let shutdownPromise = null;
|
|
// ============================================================
|
|
// WORKER LIFECYCLE
|
|
// ============================================================
|
|
/**
 * Start the worker.
 *
 * Does nothing (besides logging) when the worker is already running.
 * Otherwise it installs the shutdown signal handlers, schedules the
 * recurring job poll and the recurring stale-job recovery, and then runs
 * one poll right away before returning.
 *
 * @returns {Promise<void>} resolves once the initial poll has finished
 */
async function startWorker() {
    if (isRunning) {
        console.log('[Worker] Already running');
        return;
    }
    const id = (0, job_queue_1.getWorkerId)();
    const host = (0, job_queue_1.getWorkerHostname)();
    console.log(`[Worker] Starting worker ${id} on ${host}`);
    isRunning = true;
    // Graceful shutdown on SIGTERM / SIGINT
    setupShutdownHandlers();
    // Recurring poll for claimable jobs
    pollTimer = setInterval(pollForJobs, POLL_INTERVAL_MS);
    // Recurring stale-job recovery; every worker schedules it, and a failed
    // sweep is only logged so the interval keeps running.
    const recoverStaleJobsSafely = async () => {
        try {
            await (0, job_queue_1.recoverStaleJobs)(15);
        }
        catch (recoveryError) {
            console.error('[Worker] Error recovering stale jobs:', recoveryError);
        }
    };
    staleCheckTimer = setInterval(recoverStaleJobsSafely, STALE_CHECK_INTERVAL_MS);
    // Do not wait for the first interval tick
    await pollForJobs();
    console.log(`[Worker] Worker ${id} started, polling every ${POLL_INTERVAL_MS}ms`);
}
|
|
/**
 * Stop the worker gracefully.
 *
 * Does nothing when the worker is not running. Otherwise it clears the poll,
 * heartbeat and stale-check timers, then waits (checking once per second, up
 * to SHUTDOWN_GRACE_PERIOD_MS) for the in-flight job to finish. A job that is
 * still running after the grace period is reported through failJob with the
 * message 'Worker shutdown'.
 *
 * A failJob error is logged instead of propagated, so that callers (such as
 * the signal handler) can always finish shutting down.
 *
 * @returns {Promise<void>}
 */
async function stopWorker() {
    if (!isRunning) {
        return;
    }
    console.log('[Worker] Stopping worker...');
    isRunning = false;
    // Clear timers
    if (pollTimer) {
        clearInterval(pollTimer);
        pollTimer = null;
    }
    if (heartbeatTimer) {
        clearInterval(heartbeatTimer);
        heartbeatTimer = null;
    }
    if (staleCheckTimer) {
        clearInterval(staleCheckTimer);
        staleCheckTimer = null;
    }
    // Wait for current job to complete
    if (currentJob) {
        console.log(`[Worker] Waiting for job ${currentJob.id} to complete...`);
        const deadline = Date.now() + SHUTDOWN_GRACE_PERIOD_MS;
        while (currentJob && Date.now() < deadline) {
            await new Promise((resolve) => setTimeout(resolve, 1000));
        }
        // pollForJobs() resets currentJob to null when the job finishes
        const unfinishedJob = currentJob;
        if (unfinishedJob) {
            console.log(`[Worker] Job ${unfinishedJob.id} did not complete in time, marking for retry`);
            try {
                await (0, job_queue_1.failJob)(unfinishedJob.id, 'Worker shutdown');
            }
            catch (failError) {
                // Shutdown must still complete when the queue is unreachable.
                console.error('[Worker] Error failing job during shutdown:', failError);
            }
        }
    }
    console.log('[Worker] Worker stopped');
}
|
|
/**
 * Get worker status.
 *
 * @returns {{isRunning: boolean, workerId: *, hostname: *, currentJob: *}}
 *   snapshot of the running flag, the worker id and hostname reported by the
 *   job queue module, and the job currently being processed (null when idle)
 */
function getWorkerStatus() {
    const workerId = (0, job_queue_1.getWorkerId)();
    const hostname = (0, job_queue_1.getWorkerHostname)();
    return { isRunning, workerId, hostname, currentJob };
}
|
|
// ============================================================
|
|
// JOB PROCESSING
|
|
// ============================================================
|
|
/**
 * True while a pollForJobs() call is in flight. currentJob is only assigned
 * after the claim has been awaited, so on its own it cannot stop a second
 * interval tick from starting another claim while the first is still pending.
 */
let isPolling = false;
/**
 * Poll for and process the next available job.
 *
 * Returns immediately when the worker is stopped, a job is already being
 * processed, or another poll is still in flight. Otherwise it tries to claim
 * one job, starts a heartbeat interval for it and hands it to processJob().
 * Errors are logged and, when a job had been claimed, reported through
 * failJob; this function itself does not reject. The heartbeat timer and
 * currentJob are always reset afterwards.
 *
 * @returns {Promise<void>}
 */
async function pollForJobs() {
    if (!isRunning || currentJob || isPolling) {
        return; // Stopped, already processing a job, or a claim is in flight
    }
    isPolling = true;
    let job = null;
    try {
        const workerId = (0, job_queue_1.getWorkerId)();
        // Try to claim a job
        job = await (0, job_queue_1.claimNextJob)({
            workerId,
            jobTypes: ['dutchie_product_crawl', 'menu_detection', 'menu_detection_single'],
            lockDurationMinutes: 30,
        });
        if (!job) {
            return; // No jobs available
        }
        currentJob = job;
        console.log(`[Worker] Processing job ${job.id} (type=${job.jobType}, dispensary=${job.dispensaryId})`);
        // Start heartbeat for this job
        heartbeatTimer = setInterval(async () => {
            if (currentJob) {
                try {
                    await (0, job_queue_1.heartbeat)(currentJob.id);
                }
                catch (error) {
                    console.error('[Worker] Heartbeat error:', error);
                }
            }
        }, HEARTBEAT_INTERVAL_MS);
        // Process the job
        await processJob(job);
    }
    catch (error) {
        console.error('[Worker] Error polling for jobs:', error);
        if (job) {
            // Thrown values are not guaranteed to be Error instances.
            const reason = error instanceof Error ? error.message : String(error);
            try {
                await (0, job_queue_1.failJob)(job.id, reason);
            }
            catch (failError) {
                console.error('[Worker] Error failing job:', failError);
            }
        }
    }
    finally {
        // Clear heartbeat timer
        if (heartbeatTimer) {
            clearInterval(heartbeatTimer);
            heartbeatTimer = null;
        }
        currentJob = null;
        isPolling = false;
    }
}
|
|
/**
 * Process a single job.
 *
 * Routes the job to the handler registered for its jobType. An unknown
 * jobType or an error thrown by the handler is logged and reported through
 * failJob.
 *
 * @param {object} job - claimed job; `id` and `jobType` are read here
 * @returns {Promise<void>}
 */
async function processJob(job) {
    try {
        const handlersByType = new Map([
            ['dutchie_product_crawl', processProductCrawlJob],
            ['menu_detection', processMenuDetectionJob],
            ['menu_detection_single', processSingleDetectionJob],
        ]);
        const handler = handlersByType.get(job.jobType);
        if (!handler) {
            throw new Error(`Unknown job type: ${job.jobType}`);
        }
        await handler(job);
    }
    catch (jobError) {
        console.error(`[Worker] Job ${job.id} failed:`, jobError);
        await (0, job_queue_1.failJob)(job.id, jobError.message);
    }
}
|
|
// Number of back-to-back failures after which a dispensary gets flagged
const MAX_CONSECUTIVE_FAILURES = 3;
/**
 * Record a successful crawl.
 *
 * Resets the dispensary's consecutive_failures counter to 0 and stamps
 * last_crawl_at and updated_at with the current database time.
 *
 * @param {*} dispensaryId - id of the dispensaries row to update
 * @returns {Promise<void>}
 */
async function recordCrawlSuccess(dispensaryId) {
    const resetFailuresSql = `UPDATE dispensaries
     SET consecutive_failures = 0,
         last_crawl_at = NOW(),
         updated_at = NOW()
     WHERE id = $1`;
    const params = [dispensaryId];
    await (0, connection_1.query)(resetFailuresSql, params);
}
|
|
/**
 * Record a crawl failure.
 *
 * Increments the dispensary's consecutive_failures counter and stores the
 * failure time and reason. Once the counter reaches
 * MAX_CONSECUTIVE_FAILURES the dispensary is flagged: failed_at is set,
 * menu_type and platform_dispensary_id are cleared, and a note with the
 * last error is written to failure_notes.
 *
 * @param {*} dispensaryId - id of the dispensaries row to update
 * @param {string} errorMessage - reason stored as last_failure_reason
 * @returns {Promise<boolean>} true if the dispensary was flagged as failed
 */
async function recordCrawlFailure(dispensaryId, errorMessage) {
    // Bump the counter and read back the new value in one statement
    const incrementSql = `UPDATE dispensaries
     SET consecutive_failures = consecutive_failures + 1,
         last_failure_at = NOW(),
         last_failure_reason = $2,
         updated_at = NOW()
     WHERE id = $1
     RETURNING consecutive_failures`;
    const updated = await (0, connection_1.query)(incrementSql, [dispensaryId, errorMessage]);
    // No matching row (or a falsy counter) is treated as zero failures
    const failureCount = updated.rows[0]?.consecutive_failures || 0;
    if (failureCount < MAX_CONSECUTIVE_FAILURES) {
        console.log(`[Worker] Dispensary ${dispensaryId} failure recorded (${failureCount}/${MAX_CONSECUTIVE_FAILURES})`);
        return false;
    }
    // Threshold reached: flag the dispensary as failed
    const flagSql = `UPDATE dispensaries
       SET failed_at = NOW(),
           menu_type = NULL,
           platform_dispensary_id = NULL,
           failure_notes = $2,
           updated_at = NOW()
       WHERE id = $1`;
    const note = `Auto-flagged after ${failureCount} consecutive failures. Last error: ${errorMessage}`;
    await (0, connection_1.query)(flagSql, [dispensaryId, note]);
    console.log(`[Worker] Dispensary ${dispensaryId} flagged as FAILED after ${failureCount} consecutive failures`);
    return true;
}
|
|
/**
 * Process a product crawl job for a single dispensary.
 *
 * Loads the dispensary row, skips (and completes the job with zero counts)
 * when the dispensary is already flagged as failed, and otherwise crawls its
 * products, forwarding progress to updateJobProgress.
 *
 * Each failed attempt is recorded exactly once through recordCrawlFailure.
 * If that call flags the dispensary, the job is completed with zero counts;
 * otherwise the error is rethrown so the caller can fail the job.
 *
 * @param {object} job - claimed job; reads `id`, `dispensaryId` and the
 *   optional `metadata.pricingType` / `metadata.useBothModes`
 * @returns {Promise<void>}
 * @throws {Error} when the job has no dispensaryId, the dispensary does not
 *   exist or has no platform_dispensary_id, or the crawl failed without the
 *   dispensary becoming flagged
 */
async function processProductCrawlJob(job) {
    if (!job.dispensaryId) {
        throw new Error('Product crawl job requires dispensary_id');
    }
    // Get dispensary details
    const { rows } = await (0, connection_1.query)(`SELECT ${DISPENSARY_COLUMNS} FROM dispensaries WHERE id = $1`, [job.dispensaryId]);
    if (rows.length === 0) {
        throw new Error(`Dispensary ${job.dispensaryId} not found`);
    }
    const dispensary = (0, discovery_1.mapDbRowToDispensary)(rows[0]);
    // Check if dispensary is already flagged as failed
    if (rows[0].failed_at) {
        console.log(`[Worker] Skipping dispensary ${job.dispensaryId} - already flagged as failed`);
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
        return;
    }
    if (!dispensary.platformDispensaryId) {
        // Record failure and potentially flag
        await recordCrawlFailure(job.dispensaryId, 'Missing platform_dispensary_id');
        throw new Error(`Dispensary ${job.dispensaryId} has no platform_dispensary_id`);
    }
    // Get crawl options from job metadata
    const pricingType = job.metadata?.pricingType || 'rec';
    const useBothModes = job.metadata?.useBothModes !== false;
    try {
        // Crawl the dispensary
        const result = await (0, product_crawler_1.crawlDispensaryProducts)(dispensary, pricingType, {
            useBothModes,
            onProgress: async (progress) => {
                // Update progress for live monitoring
                await (0, job_queue_1.updateJobProgress)(job.id, {
                    productsFound: progress.productsFound,
                    productsUpserted: progress.productsUpserted,
                    snapshotsCreated: progress.snapshotsCreated,
                    currentPage: progress.currentPage,
                    totalPages: progress.totalPages,
                });
            },
        });
        if (!result.success) {
            // Funnel a reported failure into the catch below, which is the
            // only place that records it; recording here too would count
            // one failed crawl twice.
            throw new Error(result.errorMessage || 'Crawl failed');
        }
        // Success! Reset failure counter
        await recordCrawlSuccess(job.dispensaryId);
        await (0, job_queue_1.completeJob)(job.id, {
            productsFound: result.productsFetched,
            productsUpserted: result.productsUpserted,
            snapshotsCreated: result.snapshotsCreated,
        });
    }
    catch (error) {
        // Record the failure (once per attempt)
        const wasFlagged = await recordCrawlFailure(job.dispensaryId, error.message);
        if (!wasFlagged) {
            throw error;
        }
        // Dispensary is now flagged - complete the job rather than fail it
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
    }
}
|
|
/**
 * Process a menu detection job (bulk).
 *
 * Loads ./menu-detection on demand, runs executeMenuDetectionJob with the
 * job's metadata (an empty object when absent) and completes the job with
 * the processed / succeeded item counts.
 *
 * @param {object} job - claimed job; reads `id` and `metadata`
 * @returns {Promise<void>}
 * @throws {Error} when the detection run reports status 'error'
 */
async function processMenuDetectionJob(job) {
    const { executeMenuDetectionJob: runBulkDetection } = await Promise.resolve().then(() => __importStar(require('./menu-detection')));
    const outcome = await runBulkDetection(job.metadata || {});
    if (outcome.status === 'error') {
        throw new Error(outcome.errorMessage || 'Menu detection failed');
    }
    const summary = {
        productsFound: outcome.itemsProcessed,
        productsUpserted: outcome.itemsSucceeded,
    };
    await (0, job_queue_1.completeJob)(job.id, summary);
}
|
|
/**
 * Process a single dispensary menu detection job.
 * This is the parallelizable version - each worker can detect one dispensary at a time.
 *
 * The job is completed without running detection when the dispensary is
 * already flagged as failed or already has a menu_type other than 'unknown'.
 *
 * Each failed attempt is recorded exactly once through recordCrawlFailure.
 * If that call flags the dispensary, the job is completed with zero counts;
 * otherwise the error is rethrown so the caller can fail the job.
 *
 * @param {object} job - claimed job; reads `id` and `dispensaryId`
 * @returns {Promise<void>}
 * @throws {Error} when the job has no dispensaryId, the dispensary does not
 *   exist, or detection failed without the dispensary becoming flagged
 */
async function processSingleDetectionJob(job) {
    if (!job.dispensaryId) {
        throw new Error('Single detection job requires dispensary_id');
    }
    const { detectAndResolveDispensary } = await Promise.resolve().then(() => __importStar(require('./menu-detection')));
    // Get dispensary details
    const { rows } = await (0, connection_1.query)(`SELECT ${DISPENSARY_COLUMNS} FROM dispensaries WHERE id = $1`, [job.dispensaryId]);
    if (rows.length === 0) {
        throw new Error(`Dispensary ${job.dispensaryId} not found`);
    }
    const dispensary = rows[0];
    // Skip if already detected or failed
    if (dispensary.failed_at) {
        console.log(`[Worker] Skipping dispensary ${job.dispensaryId} - already flagged as failed`);
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
        return;
    }
    if (dispensary.menu_type && dispensary.menu_type !== 'unknown') {
        console.log(`[Worker] Skipping dispensary ${job.dispensaryId} - already detected as ${dispensary.menu_type}`);
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 1 });
        return;
    }
    console.log(`[Worker] Detecting menu for dispensary ${job.dispensaryId} (${dispensary.name})...`);
    try {
        const result = await detectAndResolveDispensary(job.dispensaryId);
        if (!result.success) {
            // Funnel a reported failure into the catch below, which is the
            // only place that records it; recording here too would count
            // one failed detection twice.
            throw new Error(result.error || 'Detection failed');
        }
        console.log(`[Worker] Dispensary ${job.dispensaryId}: detected ${result.detectedProvider}, platformId=${result.platformDispensaryId || 'none'}`);
        await (0, job_queue_1.completeJob)(job.id, {
            productsFound: 1,
            productsUpserted: result.platformDispensaryId ? 1 : 0,
        });
    }
    catch (error) {
        // Record the failure (once per attempt)
        const wasFlagged = await recordCrawlFailure(job.dispensaryId, error.message);
        if (!wasFlagged) {
            throw error;
        }
        // Dispensary is now flagged - complete the job rather than fail it
        await (0, job_queue_1.completeJob)(job.id, { productsFound: 0, productsUpserted: 0 });
    }
}
|
|
// ============================================================
|
|
// SHUTDOWN HANDLING
|
|
// ============================================================
|
|
/**
 * Register SIGTERM and SIGINT handlers that stop the worker and then exit
 * the process.
 *
 * The first signal starts stopWorker() and stores its promise in
 * shutdownPromise; later signals are ignored while that shutdown is under
 * way. The process exits with code 0 after a clean stop and with code 1 when
 * stopWorker() rejects, so a failed shutdown can never leave the process
 * hanging with an unhandled rejection.
 */
function setupShutdownHandlers() {
    const shutdown = async (signal) => {
        if (shutdownPromise) {
            return; // Shutdown already in progress
        }
        console.log(`\n[Worker] Received ${signal}, shutting down...`);
        let exitCode = 0;
        try {
            shutdownPromise = stopWorker();
            await shutdownPromise;
        }
        catch (error) {
            console.error('[Worker] Error during shutdown:', error);
            exitCode = 1;
        }
        process.exit(exitCode);
    };
    process.on('SIGTERM', () => {
        void shutdown('SIGTERM');
    });
    process.on('SIGINT', () => {
        void shutdown('SIGINT');
    });
}
|
|
// ============================================================
|
|
// STANDALONE WORKER ENTRY POINT
|
|
// ============================================================
|
|
if (require.main === module) {
    // Executed directly rather than loaded by another module: boot the
    // worker and exit with a non-zero code if startup rejects.
    const exitOnFatalError = (fatalError) => {
        console.error('[Worker] Fatal error:', fatalError);
        process.exit(1);
    };
    startWorker().catch(exitOnFatalError);
}
|