Files
cannaiq/backend/dist/services/crawler-logger.js
Kelly 66e07b2009 fix(monitor): remove non-existent worker columns from job_run_logs query
The job_run_logs table tracks scheduled job orchestration, not individual
worker jobs. Worker info (worker_id, worker_hostname) belongs on
dispensary_crawl_jobs, not job_run_logs.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-03 18:45:05 -07:00

203 lines
6.3 KiB
JavaScript

"use strict";
/**
* CrawlerLogger - Structured logging for crawler operations
*
* High-signal, low-noise logging with JSON output for:
* - Job lifecycle (one summary per job)
* - Provider/mode changes
* - Sandbox events
* - Queue failures
*
* NO per-product logging - that's too noisy.
*/
// Flag the CommonJS exports object with `__esModule: true`.
// NOTE(review): this looks like standard transpiler-emitted ES-module interop
// boilerplate (e.g. TypeScript output) - confirm before hand-editing.
Object.defineProperty(exports, "__esModule", { value: true });
// Initialise the `crawlerLogger` export to undefined up front; the real
// instance is assigned at the bottom of the file, after the class definition.
exports.crawlerLogger = void 0;
/**
 * Structured JSON logger for crawler operations.
 *
 * Every event method builds one flat record - `timestamp`, `level`, `event`,
 * followed by the event-specific fields - and writes it to the console as a
 * single `[CRAWLER] {json}` line.
 */
class CrawlerLoggerService {
    /**
     * Build a JSON.stringify replacer used only when plain serialization fails.
     *
     * It converts BigInt values to decimal strings and replaces a reference
     * back to an ancestor object with the string "[Circular]". Objects that
     * are merely shared (referenced twice without forming a cycle) are
     * serialized normally.
     *
     * @returns {function(string, *): *} Replacer for JSON.stringify.
     */
    static createSafeReplacer() {
        // Chain of objects from the root down to the holder of the current key.
        const ancestors = [];
        // Must be a regular function: JSON.stringify binds `this` to the holder.
        return function (key, value) {
            if (typeof value === 'bigint') {
                return value.toString();
            }
            if (typeof value !== 'object' || value === null) {
                return value;
            }
            // Unwind to the current holder so siblings are not seen as ancestors.
            while (ancestors.length > 0 && ancestors[ancestors.length - 1] !== this) {
                ancestors.pop();
            }
            if (ancestors.includes(value)) {
                return '[Circular]';
            }
            ancestors.push(value);
            return value;
        };
    }
    /**
     * Serialize a log payload to a JSON string.
     *
     * Plain JSON.stringify throws on circular references and BigInt values;
     * a logger should not crash the code path it is observing, so failures
     * fall back to a tolerant replacer, and finally to a minimal record that
     * carries the serialization error message.
     *
     * @param {object} payload - Record to serialize.
     * @returns {string} JSON text for the payload.
     */
    formatLog(payload) {
        try {
            return JSON.stringify(payload);
        }
        catch (err) {
            // Fall through to the tolerant serializers below.
        }
        try {
            return JSON.stringify(payload, CrawlerLoggerService.createSafeReplacer());
        }
        catch (err) {
            // Last resort (e.g. a throwing toJSON): keep the identifying fields.
            return JSON.stringify({
                timestamp: payload.timestamp,
                level: payload.level,
                event: payload.event,
                serialization_error: err instanceof Error ? err.message : String(err),
            });
        }
    }
    /**
     * Write a payload to the console, prefixed with `[CRAWLER] `.
     *
     * The console method is chosen from `payload.level`: 'error' uses
     * console.error, 'warn' uses console.warn, 'debug' uses console.debug,
     * and any other value uses console.log.
     *
     * @param {object} payload - Record to log; `level` selects the channel.
     */
    log(payload) {
        const line = `[CRAWLER] ${this.formatLog(payload)}`;
        switch (payload.level) {
            case 'error':
                console.error(line);
                break;
            case 'warn':
                console.warn(line);
                break;
            case 'debug':
                console.debug(line);
                break;
            default:
                console.log(line);
        }
    }
    /**
     * Stamp a record with the current ISO timestamp, level and event name,
     * append the event-specific fields in the order given, and log it.
     *
     * @param {string} level - Log level for the record.
     * @param {string} event - Event name for the record.
     * @param {object} fields - Event-specific fields appended after the header.
     */
    emit(level, event, fields) {
        this.log(Object.assign({ timestamp: new Date().toISOString(), level, event }, fields));
    }
    /**
     * Log when a crawl job starts (info level, event `job_started`).
     *
     * @param {object} params - Reads job_id, store_id, store_name, job_type,
     *   trigger_type and provider.
     */
    jobStarted(params) {
        this.emit('info', 'job_started', {
            job_id: params.job_id,
            store_id: params.store_id,
            store_name: params.store_name,
            job_type: params.job_type,
            trigger_type: params.trigger_type,
            provider: params.provider,
        });
    }
    /**
     * Log when a crawl job completes successfully (info level, event
     * `job_completed`).
     *
     * @param {object} params - Reads job_id, store_id, store_name, duration_ms,
     *   products_found, products_new, products_updated, products_marked_oos
     *   and provider.
     */
    jobCompleted(params) {
        this.emit('info', 'job_completed', {
            job_id: params.job_id,
            store_id: params.store_id,
            store_name: params.store_name,
            duration_ms: params.duration_ms,
            products_found: params.products_found,
            products_new: params.products_new,
            products_updated: params.products_updated,
            products_marked_oos: params.products_marked_oos,
            provider: params.provider,
        });
    }
    /**
     * Log when a crawl job fails (error level, event `job_failed`).
     *
     * @param {object} params - Reads job_id, store_id, store_name, duration_ms,
     *   error_message, error_code and provider.
     */
    jobFailed(params) {
        this.emit('error', 'job_failed', {
            job_id: params.job_id,
            store_id: params.store_id,
            store_name: params.store_name,
            duration_ms: params.duration_ms,
            error_message: params.error_message,
            error_code: params.error_code,
            provider: params.provider,
        });
    }
    /**
     * Log when a provider is detected for a dispensary (info level, event
     * `provider_detected`).
     *
     * @param {object} params - Reads dispensary_id, dispensary_name,
     *   detected_provider, confidence, detection_method, menu_url and category.
     */
    providerDetected(params) {
        this.emit('info', 'provider_detected', {
            dispensary_id: params.dispensary_id,
            dispensary_name: params.dispensary_name,
            detected_provider: params.detected_provider,
            confidence: params.confidence,
            detection_method: params.detection_method,
            menu_url: params.menu_url,
            category: params.category,
        });
    }
    /**
     * Log when a dispensary's provider changes (info level, event
     * `provider_changed`).
     *
     * @param {object} params - Reads dispensary_id, dispensary_name,
     *   old_provider, new_provider, old_confidence, new_confidence and category.
     */
    providerChanged(params) {
        this.emit('info', 'provider_changed', {
            dispensary_id: params.dispensary_id,
            dispensary_name: params.dispensary_name,
            old_provider: params.old_provider,
            new_provider: params.new_provider,
            old_confidence: params.old_confidence,
            new_confidence: params.new_confidence,
            category: params.category,
        });
    }
    /**
     * Log when a dispensary's crawler mode changes (sandbox -> production,
     * etc.) (info level, event `mode_changed`).
     *
     * @param {object} params - Reads dispensary_id, dispensary_name, old_mode,
     *   new_mode, reason, category and provider.
     */
    modeChanged(params) {
        this.emit('info', 'mode_changed', {
            dispensary_id: params.dispensary_id,
            dispensary_name: params.dispensary_name,
            old_mode: params.old_mode,
            new_mode: params.new_mode,
            reason: params.reason,
            category: params.category,
            provider: params.provider,
        });
    }
    /**
     * Log sandbox crawl events. The record's event name is `params.event`;
     * the level is 'error' when that name is 'sandbox_failed', else 'info'.
     *
     * @param {object} params - Reads event, dispensary_id, dispensary_name,
     *   template_name, category, quality_score, products_extracted,
     *   fields_missing, error_message and provider.
     */
    sandboxEvent(params) {
        const level = params.event === 'sandbox_failed' ? 'error' : 'info';
        this.emit(level, params.event, {
            dispensary_id: params.dispensary_id,
            dispensary_name: params.dispensary_name,
            template_name: params.template_name,
            category: params.category,
            quality_score: params.quality_score,
            products_extracted: params.products_extracted,
            fields_missing: params.fields_missing,
            error_message: params.error_message,
            provider: params.provider,
        });
    }
    /**
     * Log queue processing failures (error level, event `queue_failure`).
     *
     * @param {object} params - Reads queue_type, error_message and
     *   affected_items.
     */
    queueFailure(params) {
        this.emit('error', 'queue_failure', {
            queue_type: params.queue_type,
            error_message: params.error_message,
            affected_items: params.affected_items,
        });
    }
    /**
     * Log detection scan summary (info level, event `detection_scan`).
     *
     * @param {object} params - Reads total_scanned, detected, failed, skipped
     *   and duration_ms.
     */
    detectionScan(params) {
        this.emit('info', 'detection_scan', {
            total_scanned: params.total_scanned,
            detected: params.detected,
            failed: params.failed,
            skipped: params.skipped,
            duration_ms: params.duration_ms,
        });
    }
    /**
     * Log intelligence run summary (info level, event `intelligence_run`).
     *
     * @param {object} params - Reads run_type, dispensaries_processed,
     *   jobs_queued and duration_ms.
     */
    intelligenceRun(params) {
        this.emit('info', 'intelligence_run', {
            run_type: params.run_type,
            dispensaries_processed: params.dispensaries_processed,
            jobs_queued: params.jobs_queued,
            duration_ms: params.duration_ms,
        });
    }
}
// Export a single shared CrawlerLoggerService instance as `crawlerLogger`;
// it is constructed once, when this module is evaluated.
exports.crawlerLogger = new CrawlerLoggerService();