The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
203 lines
6.3 KiB
JavaScript
203 lines
6.3 KiB
JavaScript
"use strict";
/**
 * CrawlerLogger - Structured logging for crawler operations
 *
 * High-signal, low-noise logging with JSON output for:
 * - Job lifecycle (one summary per job)
 * - Provider/mode changes
 * - Sandbox events
 * - Queue failures
 *
 * NO per-product logging - that's too noisy.
 */
// CommonJS/ES-module interop marker, followed by a placeholder for the
// `crawlerLogger` export that is assigned at the bottom of the file.
Object.defineProperty(exports, "__esModule", { value: true });
exports.crawlerLogger = void 0;
/**
 * Writes crawler events to the console as single-line JSON records, each
 * prefixed with `[CRAWLER] `.
 *
 * Every event helper builds a flat record that starts with `timestamp`,
 * `level` and `event`, followed by the event-specific fields copied from
 * `params`. Fields whose value is `undefined` are dropped by JSON.stringify,
 * so optional fields simply do not appear in the output.
 */
class CrawlerLoggerService {
    /**
     * Serialize a log payload to a JSON string.
     *
     * JSON.stringify throws on circular references and BigInt values. A logger
     * must not take down the operation it is reporting on, so in that case a
     * minimal record is returned instead: the payload's `timestamp`, `level`
     * and `event` (when they are strings) plus a `serialization_error` field
     * holding the reason.
     *
     * @param {object} payload - Record to serialize.
     * @returns {string} JSON text for the payload, or for the fallback record.
     */
    formatLog(payload) {
        try {
            return JSON.stringify(payload);
        }
        catch (err) {
            // Only plain strings are copied over so the fallback itself can
            // never hit the same serialization failure.
            const stringOr = (value, fallback) => (typeof value === 'string' ? value : fallback);
            return JSON.stringify({
                timestamp: stringOr(payload.timestamp, new Date().toISOString()),
                level: stringOr(payload.level, undefined),
                event: stringOr(payload.event, undefined),
                serialization_error: err instanceof Error ? err.message : String(err),
            });
        }
    }
    /**
     * Write one record to the console.
     *
     * The console method is chosen from `payload.level`: 'error' uses
     * console.error, 'warn' uses console.warn, 'debug' uses console.debug and
     * every other value (including 'info') uses console.log.
     *
     * @param {object} payload - Record to emit; `payload.level` selects the console method.
     */
    log(payload) {
        const formatted = this.formatLog(payload);
        switch (payload.level) {
            case 'error':
                console.error(`[CRAWLER] ${formatted}`);
                break;
            case 'warn':
                console.warn(`[CRAWLER] ${formatted}`);
                break;
            case 'debug':
                console.debug(`[CRAWLER] ${formatted}`);
                break;
            default:
                console.log(`[CRAWLER] ${formatted}`);
        }
    }
    /**
     * Log when a crawl job starts (level 'info', event 'job_started').
     *
     * @param {object} params - Fields copied into the record: job_id, store_id,
     *   store_name, job_type, trigger_type, provider.
     */
    jobStarted(params) {
        this.log({
            timestamp: new Date().toISOString(),
            level: 'info',
            event: 'job_started',
            job_id: params.job_id,
            store_id: params.store_id,
            store_name: params.store_name,
            job_type: params.job_type,
            trigger_type: params.trigger_type,
            provider: params.provider,
        });
    }
    /**
     * Log when a crawl job completes successfully (level 'info', event 'job_completed').
     *
     * @param {object} params - Fields copied into the record: job_id, store_id,
     *   store_name, duration_ms, products_found, products_new, products_updated,
     *   products_marked_oos, provider.
     */
    jobCompleted(params) {
        this.log({
            timestamp: new Date().toISOString(),
            level: 'info',
            event: 'job_completed',
            job_id: params.job_id,
            store_id: params.store_id,
            store_name: params.store_name,
            duration_ms: params.duration_ms,
            products_found: params.products_found,
            products_new: params.products_new,
            products_updated: params.products_updated,
            products_marked_oos: params.products_marked_oos,
            provider: params.provider,
        });
    }
    /**
     * Log when a crawl job fails (level 'error', event 'job_failed').
     *
     * @param {object} params - Fields copied into the record: job_id, store_id,
     *   store_name, duration_ms, error_message, error_code, provider.
     */
    jobFailed(params) {
        this.log({
            timestamp: new Date().toISOString(),
            level: 'error',
            event: 'job_failed',
            job_id: params.job_id,
            store_id: params.store_id,
            store_name: params.store_name,
            duration_ms: params.duration_ms,
            error_message: params.error_message,
            error_code: params.error_code,
            provider: params.provider,
        });
    }
    /**
     * Log when a provider is detected for a dispensary (level 'info', event 'provider_detected').
     *
     * @param {object} params - Fields copied into the record: dispensary_id,
     *   dispensary_name, detected_provider, confidence, detection_method,
     *   menu_url, category.
     */
    providerDetected(params) {
        this.log({
            timestamp: new Date().toISOString(),
            level: 'info',
            event: 'provider_detected',
            dispensary_id: params.dispensary_id,
            dispensary_name: params.dispensary_name,
            detected_provider: params.detected_provider,
            confidence: params.confidence,
            detection_method: params.detection_method,
            menu_url: params.menu_url,
            category: params.category,
        });
    }
    /**
     * Log when a dispensary's provider changes (level 'info', event 'provider_changed').
     *
     * @param {object} params - Fields copied into the record: dispensary_id,
     *   dispensary_name, old_provider, new_provider, old_confidence,
     *   new_confidence, category.
     */
    providerChanged(params) {
        this.log({
            timestamp: new Date().toISOString(),
            level: 'info',
            event: 'provider_changed',
            dispensary_id: params.dispensary_id,
            dispensary_name: params.dispensary_name,
            old_provider: params.old_provider,
            new_provider: params.new_provider,
            old_confidence: params.old_confidence,
            new_confidence: params.new_confidence,
            category: params.category,
        });
    }
    /**
     * Log when a dispensary's crawler mode changes (sandbox -> production, etc.).
     * Emitted at level 'info' with event 'mode_changed'.
     *
     * @param {object} params - Fields copied into the record: dispensary_id,
     *   dispensary_name, old_mode, new_mode, reason, category, provider.
     */
    modeChanged(params) {
        this.log({
            timestamp: new Date().toISOString(),
            level: 'info',
            event: 'mode_changed',
            dispensary_id: params.dispensary_id,
            dispensary_name: params.dispensary_name,
            old_mode: params.old_mode,
            new_mode: params.new_mode,
            reason: params.reason,
            category: params.category,
            provider: params.provider,
        });
    }
    /**
     * Log sandbox crawl events.
     *
     * The record's `event` is taken from `params.event`. The level is 'error'
     * when that event is exactly 'sandbox_failed' and 'info' for anything else.
     *
     * @param {object} params - Fields copied into the record: event,
     *   dispensary_id, dispensary_name, template_name, category, quality_score,
     *   products_extracted, fields_missing, error_message, provider.
     */
    sandboxEvent(params) {
        const level = params.event === 'sandbox_failed' ? 'error' : 'info';
        this.log({
            timestamp: new Date().toISOString(),
            level,
            event: params.event,
            dispensary_id: params.dispensary_id,
            dispensary_name: params.dispensary_name,
            template_name: params.template_name,
            category: params.category,
            quality_score: params.quality_score,
            products_extracted: params.products_extracted,
            fields_missing: params.fields_missing,
            error_message: params.error_message,
            provider: params.provider,
        });
    }
    /**
     * Log queue processing failures (level 'error', event 'queue_failure').
     *
     * @param {object} params - Fields copied into the record: queue_type,
     *   error_message, affected_items.
     */
    queueFailure(params) {
        this.log({
            timestamp: new Date().toISOString(),
            level: 'error',
            event: 'queue_failure',
            queue_type: params.queue_type,
            error_message: params.error_message,
            affected_items: params.affected_items,
        });
    }
    /**
     * Log detection scan summary (level 'info', event 'detection_scan').
     *
     * @param {object} params - Fields copied into the record: total_scanned,
     *   detected, failed, skipped, duration_ms.
     */
    detectionScan(params) {
        this.log({
            timestamp: new Date().toISOString(),
            level: 'info',
            event: 'detection_scan',
            total_scanned: params.total_scanned,
            detected: params.detected,
            failed: params.failed,
            skipped: params.skipped,
            duration_ms: params.duration_ms,
        });
    }
    /**
     * Log intelligence run summary (level 'info', event 'intelligence_run').
     *
     * @param {object} params - Fields copied into the record: run_type,
     *   dispensaries_processed, jobs_queued, duration_ms.
     */
    intelligenceRun(params) {
        this.log({
            timestamp: new Date().toISOString(),
            level: 'info',
            event: 'intelligence_run',
            run_type: params.run_type,
            dispensaries_processed: params.dispensaries_processed,
            jobs_queued: params.jobs_queued,
            duration_ms: params.duration_ms,
        });
    }
}
// Export singleton instance: the logger is constructed once, when this module
// is first loaded, and exposed as `crawlerLogger`.
exports.crawlerLogger = new CrawlerLoggerService();