/**
 * CrawlerLogger - Structured logging for crawler operations
 *
 * High-signal, low-noise logging with JSON output for:
 * - Job lifecycle (one summary per job)
 * - Provider/mode changes
 * - Sandbox events
 * - Queue failures
 *
 * NO per-product logging - that's too noisy.
 */

/** Severity attached to every log entry; it also selects the console method used. */
export type LogLevel = 'info' | 'warn' | 'error' | 'debug';

/** Every event name a log entry may carry. */
export type LogEvent =
  | 'job_started'
  | 'job_completed'
  | 'job_failed'
  | 'job_cancelled'
  | 'provider_detected'
  | 'provider_changed'
  | 'mode_changed'
  | 'sandbox_started'
  | 'sandbox_completed'
  | 'sandbox_failed'
  | 'queue_failure'
  | 'detection_scan'
  | 'crawl_batch'
  | 'intelligence_run';

/** Optional category tag shared by several events. */
type LogCategory = 'product' | 'specials' | 'brand' | 'metadata';

/** The three events accepted by `sandboxEvent`. */
type SandboxEventName = 'sandbox_started' | 'sandbox_completed' | 'sandbox_failed';

/** Run types accepted by `intelligenceRun`. */
type IntelligenceRunType = 'detection' | 'production' | 'sandbox' | 'full';

/** Fields common to every payload; each concrete payload narrows `event`. */
interface BaseLogPayload {
  timestamp: string;
  level: LogLevel;
  event: LogEvent;
  dispensary_id?: number;
  store_id?: number;
  job_id?: number;
  provider?: string;
  category?: LogCategory;
}

/** Payload for `job_started`. */
interface JobStartedPayload extends BaseLogPayload {
  event: 'job_started';
  job_type: string;
  trigger_type: string;
  store_name: string;
}

/** Payload for `job_completed`, carrying the per-job product counters. */
interface JobCompletedPayload extends BaseLogPayload {
  event: 'job_completed';
  store_name: string;
  duration_ms: number;
  products_found: number;
  products_new: number;
  products_updated: number;
  products_marked_oos?: number;
}

/** Payload for `job_failed`. */
interface JobFailedPayload extends BaseLogPayload {
  event: 'job_failed';
  store_name: string;
  duration_ms: number;
  error_message: string;
  error_code?: string;
}

/** Payload for `provider_detected`. */
interface ProviderDetectedPayload extends BaseLogPayload {
  event: 'provider_detected';
  dispensary_name: string;
  detected_provider: string;
  confidence: number;
  detection_method: string;
  menu_url?: string;
}

/** Payload for `provider_changed`; `old_provider` may be `null`. */
interface ProviderChangedPayload extends BaseLogPayload {
  event: 'provider_changed';
  dispensary_name: string;
  old_provider: string | null;
  new_provider: string;
  old_confidence: number;
  new_confidence: number;
}

/** Payload for `mode_changed`. */
interface ModeChangedPayload extends BaseLogPayload {
  event: 'mode_changed';
  dispensary_name: string;
  old_mode: string;
  new_mode: string;
  reason: string;
}

/** Payload shared by the three sandbox events. */
interface SandboxEventPayload extends BaseLogPayload {
  event: SandboxEventName;
  dispensary_name: string;
  template_name: string;
  quality_score?: number;
  products_extracted?: number;
  fields_missing?: number;
  error_message?: string;
}

/** Payload for `queue_failure`. */
interface QueueFailurePayload extends BaseLogPayload {
  event: 'queue_failure';
  queue_type: string;
  error_message: string;
  affected_items?: number;
}

/** Payload for `detection_scan` summaries. */
interface DetectionScanPayload extends BaseLogPayload {
  event: 'detection_scan';
  total_scanned: number;
  detected: number;
  failed: number;
  skipped: number;
  duration_ms: number;
}

/** Payload for `intelligence_run` summaries. */
interface IntelligenceRunPayload extends BaseLogPayload {
  event: 'intelligence_run';
  run_type: IntelligenceRunType;
  dispensaries_processed: number;
  jobs_queued: number;
  duration_ms: number;
}

/** Union of every payload shape the logger can emit. */
type LogPayload =
  | JobStartedPayload
  | JobCompletedPayload
  | JobFailedPayload
  | ProviderDetectedPayload
  | ProviderChangedPayload
  | ModeChangedPayload
  | SandboxEventPayload
  | QueueFailurePayload
  | DetectionScanPayload
  | IntelligenceRunPayload;

/**
 * Writes one `[CRAWLER] <json>` line per event to the console.
 *
 * Each public method stamps the entry with the current ISO-8601 time, fixes
 * the level and event name, and copies a fixed set of fields from `params`.
 * Optional fields left `undefined` do not appear in the output because
 * `JSON.stringify` omits them.
 */
class CrawlerLoggerService {
  /** Serialises a payload to its JSON text. */
  private formatLog(payload: LogPayload): string {
    return JSON.stringify(payload);
  }

  /**
   * Sends the serialised payload to the console method matching its level:
   * `error`, `warn` and `debug` map to their namesakes, anything else to
   * `console.log`.
   */
  private log(payload: LogPayload): void {
    const line = `[CRAWLER] ${this.formatLog(payload)}`;

    if (payload.level === 'error') {
      console.error(line);
    } else if (payload.level === 'warn') {
      console.warn(line);
    } else if (payload.level === 'debug') {
      console.debug(line);
    } else {
      console.log(line);
    }
  }

  /**
   * Emits an `info` entry with event `job_started`.
   *
   * @param params - Job and store identifiers, job/trigger type, and an optional provider.
   */
  jobStarted(params: {
    job_id: number;
    store_id: number;
    store_name: string;
    job_type: string;
    trigger_type: string;
    provider?: string;
  }): void {
    const timestamp = new Date().toISOString();
    const { job_id, store_id, store_name, job_type, trigger_type, provider } = params;

    this.log({
      timestamp,
      level: 'info',
      event: 'job_started',
      job_id,
      store_id,
      store_name,
      job_type,
      trigger_type,
      provider,
    });
  }

  /**
   * Emits an `info` entry with event `job_completed`.
   *
   * @param params - Job and store identifiers, duration, and product counters.
   */
  jobCompleted(params: {
    job_id: number;
    store_id: number;
    store_name: string;
    duration_ms: number;
    products_found: number;
    products_new: number;
    products_updated: number;
    products_marked_oos?: number;
    provider?: string;
  }): void {
    const timestamp = new Date().toISOString();
    const {
      job_id,
      store_id,
      store_name,
      duration_ms,
      products_found,
      products_new,
      products_updated,
      products_marked_oos,
      provider,
    } = params;

    this.log({
      timestamp,
      level: 'info',
      event: 'job_completed',
      job_id,
      store_id,
      store_name,
      duration_ms,
      products_found,
      products_new,
      products_updated,
      products_marked_oos,
      provider,
    });
  }

  /**
   * Emits an `error` entry with event `job_failed`.
   *
   * @param params - Job and store identifiers, duration, and the error message (plus optional code).
   */
  jobFailed(params: {
    job_id: number;
    store_id: number;
    store_name: string;
    duration_ms: number;
    error_message: string;
    error_code?: string;
    provider?: string;
  }): void {
    const timestamp = new Date().toISOString();
    const { job_id, store_id, store_name, duration_ms, error_message, error_code, provider } =
      params;

    this.log({
      timestamp,
      level: 'error',
      event: 'job_failed',
      job_id,
      store_id,
      store_name,
      duration_ms,
      error_message,
      error_code,
      provider,
    });
  }

  /**
   * Emits an `info` entry with event `provider_detected`.
   *
   * @param params - Dispensary identity plus the detected provider, confidence and method.
   */
  providerDetected(params: {
    dispensary_id: number;
    dispensary_name: string;
    detected_provider: string;
    confidence: number;
    detection_method: string;
    menu_url?: string;
    category?: LogCategory;
  }): void {
    const timestamp = new Date().toISOString();
    const {
      dispensary_id,
      dispensary_name,
      detected_provider,
      confidence,
      detection_method,
      menu_url,
      category,
    } = params;

    this.log({
      timestamp,
      level: 'info',
      event: 'provider_detected',
      dispensary_id,
      dispensary_name,
      detected_provider,
      confidence,
      detection_method,
      menu_url,
      category,
    });
  }

  /**
   * Emits an `info` entry with event `provider_changed`.
   *
   * @param params - Dispensary identity plus old/new provider and confidence values.
   */
  providerChanged(params: {
    dispensary_id: number;
    dispensary_name: string;
    old_provider: string | null;
    new_provider: string;
    old_confidence: number;
    new_confidence: number;
    category?: LogCategory;
  }): void {
    const timestamp = new Date().toISOString();
    const {
      dispensary_id,
      dispensary_name,
      old_provider,
      new_provider,
      old_confidence,
      new_confidence,
      category,
    } = params;

    this.log({
      timestamp,
      level: 'info',
      event: 'provider_changed',
      dispensary_id,
      dispensary_name,
      old_provider,
      new_provider,
      old_confidence,
      new_confidence,
      category,
    });
  }

  /**
   * Emits an `info` entry with event `mode_changed`
   * (sandbox -> production, etc.).
   *
   * @param params - Dispensary identity, old and new mode, and the reason for the change.
   */
  modeChanged(params: {
    dispensary_id: number;
    dispensary_name: string;
    old_mode: string;
    new_mode: string;
    reason: string;
    category?: LogCategory;
    provider?: string;
  }): void {
    const timestamp = new Date().toISOString();
    const { dispensary_id, dispensary_name, old_mode, new_mode, reason, category, provider } =
      params;

    this.log({
      timestamp,
      level: 'info',
      event: 'mode_changed',
      dispensary_id,
      dispensary_name,
      old_mode,
      new_mode,
      reason,
      category,
      provider,
    });
  }

  /**
   * Emits a sandbox entry under the event name supplied by the caller.
   * `sandbox_failed` is logged at `error`; the other two at `info`.
   *
   * @param params - Event name, dispensary identity, template name, and optional result details.
   */
  sandboxEvent(params: {
    event: SandboxEventName;
    dispensary_id: number;
    dispensary_name: string;
    template_name: string;
    category?: LogCategory;
    quality_score?: number;
    products_extracted?: number;
    fields_missing?: number;
    error_message?: string;
    provider?: string;
  }): void {
    let level: LogLevel = 'info';
    if (params.event === 'sandbox_failed') {
      level = 'error';
    }

    const timestamp = new Date().toISOString();
    const {
      event,
      dispensary_id,
      dispensary_name,
      template_name,
      category,
      quality_score,
      products_extracted,
      fields_missing,
      error_message,
      provider,
    } = params;

    this.log({
      timestamp,
      level,
      event,
      dispensary_id,
      dispensary_name,
      template_name,
      category,
      quality_score,
      products_extracted,
      fields_missing,
      error_message,
      provider,
    });
  }

  /**
   * Emits an `error` entry with event `queue_failure`.
   *
   * @param params - Queue type, error message, and an optional count of affected items.
   */
  queueFailure(params: {
    queue_type: string;
    error_message: string;
    affected_items?: number;
  }): void {
    const timestamp = new Date().toISOString();
    const { queue_type, error_message, affected_items } = params;

    this.log({
      timestamp,
      level: 'error',
      event: 'queue_failure',
      queue_type,
      error_message,
      affected_items,
    });
  }

  /**
   * Emits an `info` entry with event `detection_scan`.
   *
   * @param params - Scan counters (scanned, detected, failed, skipped) and duration.
   */
  detectionScan(params: {
    total_scanned: number;
    detected: number;
    failed: number;
    skipped: number;
    duration_ms: number;
  }): void {
    const timestamp = new Date().toISOString();
    const { total_scanned, detected, failed, skipped, duration_ms } = params;

    this.log({
      timestamp,
      level: 'info',
      event: 'detection_scan',
      total_scanned,
      detected,
      failed,
      skipped,
      duration_ms,
    });
  }

  /**
   * Emits an `info` entry with event `intelligence_run`.
   *
   * @param params - Run type, dispensaries processed, jobs queued, and duration.
   */
  intelligenceRun(params: {
    run_type: IntelligenceRunType;
    dispensaries_processed: number;
    jobs_queued: number;
    duration_ms: number;
  }): void {
    const timestamp = new Date().toISOString();
    const { run_type, dispensaries_processed, jobs_queued, duration_ms } = params;

    this.log({
      timestamp,
      level: 'info',
      event: 'intelligence_run',
      run_type,
      dispensaries_processed,
      jobs_queued,
      duration_ms,
    });
  }
}

// Export singleton instance
export const crawlerLogger = new CrawlerLoggerService();