"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.RequestScheduler = void 0; const logger_1 = require("../services/logger"); const crypto_1 = __importDefault(require("crypto")); class RequestScheduler { queue = []; inProgress = new Set(); seen = new Set(); deduplicationEnabled = true; constructor(deduplicationEnabled = true) { this.deduplicationEnabled = deduplicationEnabled; } /** * Generate fingerprint for request deduplication */ generateFingerprint(request) { if (request.fingerprint) { return request.fingerprint; } // Generate fingerprint based on URL and relevant metadata const data = { url: request.url, method: request.metadata?.method || 'GET', body: request.metadata?.body }; return crypto_1.default.createHash('md5').update(JSON.stringify(data)).digest('hex'); } /** * Add a request to the queue */ enqueue(partialRequest) { if (!partialRequest.url) { logger_1.logger.warn('scraper', 'Cannot enqueue request without URL'); return false; } const fingerprint = this.generateFingerprint(partialRequest); // Check for duplicates if (this.deduplicationEnabled && this.seen.has(fingerprint)) { logger_1.logger.debug('scraper', `Request already seen: ${partialRequest.url}`); return false; } // Create full request with defaults const request = { url: partialRequest.url, priority: partialRequest.priority ?? 0, retryCount: partialRequest.retryCount ?? 0, maxRetries: partialRequest.maxRetries ?? 3, metadata: partialRequest.metadata || {}, callback: partialRequest.callback, errorHandler: partialRequest.errorHandler, fingerprint }; this.queue.push(request); this.seen.add(fingerprint); // Sort by priority (higher priority first) this.queue.sort((a, b) => b.priority - a.priority); logger_1.logger.debug('scraper', `Enqueued: ${request.url} (priority: ${request.priority})`); return true; } /** * Get the next request from the queue */ dequeue() { const request = this.queue.shift(); if (request) { this.inProgress.add(request.fingerprint); } return request || null; } /** * Mark a request as complete */ markComplete(request) { if (request.fingerprint) { this.inProgress.delete(request.fingerprint); } } /** * Requeue a failed request (for retry) */ requeueForRetry(request) { if (request.fingerprint) { this.inProgress.delete(request.fingerprint); this.seen.delete(request.fingerprint); } request.retryCount++; if (request.retryCount > request.maxRetries) { logger_1.logger.warn('scraper', `Max retries exceeded for: ${request.url}`); return false; } // Decrease priority for retried requests request.priority = Math.max(0, request.priority - 1); return this.enqueue(request); } /** * Get queue stats */ getStats() { return { pending: this.queue.length, inProgress: this.inProgress.size, total: this.seen.size }; } /** * Check if queue is empty */ isEmpty() { return this.queue.length === 0 && this.inProgress.size === 0; } /** * Clear all queues */ clear() { this.queue = []; this.inProgress.clear(); this.seen.clear(); } /** * Get pending requests count */ getPendingCount() { return this.queue.length; } /** * Get in-progress count */ getInProgressCount() { return this.inProgress.size; } } exports.RequestScheduler = RequestScheduler;