"use strict";
/**
 * Scraper V2 - Scrapy-inspired web scraping framework
 *
 * Architecture:
 * - Engine: Main orchestrator
 * - Scheduler: Priority queue with deduplication
 * - Downloader: HTTP + Browser hybrid fetcher
 * - Middlewares: Request/response processing chain
 * - Pipelines: Item processing and persistence
 * - Navigation: Category discovery
 *
 * This file is the package entry point: it re-exports the public classes of
 * the sub-modules and defines three convenience functions (scrapeCategory,
 * scrapeStore, discoverCategories).
 *
 * NOTE(review): the helpers and export boilerplate below look like
 * compiler-emitted CommonJS interop code (TypeScript-style) - confirm before
 * hand-editing; the source of truth is probably a .ts file.
 */
/*
 * __createBinding(o, m, k, k2): exposes property `k` of module `m` on object
 * `o` under the name `k2` (defaults to `k`). An already-installed
 * `this.__createBinding` is reused if present.
 *
 * When Object.create exists, the property is defined via a descriptor:
 *   - if `m` has no own descriptor for `k`, or
 *   - the descriptor has a "get" key and `m` is not flagged __esModule, or
 *   - the descriptor has no "get" key and is writable or configurable,
 *   then a fresh enumerable getter returning `m[k]` is used, so later changes
 *   to `m[k]` stay visible through `o[k2]`;
 *   - otherwise the existing descriptor of `m[k]` is reused unchanged.
 * Without Object.create, the value is simply copied (`o[k2] = m[k]`).
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
/*
 * __exportStar(m, exports): binds every key enumerated by `for...in` over `m`
 * onto `exports`, skipping "default" and any name `exports` already owns, so
 * names defined explicitly in this file are never overwritten.
 */
var __exportStar = (this && this.__exportStar) || function(m, exports) {
    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
};
// Flag this module object as an ES-module-shaped CommonJS module.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare every re-exported class name as undefined; each one is
// redefined further down as an enumerable getter onto its source module.
exports.StatsPipeline = exports.DatabasePipeline = exports.ImagePipeline = exports.DeduplicationPipeline = exports.SanitizationPipeline = exports.ValidationPipeline = exports.PipelineEngine = exports.StealthMiddleware = exports.BotDetectionMiddleware = exports.RetryMiddleware = exports.RateLimitMiddleware = exports.ProxyMiddleware = exports.UserAgentMiddleware = exports.MiddlewareEngine = exports.NavigationDiscovery = exports.Downloader = exports.RequestScheduler = exports.DutchieSpider = exports.ScraperEngine = void 0;
// These three are function declarations defined later in this file; the
// assignments work here because function declarations are hoisted.
exports.scrapeCategory = scrapeCategory;
exports.scrapeStore = scrapeStore;
exports.discoverCategories = discoverCategories;
// Re-exports from ./engine (getters read the property on each access).
var engine_1 = require("./engine");
Object.defineProperty(exports, "ScraperEngine", { enumerable: true, get: function () { return engine_1.ScraperEngine; } });
Object.defineProperty(exports, "DutchieSpider", { enumerable: true, get: function () { return engine_1.DutchieSpider; } });
// Re-export from ./scheduler.
var scheduler_1 = require("./scheduler");
Object.defineProperty(exports, "RequestScheduler", { enumerable: true, get: function () { return scheduler_1.RequestScheduler; } });
// Re-export from ./downloader.
var downloader_1 = require("./downloader");
Object.defineProperty(exports, "Downloader", { enumerable: true, get: function () { return downloader_1.Downloader; } });
// Re-export from ./navigation.
var navigation_1 = require("./navigation");
Object.defineProperty(exports, "NavigationDiscovery", { enumerable: true, get: function () { return navigation_1.NavigationDiscovery; } });
// Re-exports from ./middlewares.
var middlewares_1 = require("./middlewares");
Object.defineProperty(exports, "MiddlewareEngine", { enumerable: true, get: function () { return middlewares_1.MiddlewareEngine; } });
Object.defineProperty(exports, "UserAgentMiddleware", { enumerable: true, get: function () { return middlewares_1.UserAgentMiddleware; } });
Object.defineProperty(exports, "ProxyMiddleware", { enumerable: true, get: function () { return middlewares_1.ProxyMiddleware; } });
Object.defineProperty(exports, "RateLimitMiddleware", { enumerable: true, get: function () { return middlewares_1.RateLimitMiddleware; } });
Object.defineProperty(exports, "RetryMiddleware", { enumerable: true, get: function () { return middlewares_1.RetryMiddleware; } });
Object.defineProperty(exports, "BotDetectionMiddleware", { enumerable: true, get: function () { return middlewares_1.BotDetectionMiddleware; } });
Object.defineProperty(exports, "StealthMiddleware", { enumerable: true, get: function () { return middlewares_1.StealthMiddleware; } });
// Re-exports from ./pipelines (the remaining pipeline classes follow below).
var pipelines_1 = require("./pipelines");
Object.defineProperty(exports, "PipelineEngine", { enumerable: true, get: function () { return pipelines_1.PipelineEngine; } });
Object.defineProperty(exports, "ValidationPipeline", { enumerable: true, get: function () { return pipelines_1.ValidationPipeline; } });
Object.defineProperty(exports, "SanitizationPipeline", { enumerable: true, get: function () { return pipelines_1.SanitizationPipeline; } });
Object.defineProperty(exports, "DeduplicationPipeline", {
    enumerable: true,
    get: function () { return pipelines_1.DeduplicationPipeline; }
});
Object.defineProperty(exports, "ImagePipeline", {
    enumerable: true,
    get: function () { return pipelines_1.ImagePipeline; }
});
Object.defineProperty(exports, "DatabasePipeline", {
    enumerable: true,
    get: function () { return pipelines_1.DatabasePipeline; }
});
Object.defineProperty(exports, "StatsPipeline", {
    enumerable: true,
    get: function () { return pipelines_1.StatsPipeline; }
});
// Everything exported by ./types is forwarded, except "default" and any name
// this module already defines itself.
__exportStar(require("./types"), exports);

// ---------------------------------------------------------------------------
// Main API functions
// ---------------------------------------------------------------------------
const engineModule = require("./engine");
const navigationModule = require("./navigation");
const downloaderModule = require("./downloader");
const loggerModule = require("../services/logger");

/**
 * Builds a DutchieSpider wrapped around a new ScraperEngine.
 *
 * The engine is constructed with the argument `1`; what that argument means
 * is defined by ScraperEngine and is not visible in this file.
 *
 * @returns a freshly constructed DutchieSpider
 */
function buildSpider() {
    return new engineModule.DutchieSpider(new engineModule.ScraperEngine(1));
}

/**
 * Scrape a single category.
 *
 * Creates a fresh engine and spider, then awaits
 * `spider.scrapeCategory(storeId, categoryId)`. A failure of that call is
 * logged under the 'scraper' tag and rethrown unchanged; errors raised while
 * constructing the engine or spider propagate without being logged.
 *
 * @param storeId - forwarded as-is to the spider
 * @param categoryId - forwarded as-is to the spider
 * @returns a promise that resolves with no value once the spider call settles
 */
async function scrapeCategory(storeId, categoryId) {
    const categorySpider = buildSpider();
    try {
        await categorySpider.scrapeCategory(storeId, categoryId);
    }
    catch (failure) {
        loggerModule.logger.error('scraper', `scrapeCategory failed: ${failure}`);
        throw failure;
    }
}

/**
 * Scrape an entire store.
 *
 * Creates a fresh engine and spider, then awaits
 * `spider.scrapeStore(storeId, parallel)`. A failure of that call is logged
 * under the 'scraper' tag and rethrown unchanged.
 *
 * @param storeId - forwarded as-is to the spider
 * @param parallel - forwarded as-is to the spider; defaults to 3
 * @returns a promise that resolves with no value once the spider call settles
 */
async function scrapeStore(storeId, parallel = 3) {
    const storeSpider = buildSpider();
    try {
        await storeSpider.scrapeStore(storeId, parallel);
    }
    catch (failure) {
        loggerModule.logger.error('scraper', `scrapeStore failed: ${failure}`);
        throw failure;
    }
}

/**
 * Discover categories for a store.
 *
 * Creates a Downloader, hands it to a NavigationDiscovery, and awaits
 * `discovery.discoverCategories(storeId)`. A failure of that call is logged
 * under the 'scraper' tag and rethrown unchanged. `downloader.cleanup()` is
 * awaited whether or not discovery succeeded.
 *
 * @param storeId - forwarded as-is to NavigationDiscovery
 * @returns a promise that resolves with no value after cleanup completes
 */
async function discoverCategories(storeId) {
    const fetcher = new downloaderModule.Downloader();
    const categoryDiscovery = new navigationModule.NavigationDiscovery(fetcher);
    try {
        // Per the original note: discovery relies on the existing Dutchie
        // category structure.
        await categoryDiscovery.discoverCategories(storeId);
    }
    catch (failure) {
        loggerModule.logger.error('scraper', `discoverCategories failed: ${failure}`);
        throw failure;
    }
    finally {
        // Release the downloader on both the success and the failure path.
        await fetcher.cleanup();
    }
}