"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.activeScrapers = void 0; exports.registerScraper = registerScraper; exports.updateScraperStats = updateScraperStats; exports.completeScraper = completeScraper; const express_1 = require("express"); const middleware_1 = require("../auth/middleware"); const migrate_1 = require("../db/migrate"); const router = (0, express_1.Router)(); router.use(middleware_1.authMiddleware); exports.activeScrapers = new Map(); // Get all active scrapers router.get('/active', async (req, res) => { try { const scrapers = Array.from(exports.activeScrapers.values()).map(scraper => ({ ...scraper, duration: Date.now() - scraper.startTime.getTime(), isStale: Date.now() - scraper.lastUpdate.getTime() > 60000 // 1 minute })); res.json({ scrapers }); } catch (error) { console.error('Error fetching active scrapers:', error); res.status(500).json({ error: 'Failed to fetch active scrapers' }); } }); // Get scraper by ID router.get('/active/:id', async (req, res) => { try { const { id } = req.params; const scraper = exports.activeScrapers.get(id); if (!scraper) { return res.status(404).json({ error: 'Scraper not found' }); } res.json({ scraper: { ...scraper, duration: Date.now() - scraper.startTime.getTime(), isStale: Date.now() - scraper.lastUpdate.getTime() > 60000 } }); } catch (error) { console.error('Error fetching scraper:', error); res.status(500).json({ error: 'Failed to fetch scraper' }); } }); // Get scraper history (last 50 completed scrapes) router.get('/history', async (req, res) => { try { const { limit = 50, dispensary_id } = req.query; let query = ` SELECT d.id as dispensary_id, COALESCE(d.dba_name, d.name) as dispensary_name, d.city, d.state, dcj.id as job_id, dcj.job_type, dcj.status, dcj.products_found, dcj.products_new, dcj.products_updated, dcj.in_stock_count, dcj.out_of_stock_count, dcj.duration_ms, dcj.completed_at as last_scraped_at, dcj.error_message, ( SELECT COUNT(*) FROM products p WHERE p.dispensary_id = d.id AND p.last_seen_at >= NOW() - INTERVAL '7 days' ) as product_count FROM dispensary_crawl_jobs dcj JOIN dispensaries d ON d.id = dcj.dispensary_id WHERE dcj.completed_at IS NOT NULL `; const params = []; let paramCount = 1; if (dispensary_id) { query += ` AND d.id = $${paramCount}`; params.push(dispensary_id); paramCount++; } query += ` ORDER BY dcj.completed_at DESC LIMIT $${paramCount}`; params.push(limit); const result = await migrate_1.pool.query(query, params); res.json({ history: result.rows }); } catch (error) { console.error('Error fetching scraper history:', error); res.status(500).json({ error: 'Failed to fetch scraper history' }); } }); // Helper function to register a scraper function registerScraper(id, storeId, storeName, categoryId, categoryName) { exports.activeScrapers.set(id, { id, storeId, storeName, categoryId, categoryName, startTime: new Date(), lastUpdate: new Date(), status: 'running', stats: { requestsTotal: 0, requestsSuccess: 0, itemsSaved: 0, itemsDropped: 0, errorsCount: 0 } }); } // Helper function to update scraper stats function updateScraperStats(id, stats, currentActivity) { const scraper = exports.activeScrapers.get(id); if (scraper) { scraper.stats = { ...scraper.stats, ...stats }; scraper.lastUpdate = new Date(); if (currentActivity) { scraper.currentActivity = currentActivity; } } } // Helper function to mark scraper as completed function completeScraper(id, error) { const scraper = exports.activeScrapers.get(id); if (scraper) { scraper.status = error ? 'error' : 'completed'; scraper.lastUpdate = new Date(); // Remove after 5 minutes setTimeout(() => { exports.activeScrapers.delete(id); }, 5 * 60 * 1000); } } // Dispensary crawl jobs endpoints router.get('/jobs/stats', async (req, res) => { try { const { dispensary_id } = req.query; let whereClause = ''; const params = []; if (dispensary_id) { whereClause = 'WHERE dispensary_id = $1'; params.push(dispensary_id); } const result = await migrate_1.pool.query(` SELECT status, COUNT(*) as count, SUM(products_found) as total_products_found, SUM(COALESCE(products_new, 0) + COALESCE(products_updated, 0)) as total_products_saved FROM dispensary_crawl_jobs ${whereClause} GROUP BY status `, params); const stats = { pending: 0, in_progress: 0, completed: 0, failed: 0, total_products_found: 0, total_products_saved: 0 }; result.rows.forEach((row) => { stats[row.status] = parseInt(row.count); if (row.status === 'completed') { stats.total_products_found += parseInt(row.total_products_found || '0'); stats.total_products_saved += parseInt(row.total_products_saved || '0'); } }); res.json(stats); } catch (error) { console.error('Error fetching job stats:', error); res.status(500).json({ error: 'Failed to fetch job stats' }); } }); router.get('/jobs/active', async (req, res) => { try { const { dispensary_id } = req.query; let whereClause = "WHERE dcj.status = 'in_progress'"; const params = []; let paramCount = 1; if (dispensary_id) { whereClause += ` AND dcj.dispensary_id = $${paramCount}`; params.push(dispensary_id); paramCount++; } const result = await migrate_1.pool.query(` SELECT dcj.id, dcj.dispensary_id, COALESCE(d.dba_name, d.name) as dispensary_name, dcj.job_type, dcj.status, dcj.worker_id, dcj.started_at, dcj.products_found, COALESCE(dcj.products_new, 0) + COALESCE(dcj.products_updated, 0) as products_saved, EXTRACT(EPOCH FROM (NOW() - dcj.started_at)) as duration_seconds FROM dispensary_crawl_jobs dcj JOIN dispensaries d ON d.id = dcj.dispensary_id ${whereClause} ORDER BY dcj.started_at DESC `, params); res.json({ jobs: result.rows }); } catch (error) { console.error('Error fetching active jobs:', error); res.status(500).json({ error: 'Failed to fetch active jobs' }); } }); router.get('/jobs/recent', async (req, res) => { try { const { limit = 50, dispensary_id, status } = req.query; let whereClause = ''; const params = []; let paramCount = 1; const conditions = []; if (dispensary_id) { conditions.push(`dcj.dispensary_id = $${paramCount}`); params.push(dispensary_id); paramCount++; } if (status) { conditions.push(`dcj.status = $${paramCount}`); params.push(status); paramCount++; } if (conditions.length > 0) { whereClause = 'WHERE ' + conditions.join(' AND '); } params.push(limit); const result = await migrate_1.pool.query(` SELECT dcj.id, dcj.dispensary_id, COALESCE(d.dba_name, d.name) as dispensary_name, dcj.job_type, dcj.status, dcj.worker_id, dcj.started_at, dcj.completed_at, dcj.products_found, COALESCE(dcj.products_new, 0) + COALESCE(dcj.products_updated, 0) as products_saved, dcj.error_message, EXTRACT(EPOCH FROM (COALESCE(dcj.completed_at, NOW()) - dcj.started_at)) as duration_seconds FROM dispensary_crawl_jobs dcj JOIN dispensaries d ON d.id = dcj.dispensary_id ${whereClause} ORDER BY dcj.created_at DESC LIMIT $${paramCount} `, params); res.json({ jobs: result.rows }); } catch (error) { console.error('Error fetching recent jobs:', error); res.status(500).json({ error: 'Failed to fetch recent jobs' }); } }); router.get('/jobs/workers', async (req, res) => { try { const { dispensary_id } = req.query; let whereClause = "WHERE status = 'in_progress' AND worker_id IS NOT NULL"; const params = []; if (dispensary_id) { whereClause += ` AND dispensary_id = $1`; params.push(dispensary_id); } const result = await migrate_1.pool.query(` SELECT worker_id, COUNT(*) as active_jobs, SUM(products_found) as total_products_found, SUM(COALESCE(products_new, 0) + COALESCE(products_updated, 0)) as total_products_saved, MIN(started_at) as earliest_start, MAX(started_at) as latest_start FROM dispensary_crawl_jobs ${whereClause} GROUP BY worker_id ORDER BY worker_id `, params); res.json({ workers: result.rows }); } catch (error) { console.error('Error fetching worker stats:', error); res.status(500).json({ error: 'Failed to fetch worker stats' }); } }); router.get('/jobs/worker-logs/:workerId', async (req, res) => { try { const { workerId } = req.params; const fs = await Promise.resolve().then(() => __importStar(require('fs/promises'))); const path = await Promise.resolve().then(() => __importStar(require('path'))); const logPath = path.join('/tmp', `worker-${workerId}.log`); try { const logs = await fs.readFile(logPath, 'utf-8'); const lines = logs.split('\n'); // Return last 100 lines const recentLogs = lines.slice(-100).join('\n'); res.json({ logs: recentLogs }); } catch (fileError) { res.json({ logs: 'No logs available for this worker yet.' }); } } catch (error) { console.error('Failed to get worker logs:', error); res.status(500).json({ error: 'Failed to get worker logs' }); } }); exports.default = router;