cannaiq/backend/dist/routes/scraper-monitor.js
Kelly 66e07b2009 fix(monitor): remove non-existent worker columns from job_run_logs query
The job_run_logs table tracks scheduled job orchestration, not individual
worker jobs. Worker info (worker_id, worker_hostname) belongs on
dispensary_crawl_jobs, not job_run_logs.

2025-12-03 18:45:05 -07:00

"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.activeScrapers = void 0;
exports.registerScraper = registerScraper;
exports.updateScraperStats = updateScraperStats;
exports.completeScraper = completeScraper;
const express_1 = require("express");
const middleware_1 = require("../auth/middleware");
const migrate_1 = require("../db/migrate");
const router = (0, express_1.Router)();
router.use(middleware_1.authMiddleware);
exports.activeScrapers = new Map();
// Get all active scrapers
router.get('/active', async (req, res) => {
    try {
        const scrapers = Array.from(exports.activeScrapers.values()).map(scraper => ({
            ...scraper,
            duration: Date.now() - scraper.startTime.getTime(),
            isStale: Date.now() - scraper.lastUpdate.getTime() > 60000 // 1 minute
        }));
        res.json({ scrapers });
    }
    catch (error) {
        console.error('Error fetching active scrapers:', error);
        res.status(500).json({ error: 'Failed to fetch active scrapers' });
    }
});
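// Illustrative shape of a GET /active response with one registered scraper
// (all values are hypothetical; the fields mirror the entry built by
// registerScraper() below, plus the computed duration/isStale):
//
//   { "scrapers": [{
//       "id": "run-1", "storeId": "store-1", "storeName": "Example Store",
//       "categoryId": "cat-1", "categoryName": "Flower",
//       "startTime": "2025-12-03T18:00:00.000Z",
//       "lastUpdate": "2025-12-03T18:00:30.000Z",
//       "status": "running",
//       "stats": { "requestsTotal": 10, "requestsSuccess": 9, "itemsSaved": 42,
//                  "itemsDropped": 1, "errorsCount": 0 },
//       "duration": 30000, "isStale": false
//   }] }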
// Get scraper by ID
router.get('/active/:id', async (req, res) => {
    try {
        const { id } = req.params;
        const scraper = exports.activeScrapers.get(id);
        if (!scraper) {
            return res.status(404).json({ error: 'Scraper not found' });
        }
        res.json({
            scraper: {
                ...scraper,
                duration: Date.now() - scraper.startTime.getTime(),
                isStale: Date.now() - scraper.lastUpdate.getTime() > 60000
            }
        });
    }
    catch (error) {
        console.error('Error fetching scraper:', error);
        res.status(500).json({ error: 'Failed to fetch scraper' });
    }
});
// Get scraper history (last 50 completed scrapes)
router.get('/history', async (req, res) => {
    try {
        const { limit = 50, dispensary_id } = req.query;
        let query = `
            SELECT
                d.id as dispensary_id,
                COALESCE(d.dba_name, d.name) as dispensary_name,
                d.city,
                d.state,
                dcj.id as job_id,
                dcj.job_type,
                dcj.status,
                dcj.products_found,
                dcj.products_new,
                dcj.products_updated,
                dcj.in_stock_count,
                dcj.out_of_stock_count,
                dcj.duration_ms,
                dcj.completed_at as last_scraped_at,
                dcj.error_message,
                (
                    SELECT COUNT(*)
                    FROM products p
                    WHERE p.dispensary_id = d.id
                    AND p.last_seen_at >= NOW() - INTERVAL '7 days'
                ) as product_count
            FROM dispensary_crawl_jobs dcj
            JOIN dispensaries d ON d.id = dcj.dispensary_id
            WHERE dcj.completed_at IS NOT NULL
        `;
        const params = [];
        let paramCount = 1;
        if (dispensary_id) {
            query += ` AND d.id = $${paramCount}`;
            params.push(dispensary_id);
            paramCount++;
        }
        query += ` ORDER BY dcj.completed_at DESC LIMIT $${paramCount}`;
        params.push(limit);
        const result = await migrate_1.pool.query(query, params);
        res.json({ history: result.rows });
    }
    catch (error) {
        console.error('Error fetching scraper history:', error);
        res.status(500).json({ error: 'Failed to fetch scraper history' });
    }
});
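// Example request (illustrative): the '/api/scraper-monitor' mount path is an
// assumption (it is set where the router is wired into the app, not in this
// file), and the bearer token is whatever authMiddleware expects:
//
//   curl -H "Authorization: Bearer $TOKEN" \
//     "http://localhost:3000/api/scraper-monitor/history?dispensary_id=123&limit=20"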
// Helper function to register a scraper
function registerScraper(id, storeId, storeName, categoryId, categoryName) {
    exports.activeScrapers.set(id, {
        id,
        storeId,
        storeName,
        categoryId,
        categoryName,
        startTime: new Date(),
        lastUpdate: new Date(),
        status: 'running',
        stats: {
            requestsTotal: 0,
            requestsSuccess: 0,
            itemsSaved: 0,
            itemsDropped: 0,
            errorsCount: 0
        }
    });
}
// Helper function to update scraper stats
function updateScraperStats(id, stats, currentActivity) {
    const scraper = exports.activeScrapers.get(id);
    if (scraper) {
        scraper.stats = { ...scraper.stats, ...stats };
        scraper.lastUpdate = new Date();
        if (currentActivity) {
            scraper.currentActivity = currentActivity;
        }
    }
}
// Helper function to mark scraper as completed
function completeScraper(id, error) {
    const scraper = exports.activeScrapers.get(id);
    if (scraper) {
        scraper.status = error ? 'error' : 'completed';
        scraper.lastUpdate = new Date();
        // Remove after 5 minutes
        setTimeout(() => {
            exports.activeScrapers.delete(id);
        }, 5 * 60 * 1000);
    }
}
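// Minimal lifecycle sketch for the three helpers above (a sketch only; the
// run ID, store/category values, stats, and the require path are hypothetical):
//
//   const monitor = require('./scraper-monitor');
//   monitor.registerScraper('run-1', 'store-1', 'Example Store', 'cat-1', 'Flower');
//   monitor.updateScraperStats('run-1', { requestsTotal: 10, itemsSaved: 42 }, 'Fetching page 2');
//   monitor.completeScraper('run-1');                            // success: status becomes 'completed'
//   // ...or, on failure:
//   monitor.completeScraper('run-1', new Error('fetch failed')); // status becomes 'error'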
// Dispensary crawl jobs endpoints
router.get('/jobs/stats', async (req, res) => {
    try {
        const { dispensary_id } = req.query;
        let whereClause = '';
        const params = [];
        if (dispensary_id) {
            whereClause = 'WHERE dispensary_id = $1';
            params.push(dispensary_id);
        }
        const result = await migrate_1.pool.query(`
            SELECT
                status,
                COUNT(*) as count,
                SUM(products_found) as total_products_found,
                SUM(COALESCE(products_new, 0) + COALESCE(products_updated, 0)) as total_products_saved
            FROM dispensary_crawl_jobs
            ${whereClause}
            GROUP BY status
        `, params);
        const stats = {
            pending: 0,
            in_progress: 0,
            completed: 0,
            failed: 0,
            total_products_found: 0,
            total_products_saved: 0
        };
        result.rows.forEach((row) => {
            stats[row.status] = parseInt(row.count, 10);
            if (row.status === 'completed') {
                stats.total_products_found += parseInt(row.total_products_found || '0', 10);
                stats.total_products_saved += parseInt(row.total_products_saved || '0', 10);
            }
        });
        res.json(stats);
    }
    catch (error) {
        console.error('Error fetching job stats:', error);
        res.status(500).json({ error: 'Failed to fetch job stats' });
    }
});
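// Illustrative /jobs/stats response (all numbers hypothetical). Note that the
// found/saved totals are accumulated from completed jobs only:
//
//   { "pending": 3, "in_progress": 2, "completed": 120, "failed": 4,
//     "total_products_found": 45210, "total_products_saved": 44870 }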
router.get('/jobs/active', async (req, res) => {
    try {
        const { dispensary_id } = req.query;
        let whereClause = "WHERE dcj.status = 'in_progress'";
        const params = [];
        let paramCount = 1;
        if (dispensary_id) {
            whereClause += ` AND dcj.dispensary_id = $${paramCount}`;
            params.push(dispensary_id);
            paramCount++;
        }
        const result = await migrate_1.pool.query(`
            SELECT
                dcj.id,
                dcj.dispensary_id,
                COALESCE(d.dba_name, d.name) as dispensary_name,
                dcj.job_type,
                dcj.status,
                dcj.worker_id,
                dcj.started_at,
                dcj.products_found,
                COALESCE(dcj.products_new, 0) + COALESCE(dcj.products_updated, 0) as products_saved,
                EXTRACT(EPOCH FROM (NOW() - dcj.started_at)) as duration_seconds
            FROM dispensary_crawl_jobs dcj
            JOIN dispensaries d ON d.id = dcj.dispensary_id
            ${whereClause}
            ORDER BY dcj.started_at DESC
        `, params);
        res.json({ jobs: result.rows });
    }
    catch (error) {
        console.error('Error fetching active jobs:', error);
        res.status(500).json({ error: 'Failed to fetch active jobs' });
    }
});
router.get('/jobs/recent', async (req, res) => {
    try {
        const { limit = 50, dispensary_id, status } = req.query;
        let whereClause = '';
        const params = [];
        let paramCount = 1;
        const conditions = [];
        if (dispensary_id) {
            conditions.push(`dcj.dispensary_id = $${paramCount}`);
            params.push(dispensary_id);
            paramCount++;
        }
        if (status) {
            conditions.push(`dcj.status = $${paramCount}`);
            params.push(status);
            paramCount++;
        }
        if (conditions.length > 0) {
            whereClause = 'WHERE ' + conditions.join(' AND ');
        }
        params.push(limit);
        const result = await migrate_1.pool.query(`
            SELECT
                dcj.id,
                dcj.dispensary_id,
                COALESCE(d.dba_name, d.name) as dispensary_name,
                dcj.job_type,
                dcj.status,
                dcj.worker_id,
                dcj.started_at,
                dcj.completed_at,
                dcj.products_found,
                COALESCE(dcj.products_new, 0) + COALESCE(dcj.products_updated, 0) as products_saved,
                dcj.error_message,
                EXTRACT(EPOCH FROM (COALESCE(dcj.completed_at, NOW()) - dcj.started_at)) as duration_seconds
            FROM dispensary_crawl_jobs dcj
            JOIN dispensaries d ON d.id = dcj.dispensary_id
            ${whereClause}
            ORDER BY dcj.created_at DESC
            LIMIT $${paramCount}
        `, params);
        res.json({ jobs: result.rows });
    }
    catch (error) {
        console.error('Error fetching recent jobs:', error);
        res.status(500).json({ error: 'Failed to fetch recent jobs' });
    }
});
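// Example (illustrative; the mount path is an assumption, as above): the ten
// most recent failed jobs for one dispensary:
//
//   GET /api/scraper-monitor/jobs/recent?dispensary_id=123&status=failed&limit=10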
router.get('/jobs/workers', async (req, res) => {
    try {
        const { dispensary_id } = req.query;
        let whereClause = "WHERE status = 'in_progress' AND worker_id IS NOT NULL";
        const params = [];
        if (dispensary_id) {
            whereClause += ` AND dispensary_id = $1`;
            params.push(dispensary_id);
        }
        const result = await migrate_1.pool.query(`
            SELECT
                worker_id,
                COUNT(*) as active_jobs,
                SUM(products_found) as total_products_found,
                SUM(COALESCE(products_new, 0) + COALESCE(products_updated, 0)) as total_products_saved,
                MIN(started_at) as earliest_start,
                MAX(started_at) as latest_start
            FROM dispensary_crawl_jobs
            ${whereClause}
            GROUP BY worker_id
            ORDER BY worker_id
        `, params);
        res.json({ workers: result.rows });
    }
    catch (error) {
        console.error('Error fetching worker stats:', error);
        res.status(500).json({ error: 'Failed to fetch worker stats' });
    }
});
router.get('/jobs/worker-logs/:workerId', async (req, res) => {
    try {
        const { workerId } = req.params;
        // Downleveled dynamic imports (compiled from `await import(...)`)
        const fs = await Promise.resolve().then(() => __importStar(require('fs/promises')));
        const path = await Promise.resolve().then(() => __importStar(require('path')));
        const logPath = path.join('/tmp', `worker-${workerId}.log`);
        try {
            const logs = await fs.readFile(logPath, 'utf-8');
            const lines = logs.split('\n');
            // Return last 100 lines
            const recentLogs = lines.slice(-100).join('\n');
            res.json({ logs: recentLogs });
        }
        catch (fileError) {
            // A missing or unreadable log file is reported as "no logs", not a 500
            res.json({ logs: 'No logs available for this worker yet.' });
        }
    }
    catch (error) {
        console.error('Failed to get worker logs:', error);
        res.status(500).json({ error: 'Failed to get worker logs' });
    }
});
exports.default = router;
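// The router is this module's default export. A typical mount from compiled
// CommonJS code (the app instance and the '/api/scraper-monitor' base path
// are assumptions, not defined in this file):
//
//   const scraperMonitor = require('./routes/scraper-monitor');
//   app.use('/api/scraper-monitor', scraperMonitor.default);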