// Note: the job_run_logs table tracks scheduled job orchestration, not
// individual worker jobs. Worker info (worker_id, worker_hostname) belongs on
// dispensary_crawl_jobs, not job_run_logs.
"use strict";
|
|
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
if (k2 === undefined) k2 = k;
|
|
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
}
|
|
Object.defineProperty(o, k2, desc);
|
|
}) : (function(o, m, k, k2) {
|
|
if (k2 === undefined) k2 = k;
|
|
o[k2] = m[k];
|
|
}));
|
|
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
}) : function(o, v) {
|
|
o["default"] = v;
|
|
});
|
|
var __importStar = (this && this.__importStar) || (function () {
|
|
var ownKeys = function(o) {
|
|
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
var ar = [];
|
|
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
return ar;
|
|
};
|
|
return ownKeys(o);
|
|
};
|
|
return function (mod) {
|
|
if (mod && mod.__esModule) return mod;
|
|
var result = {};
|
|
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
__setModuleDefault(result, mod);
|
|
return result;
|
|
};
|
|
})();
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.activeScrapers = void 0;
|
|
exports.registerScraper = registerScraper;
|
|
exports.updateScraperStats = updateScraperStats;
|
|
exports.completeScraper = completeScraper;
|
|
const express_1 = require("express");
|
|
const middleware_1 = require("../auth/middleware");
|
|
const migrate_1 = require("../db/migrate");
|
|
const router = (0, express_1.Router)();
|
|
router.use(middleware_1.authMiddleware);
|
|
exports.activeScrapers = new Map();
|
|
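// Shape of an activeScrapers entry, as built by registerScraper below (listed
// here for reference; currentActivity is set lazily by updateScraperStats, so
// it may be absent):
//   {
//     id, storeId, storeName, categoryId, categoryName,
//     startTime: Date, lastUpdate: Date,
//     status: 'running' | 'completed' | 'error',
//     currentActivity?: string,
//     stats: { requestsTotal, requestsSuccess, itemsSaved, itemsDropped, errorsCount }
//   }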
// Get all active scrapers
router.get('/active', async (req, res) => {
    try {
        const scrapers = Array.from(exports.activeScrapers.values()).map(scraper => ({
            ...scraper,
            duration: Date.now() - scraper.startTime.getTime(),
            isStale: Date.now() - scraper.lastUpdate.getTime() > 60000 // 1 minute
        }));
        res.json({ scrapers });
    }
    catch (error) {
        console.error('Error fetching active scrapers:', error);
        res.status(500).json({ error: 'Failed to fetch active scrapers' });
    }
});
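// Illustrative response for GET /active (all values hypothetical):
//   { "scrapers": [ { "id": "a1", "storeId": "s1", "storeName": "...",
//       "status": "running", "duration": 42000, "isStale": false,
//       "stats": { "requestsTotal": 10, "requestsSuccess": 9,
//                  "itemsSaved": 120, "itemsDropped": 2, "errorsCount": 0 } } ] }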
// Get scraper by ID
router.get('/active/:id', async (req, res) => {
    try {
        const { id } = req.params;
        const scraper = exports.activeScrapers.get(id);
        if (!scraper) {
            return res.status(404).json({ error: 'Scraper not found' });
        }
        res.json({
            scraper: {
                ...scraper,
                duration: Date.now() - scraper.startTime.getTime(),
                isStale: Date.now() - scraper.lastUpdate.getTime() > 60000
            }
        });
    }
    catch (error) {
        console.error('Error fetching scraper:', error);
        res.status(500).json({ error: 'Failed to fetch scraper' });
    }
});
// Get scraper history (last 50 completed scrapes by default)
router.get('/history', async (req, res) => {
    try {
        const { limit = 50, dispensary_id } = req.query;
        let query = `
            SELECT
                d.id as dispensary_id,
                COALESCE(d.dba_name, d.name) as dispensary_name,
                d.city,
                d.state,
                dcj.id as job_id,
                dcj.job_type,
                dcj.status,
                dcj.products_found,
                dcj.products_new,
                dcj.products_updated,
                dcj.in_stock_count,
                dcj.out_of_stock_count,
                dcj.duration_ms,
                dcj.completed_at as last_scraped_at,
                dcj.error_message,
                (
                    SELECT COUNT(*)
                    FROM products p
                    WHERE p.dispensary_id = d.id
                      AND p.last_seen_at >= NOW() - INTERVAL '7 days'
                ) as product_count
            FROM dispensary_crawl_jobs dcj
            JOIN dispensaries d ON d.id = dcj.dispensary_id
            WHERE dcj.completed_at IS NOT NULL
        `;
        const params = [];
        let paramCount = 1;
        if (dispensary_id) {
            query += ` AND d.id = $${paramCount}`;
            params.push(dispensary_id);
            paramCount++;
        }
        query += ` ORDER BY dcj.completed_at DESC LIMIT $${paramCount}`;
        // limit arrives as a string from req.query; Postgres coerces it for LIMIT
        params.push(limit);
        const result = await migrate_1.pool.query(query, params);
        res.json({ history: result.rows });
    }
    catch (error) {
        console.error('Error fetching scraper history:', error);
        res.status(500).json({ error: 'Failed to fetch scraper history' });
    }
});
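// Illustrative requests (the mount path of this router is not visible in this
// file, so "/scrapers" below is an assumption):
//   GET /scrapers/history                              -> 50 most recent completed jobs
//   GET /scrapers/history?dispensary_id=123&limit=25   -> filtered, smaller page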
// Helper function to register a scraper; called when a scrape starts,
// seeding a fresh entry with zeroed stats.
function registerScraper(id, storeId, storeName, categoryId, categoryName) {
    exports.activeScrapers.set(id, {
        id,
        storeId,
        storeName,
        categoryId,
        categoryName,
        startTime: new Date(),
        lastUpdate: new Date(),
        status: 'running',
        stats: {
            requestsTotal: 0,
            requestsSuccess: 0,
            itemsSaved: 0,
            itemsDropped: 0,
            errorsCount: 0
        }
    });
}
// Helper function to update scraper stats
function updateScraperStats(id, stats, currentActivity) {
    const scraper = exports.activeScrapers.get(id);
    if (scraper) {
        scraper.stats = { ...scraper.stats, ...stats };
        scraper.lastUpdate = new Date();
        if (currentActivity) {
            scraper.currentActivity = currentActivity;
        }
    }
}
// Helper function to mark scraper as completed
function completeScraper(id, error) {
    const scraper = exports.activeScrapers.get(id);
    if (scraper) {
        scraper.status = error ? 'error' : 'completed';
        scraper.lastUpdate = new Date();
        // Remove after 5 minutes
        setTimeout(() => {
            exports.activeScrapers.delete(id);
        }, 5 * 60 * 1000);
    }
}
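// Typical lifecycle from a worker (sketch; the id and values are hypothetical):
//   registerScraper('job-1', storeId, storeName, categoryId, categoryName);
//   updateScraperStats('job-1', { requestsTotal: 10, itemsSaved: 8 }, 'page 2 of 5');
//   completeScraper('job-1');                  // or completeScraper('job-1', err)
// Completed entries linger for 5 minutes so dashboards can show a final state.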
// Dispensary crawl jobs endpoints
router.get('/jobs/stats', async (req, res) => {
    try {
        const { dispensary_id } = req.query;
        let whereClause = '';
        const params = [];
        if (dispensary_id) {
            whereClause = 'WHERE dispensary_id = $1';
            params.push(dispensary_id);
        }
        const result = await migrate_1.pool.query(`
            SELECT
                status,
                COUNT(*) as count,
                SUM(products_found) as total_products_found,
                SUM(COALESCE(products_new, 0) + COALESCE(products_updated, 0)) as total_products_saved
            FROM dispensary_crawl_jobs
            ${whereClause}
            GROUP BY status
        `, params);
        const stats = {
            pending: 0,
            in_progress: 0,
            completed: 0,
            failed: 0,
            total_products_found: 0,
            total_products_saved: 0
        };
        // node-postgres returns COUNT/SUM aggregates as strings, hence parseInt
        result.rows.forEach((row) => {
            stats[row.status] = parseInt(row.count, 10);
            if (row.status === 'completed') {
                stats.total_products_found += parseInt(row.total_products_found || '0', 10);
                stats.total_products_saved += parseInt(row.total_products_saved || '0', 10);
            }
        });
        res.json(stats);
    }
    catch (error) {
        console.error('Error fetching job stats:', error);
        res.status(500).json({ error: 'Failed to fetch job stats' });
    }
});
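// Illustrative response for /jobs/stats (values hypothetical):
//   { "pending": 4, "in_progress": 2, "completed": 120, "failed": 3,
//     "total_products_found": 51200, "total_products_saved": 48900 }
// Note: the found/saved totals only accumulate rows whose status is 'completed'.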
router.get('/jobs/active', async (req, res) => {
    try {
        const { dispensary_id } = req.query;
        let whereClause = "WHERE dcj.status = 'in_progress'";
        const params = [];
        let paramCount = 1;
        if (dispensary_id) {
            whereClause += ` AND dcj.dispensary_id = $${paramCount}`;
            params.push(dispensary_id);
            paramCount++;
        }
        const result = await migrate_1.pool.query(`
            SELECT
                dcj.id,
                dcj.dispensary_id,
                COALESCE(d.dba_name, d.name) as dispensary_name,
                dcj.job_type,
                dcj.status,
                dcj.worker_id,
                dcj.started_at,
                dcj.products_found,
                COALESCE(dcj.products_new, 0) + COALESCE(dcj.products_updated, 0) as products_saved,
                EXTRACT(EPOCH FROM (NOW() - dcj.started_at)) as duration_seconds
            FROM dispensary_crawl_jobs dcj
            JOIN dispensaries d ON d.id = dcj.dispensary_id
            ${whereClause}
            ORDER BY dcj.started_at DESC
        `, params);
        res.json({ jobs: result.rows });
    }
    catch (error) {
        console.error('Error fetching active jobs:', error);
        res.status(500).json({ error: 'Failed to fetch active jobs' });
    }
});
router.get('/jobs/recent', async (req, res) => {
    try {
        const { limit = 50, dispensary_id, status } = req.query;
        let whereClause = '';
        const params = [];
        let paramCount = 1;
        const conditions = [];
        if (dispensary_id) {
            conditions.push(`dcj.dispensary_id = $${paramCount}`);
            params.push(dispensary_id);
            paramCount++;
        }
        if (status) {
            conditions.push(`dcj.status = $${paramCount}`);
            params.push(status);
            paramCount++;
        }
        if (conditions.length > 0) {
            whereClause = 'WHERE ' + conditions.join(' AND ');
        }
        params.push(limit);
        const result = await migrate_1.pool.query(`
            SELECT
                dcj.id,
                dcj.dispensary_id,
                COALESCE(d.dba_name, d.name) as dispensary_name,
                dcj.job_type,
                dcj.status,
                dcj.worker_id,
                dcj.started_at,
                dcj.completed_at,
                dcj.products_found,
                COALESCE(dcj.products_new, 0) + COALESCE(dcj.products_updated, 0) as products_saved,
                dcj.error_message,
                EXTRACT(EPOCH FROM (COALESCE(dcj.completed_at, NOW()) - dcj.started_at)) as duration_seconds
            FROM dispensary_crawl_jobs dcj
            JOIN dispensaries d ON d.id = dcj.dispensary_id
            ${whereClause}
            ORDER BY dcj.created_at DESC
            LIMIT $${paramCount}
        `, params);
        res.json({ jobs: result.rows });
    }
    catch (error) {
        console.error('Error fetching recent jobs:', error);
        res.status(500).json({ error: 'Failed to fetch recent jobs' });
    }
});
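// Illustrative requests (same mount-path assumption as above):
//   GET /scrapers/jobs/recent?status=failed&limit=10
//   GET /scrapers/jobs/recent?dispensary_id=123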
router.get('/jobs/workers', async (req, res) => {
    try {
        const { dispensary_id } = req.query;
        let whereClause = "WHERE status = 'in_progress' AND worker_id IS NOT NULL";
        const params = [];
        if (dispensary_id) {
            // dispensary_id is the only bind parameter in this query, so $1 is safe
            whereClause += ` AND dispensary_id = $1`;
            params.push(dispensary_id);
        }
        const result = await migrate_1.pool.query(`
            SELECT
                worker_id,
                COUNT(*) as active_jobs,
                SUM(products_found) as total_products_found,
                SUM(COALESCE(products_new, 0) + COALESCE(products_updated, 0)) as total_products_saved,
                MIN(started_at) as earliest_start,
                MAX(started_at) as latest_start
            FROM dispensary_crawl_jobs
            ${whereClause}
            GROUP BY worker_id
            ORDER BY worker_id
        `, params);
        res.json({ workers: result.rows });
    }
    catch (error) {
        console.error('Error fetching worker stats:', error);
        res.status(500).json({ error: 'Failed to fetch worker stats' });
    }
});
router.get('/jobs/worker-logs/:workerId', async (req, res) => {
    try {
        const { workerId } = req.params;
        // Compiled form of dynamic imports: await import('fs/promises') / import('path')
        const fs = await Promise.resolve().then(() => __importStar(require('fs/promises')));
        const path = await Promise.resolve().then(() => __importStar(require('path')));
        const logPath = path.join('/tmp', `worker-${workerId}.log`);
        try {
            const logs = await fs.readFile(logPath, 'utf-8');
            const lines = logs.split('\n');
            // Return last 100 lines
            const recentLogs = lines.slice(-100).join('\n');
            res.json({ logs: recentLogs });
        }
        catch (fileError) {
            // A missing or unreadable log file is not an error for the client
            res.json({ logs: 'No logs available for this worker yet.' });
        }
    }
    catch (error) {
        console.error('Failed to get worker logs:', error);
        res.status(500).json({ error: 'Failed to get worker logs' });
    }
});
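// Workers are assumed to write their logs to /tmp/worker-<workerId>.log; that
// convention is implied only by the path above (the writer lives elsewhere).
//   GET /scrapers/jobs/worker-logs/worker-3   -> last 100 log lines, if present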
exports.default = router;