/**
 * Health Check Routes
 *
 * Comprehensive health endpoints for monitoring API, DB, Redis, Workers, Crawls, and Analytics.
 *
 * Endpoints:
 *   GET /api/health            - Quick API health check
 *   GET /api/health/db         - Postgres health
 *   GET /api/health/redis      - Redis health
 *   GET /api/health/workers    - Queue and worker status
 *   GET /api/health/crawls     - Crawl activity summary
 *   GET /api/health/analytics  - Analytics/aggregates status
 *   GET /api/health/full       - Aggregated view of all subsystems
 */

import { Router, Request, Response } from 'express';
import { pool } from '../db/pool';
import { getRedis } from '../lib/redis';
import * as fs from 'fs';
import * as path from 'path';

const router = Router();

// ============================================================
// Constants
// ============================================================

/** Version reported when package.json is missing, unreadable, or has no usable version. */
const DEFAULT_VERSION = '1.0.0';

/** Maximum time to wait for a Redis PING before the check is reported as failed. */
const REDIS_PING_TIMEOUT_MS = 3000;

/** A queue with more than this many running jobs is treated as possibly stuck (arbitrary threshold). */
const STUCK_JOBS_THRESHOLD = 5;

/** Crawl freshness (percent of stores crawled in the last 24h) at or above which status is 'ok'. */
const CRAWL_FRESH_OK_PERCENT = 90;

/** Crawl freshness at or above which status is 'degraded'; anything lower is 'stale'. */
const CRAWL_FRESH_DEGRADED_PERCENT = 50;

/** Number of days inspected when looking for missing analytics runs. */
const ANALYTICS_WINDOW_DAYS = 7;

/** Analytics status is 'degraded' (rather than 'stale') while at most this many days are missing. */
const ANALYTICS_MAX_MISSING_DAYS_DEGRADED = 2;

// ============================================================
// Module state
// ============================================================

/**
 * Reads the `version` field from the package.json two directories above this file.
 *
 * Best effort only: any I/O or parse failure, or a missing/non-string version,
 * falls back to DEFAULT_VERSION so that importing this module never throws.
 */
function readPackageVersion(): string {
  try {
    const packagePath = path.join(__dirname, '../../package.json');
    if (fs.existsSync(packagePath)) {
      const pkg: unknown = JSON.parse(fs.readFileSync(packagePath, 'utf8'));
      const version = (pkg as { version?: unknown } | null)?.version;
      if (typeof version === 'string' && version) {
        return version;
      }
    }
  } catch {
    // Ignore errors reading package.json; the default below is used instead.
  }
  return DEFAULT_VERSION;
}

const packageVersion = readPackageVersion();

// Store server start time (module load time) for uptime calculation
const serverStartTime = Date.now();

// ============================================================
// Types
// ============================================================

interface HealthStatus {
  status: 'ok' | 'degraded' | 'error' | 'stale';
}

interface ApiHealth extends HealthStatus {
  uptime: number;
  timestamp: string;
  version: string;
  build_sha: string | null;
  build_time: string | null;
}

interface DbHealth extends HealthStatus {
  connected: boolean;
  latency_ms: number;
  error?: string;
}

interface RedisHealth extends HealthStatus {
  connected: boolean;
  latency_ms: number;
  error?: string;
}

interface QueueInfo {
  name: string;
  waiting: number;
  active: number;
  completed: number;
  failed: number;
  paused: boolean;
}

interface WorkerInfo {
  id: string;
  queue: string;
  status: string;
  last_heartbeat?: string;
}

interface WorkersHealth extends HealthStatus {
  queues: QueueInfo[];
  workers: WorkerInfo[];
  /** Present only when the underlying queries failed. */
  error?: string;
}

interface CrawlsHealth extends HealthStatus {
  last_run: string | null;
  runs_last_24h: number;
  stores_with_recent_crawl: number;
  stores_total: number;
  stale_stores: number;
  /** Present only when the underlying query failed. */
  error?: string;
}

interface AnalyticsHealth extends HealthStatus {
  last_aggregate: string | null;
  daily_runs_last_7d: number;
  missing_days: number;
  /** Present only when the underlying query failed. */
  error?: string;
}

interface FullHealth extends HealthStatus {
  api: ApiHealth;
  db: DbHealth;
  redis: RedisHealth;
  workers: WorkersHealth;
  crawls: CrawlsHealth;
  analytics: AnalyticsHealth;
}

// Row shapes for the SQL below. Count columns are left as `unknown` and parsed
// with toCount(). Timestamp columns are assumed to arrive as Date objects
// (NOTE(review): this relies on the driver's timestamp parsing — confirm).

interface QueueStatsRow {
  name: string | null;
  waiting: unknown;
  active: unknown;
  completed: unknown;
  failed: unknown;
  paused: unknown;
}

interface WorkerRow {
  id: string;
  queue: string;
  status: string;
  last_heartbeat: Date | null;
}

interface CrawlStatsRow {
  last_run: Date | null;
  runs_24h: unknown;
  stores_recent: unknown;
  stores_total: unknown;
  stores_stale: unknown;
}

interface AnalyticsStatsRow {
  last_aggregate: Date | null;
  runs_7d: unknown;
}

// ============================================================
// Private utilities
// ============================================================

/**
 * Extracts a human-readable message from a caught value.
 *
 * Caught values are `unknown`: they may be Error instances, plain objects, or
 * even null. Reading `.message` directly would throw on null/undefined, so the
 * access is guarded and `fallback` is used when no non-empty string message exists.
 */
function errorMessage(err: unknown, fallback: string): string {
  const message = (err as { message?: unknown } | null | undefined)?.message;
  return typeof message === 'string' && message ? message : fallback;
}

/**
 * Parses a count column into a number, yielding 0 for anything that is not a
 * base-10 integer (null, undefined, non-numeric text).
 */
function toCount(value: unknown): number {
  const parsed = Number.parseInt(String(value), 10);
  return Number.isNaN(parsed) ? 0 : parsed;
}

/**
 * Returns the named environment variable, or null when it is unset, empty,
 * or the literal placeholder 'unknown'.
 */
function envOrNull(name: string): string | null {
  const value = process.env[name];
  return value && value !== 'unknown' ? value : null;
}

/**
 * Races `work` against a timer and rejects with `new Error(message)` if the
 * timer fires first. The timer is always cleared once the race settles so a
 * fast result does not leave a pending timeout behind.
 */
function withTimeout<T>(work: Promise<T>, ms: number, message: string): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => reject(new Error(message)), ms);
  });
  return Promise.race([work, timeout]).finally(() => clearTimeout(timer));
}

/**
 * Maps a subsystem status to the HTTP status code used by the routes:
 * 'ok' and 'degraded' answer 200, everything else (including 'stale') answers 503.
 */
function httpStatusFor(status: HealthStatus['status']): number {
  return status === 'ok' || status === 'degraded' ? 200 : 503;
}

// ============================================================
// Helper Functions
// ============================================================

/**
 * Reports process-level information: uptime in whole seconds since this module
 * was loaded, the current timestamp, the package version, and build metadata
 * taken from APP_GIT_SHA / APP_BUILD_TIME (null when unset or 'unknown').
 * Always reports status 'ok'.
 */
async function getApiHealth(): Promise<ApiHealth> {
  return {
    status: 'ok',
    uptime: Math.floor((Date.now() - serverStartTime) / 1000),
    timestamp: new Date().toISOString(),
    version: packageVersion,
    build_sha: envOrNull('APP_GIT_SHA'),
    build_time: envOrNull('APP_BUILD_TIME'),
  };
}

/**
 * Checks database connectivity by running `SELECT 1` and measuring the round trip.
 * Never throws: a failed query is reported as status 'error' with the error message.
 */
async function getDbHealth(): Promise<DbHealth> {
  const start = Date.now();
  try {
    await pool.query('SELECT 1');
    return {
      status: 'ok',
      connected: true,
      latency_ms: Date.now() - start,
    };
  } catch (err: unknown) {
    return {
      status: 'error',
      connected: false,
      latency_ms: Date.now() - start,
      error: errorMessage(err, 'Database connection failed'),
    };
  }
}

/**
 * Checks Redis by sending PING with a REDIS_PING_TIMEOUT_MS timeout.
 *
 * - Neither REDIS_URL nor REDIS_HOST set: 'ok' when NODE_ENV is 'development',
 *   'local' or unset (Redis is optional there), otherwise 'error'.
 * - PING fails or times out: 'degraded' with the error message.
 *
 * Never throws.
 */
async function getRedisHealth(): Promise<RedisHealth> {
  const start = Date.now();
  const nodeEnv = process.env.NODE_ENV;
  const isLocal = nodeEnv === 'development' || nodeEnv === 'local' || !nodeEnv;

  // Check if Redis is configured
  if (!process.env.REDIS_URL && !process.env.REDIS_HOST) {
    // Redis is optional in local dev, required in prod/staging
    return {
      status: isLocal ? 'ok' : 'error',
      connected: false,
      latency_ms: 0,
      error: isLocal
        ? 'Redis not configured (optional in local)'
        : 'Redis not configured (required in production)',
    };
  }

  try {
    const redis = getRedis();
    // Use a timeout to prevent hanging on an unreachable server
    await withTimeout(redis.ping(), REDIS_PING_TIMEOUT_MS, 'Redis ping timeout');
    return {
      status: 'ok',
      connected: true,
      latency_ms: Date.now() - start,
    };
  } catch (err: unknown) {
    return {
      status: 'degraded',
      connected: false,
      latency_ms: Date.now() - start,
      error: errorMessage(err, 'Redis ping failed'),
    };
  }
}

/**
 * Summarises job queues and workers from the database.
 *
 * Queues are the job types of `dispensary_crawl_jobs` rows created in the last
 * 7 days, with per-status counts. Workers are up to 20 enabled rows of
 * `job_schedules`, most recently run first.
 *
 * Status is 'degraded' when there are no enabled schedules, any queue has
 * failed jobs, or any queue has more than STUCK_JOBS_THRESHOLD running jobs;
 * 'error' when either query fails; otherwise 'ok'. Never throws.
 */
async function getWorkersHealth(): Promise<WorkersHealth> {
  try {
    // Per-job-type counts over the last 7 days
    const queueStatsResult = await pool.query(`
      SELECT
        job_type as name,
        COUNT(*) FILTER (WHERE status = 'pending') as waiting,
        COUNT(*) FILTER (WHERE status = 'running') as active,
        COUNT(*) FILTER (WHERE status = 'success') as completed,
        COUNT(*) FILTER (WHERE status IN ('error', 'failed')) as failed,
        false as paused
      FROM dispensary_crawl_jobs
      WHERE created_at > NOW() - INTERVAL '7 days'
      GROUP BY job_type
    `);

    const queues: QueueInfo[] = queueStatsResult.rows.map((row: QueueStatsRow) => ({
      name: row.name || 'unknown',
      waiting: toCount(row.waiting),
      active: toCount(row.active),
      completed: toCount(row.completed),
      failed: toCount(row.failed),
      paused: Boolean(row.paused),
    }));

    // Enabled schedules stand in for workers; last_run_at is used as the heartbeat
    const workersResult = await pool.query(`
      SELECT
        COALESCE(job_config->>'worker_name', job_name) as id,
        job_name as queue,
        CASE WHEN enabled THEN 'connected' ELSE 'disconnected' END as status,
        last_run_at as last_heartbeat
      FROM job_schedules
      WHERE enabled = true
      ORDER BY last_run_at DESC NULLS LAST
      LIMIT 20
    `);

    const workers: WorkerInfo[] = workersResult.rows.map((row: WorkerRow) => ({
      id: row.id,
      queue: row.queue,
      status: row.status,
      last_heartbeat: row.last_heartbeat?.toISOString() ?? undefined,
    }));

    // Determine overall status
    const hasActiveWorkers = workers.length > 0;
    const hasFailedJobs = queues.some((q) => q.failed > 0);
    const hasStuckJobs = queues.some((q) => q.active > STUCK_JOBS_THRESHOLD);
    const healthy = hasActiveWorkers && !hasFailedJobs && !hasStuckJobs;

    return {
      status: healthy ? 'ok' : 'degraded',
      queues,
      workers,
    };
  } catch (err: unknown) {
    return {
      status: 'error',
      queues: [],
      workers: [],
      error: errorMessage(err, 'Worker status query failed'),
    };
  }
}

/**
 * Summarises crawl activity over the last 24 hours.
 *
 * Freshness is stores_with_recent_crawl / stores_total as a percentage:
 * >= 90 is 'ok', >= 50 is 'degraded', anything lower (including zero total
 * stores) is 'stale'. A failed query yields 'error' with zeroed counters.
 * Never throws.
 *
 * NOTE(review): the "recent" count is taken over all dispensaries while the
 * total only counts rows with menu_type and platform_dispensary_id set, so the
 * ratio can exceed 100% — confirm this is intended.
 */
async function getCrawlsHealth(): Promise<CrawlsHealth> {
  try {
    // Get crawl statistics
    const statsResult = await pool.query(`
      SELECT
        (SELECT MAX(completed_at)
           FROM dispensary_crawl_jobs
          WHERE status = 'success') as last_run,
        (SELECT COUNT(*)
           FROM dispensary_crawl_jobs
          WHERE status = 'success'
            AND completed_at > NOW() - INTERVAL '24 hours') as runs_24h,
        (SELECT COUNT(*)
           FROM dispensaries
          WHERE last_crawl_at > NOW() - INTERVAL '24 hours') as stores_recent,
        (SELECT COUNT(*)
           FROM dispensaries
          WHERE menu_type IS NOT NULL
            AND platform_dispensary_id IS NOT NULL) as stores_total,
        (SELECT COUNT(*)
           FROM dispensaries
          WHERE menu_type = 'dutchie'
            AND platform_dispensary_id IS NOT NULL
            AND (last_crawl_at IS NULL OR last_crawl_at < NOW() - INTERVAL '24 hours')) as stores_stale
    `);

    const stats: Partial<CrawlStatsRow> = statsResult.rows[0] ?? {};
    const storesTotal = toCount(stats.stores_total);
    const storesRecent = toCount(stats.stores_recent);
    const staleStores = toCount(stats.stores_stale);

    // Calculate freshness percentage (0 when there are no stores to avoid dividing by zero)
    const freshPercent = storesTotal > 0 ? (storesRecent / storesTotal) * 100 : 0;

    let status: CrawlsHealth['status'];
    if (freshPercent >= CRAWL_FRESH_OK_PERCENT) {
      status = 'ok';
    } else if (freshPercent >= CRAWL_FRESH_DEGRADED_PERCENT) {
      status = 'degraded';
    } else {
      status = 'stale';
    }

    return {
      status,
      last_run: stats.last_run?.toISOString() ?? null,
      runs_last_24h: toCount(stats.runs_24h),
      stores_with_recent_crawl: storesRecent,
      stores_total: storesTotal,
      stale_stores: staleStores,
    };
  } catch (err: unknown) {
    return {
      status: 'error',
      last_run: null,
      runs_last_24h: 0,
      stores_with_recent_crawl: 0,
      stores_total: 0,
      stale_stores: 0,
      error: errorMessage(err, 'Crawl status query failed'),
    };
  }
}

/**
 * Summarises analytics/aggregate job runs recorded in `job_run_logs`
 * (job names containing 'analytics', status 'success').
 *
 * missing_days is ANALYTICS_WINDOW_DAYS minus the number of distinct start
 * dates in the last 7 days, floored at 0. Status is 'ok' with no missing days,
 * 'degraded' with up to ANALYTICS_MAX_MISSING_DAYS_DEGRADED, otherwise 'stale'.
 * A failed query yields 'error' with every day reported missing. Never throws.
 */
async function getAnalyticsHealth(): Promise<AnalyticsHealth> {
  try {
    // Check analytics/aggregate job runs
    const statsResult = await pool.query(`
      SELECT
        (SELECT MAX(completed_at)
           FROM job_run_logs
          WHERE job_name LIKE '%analytics%'
            AND status = 'success') as last_aggregate,
        (SELECT COUNT(DISTINCT DATE(started_at))
           FROM job_run_logs
          WHERE job_name LIKE '%analytics%'
            AND status = 'success'
            AND started_at > NOW() - INTERVAL '7 days') as runs_7d
    `);

    const stats: Partial<AnalyticsStatsRow> = statsResult.rows[0] ?? {};
    const runsLast7d = toCount(stats.runs_7d);
    const missingDays = Math.max(0, ANALYTICS_WINDOW_DAYS - runsLast7d);

    let status: AnalyticsHealth['status'];
    if (missingDays === 0) {
      status = 'ok';
    } else if (missingDays <= ANALYTICS_MAX_MISSING_DAYS_DEGRADED) {
      status = 'degraded';
    } else {
      status = 'stale';
    }

    return {
      status,
      last_aggregate: stats.last_aggregate?.toISOString() ?? null,
      daily_runs_last_7d: runsLast7d,
      missing_days: missingDays,
    };
  } catch (err: unknown) {
    return {
      status: 'error',
      last_aggregate: null,
      daily_runs_last_7d: 0,
      missing_days: ANALYTICS_WINDOW_DAYS,
      error: errorMessage(err, 'Analytics status query failed'),
    };
  }
}

/**
 * Folds the six subsystem statuses into one: any 'error' wins, then any
 * 'degraded' or 'stale' yields 'degraded', otherwise 'ok'.
 */
function determineOverallStatus(
  api: ApiHealth,
  db: DbHealth,
  redis: RedisHealth,
  workers: WorkersHealth,
  crawls: CrawlsHealth,
  analytics: AnalyticsHealth
): 'ok' | 'degraded' | 'error' {
  const statuses = [
    api.status,
    db.status,
    redis.status,
    workers.status,
    crawls.status,
    analytics.status,
  ];

  if (statuses.includes('error')) {
    return 'error';
  }
  if (statuses.includes('degraded') || statuses.includes('stale')) {
    return 'degraded';
  }
  return 'ok';
}

// ============================================================
// Routes
// ============================================================

/**
 * GET /api/health - Quick API health check (no auth required)
 */
router.get('/', async (_req: Request, res: Response) => {
  const health = await getApiHealth();
  res.json(health);
});

/**
 * GET /api/health/db - Postgres health (200 only when status is 'ok')
 */
router.get('/db', async (_req: Request, res: Response) => {
  const health = await getDbHealth();
  res.status(health.status === 'ok' ? 200 : 503).json(health);
});

/**
 * GET /api/health/redis - Redis health
 */
router.get('/redis', async (_req: Request, res: Response) => {
  const health = await getRedisHealth();
  res.status(httpStatusFor(health.status)).json(health);
});

/**
 * GET /api/health/workers - Queue and worker status
 */
router.get('/workers', async (_req: Request, res: Response) => {
  const health = await getWorkersHealth();
  res.status(httpStatusFor(health.status)).json(health);
});

/**
 * GET /api/health/crawls - Crawl activity summary
 *
 * Note: a 'stale' status answers 503 here, whereas /full folds it into 'degraded' (200).
 */
router.get('/crawls', async (_req: Request, res: Response) => {
  const health = await getCrawlsHealth();
  res.status(httpStatusFor(health.status)).json(health);
});

/**
 * GET /api/health/analytics - Analytics/aggregates status
 *
 * Note: a 'stale' status answers 503 here, whereas /full folds it into 'degraded' (200).
 */
router.get('/analytics', async (_req: Request, res: Response) => {
  const health = await getAnalyticsHealth();
  res.status(httpStatusFor(health.status)).json(health);
});

/**
 * GET /api/health/full - Aggregated view of all subsystems
 *
 * All checks run concurrently; none of them reject, so Promise.all is safe here.
 */
router.get('/full', async (_req: Request, res: Response) => {
  const [api, db, redis, workers, crawls, analytics] = await Promise.all([
    getApiHealth(),
    getDbHealth(),
    getRedisHealth(),
    getWorkersHealth(),
    getCrawlsHealth(),
    getAnalyticsHealth(),
  ]);

  const overallStatus = determineOverallStatus(api, db, redis, workers, crawls, analytics);

  const fullHealth: FullHealth = {
    status: overallStatus,
    api,
    db,
    redis,
    workers,
    crawls,
    analytics,
  };

  res.status(httpStatusFor(overallStatus)).json(fullHealth);
});

export default router;

// Export helper functions for reuse in other modules
export {
  getApiHealth,
  getDbHealth,
  getRedisHealth,
  getWorkersHealth,
  getCrawlsHealth,
  getAnalyticsHealth,
};