Reads APP_GIT_SHA and APP_BUILD_TIME env vars set during Docker build. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
462 lines
13 KiB
TypeScript
462 lines
13 KiB
TypeScript
/**
 * Health Check Routes
 *
 * Comprehensive health endpoints for monitoring API, DB, Redis, Workers, Crawls, and Analytics.
 *
 * Endpoints:
 *   GET /api/health           - Quick API health check
 *   GET /api/health/db        - Postgres health
 *   GET /api/health/redis     - Redis health
 *   GET /api/health/workers   - Queue and worker status
 *   GET /api/health/crawls    - Crawl activity summary
 *   GET /api/health/analytics - Analytics/aggregates status
 *   GET /api/health/full      - Aggregated view of all subsystems
 */
|
|
|
|
import { Router, Request, Response } from 'express';
|
|
import { pool } from '../db/pool';
|
|
import { getRedis } from '../lib/redis';
|
|
import * as fs from 'fs';
|
|
import * as path from 'path';
|
|
|
|
// Express router that the health-check endpoints in this file are registered on.
const router = Router();
|
|
|
|
// Version string reported by the API health payload. It is read once, at module
// load, from the package.json two directories above this file; when that file is
// missing, unreadable, malformed, or has no version, '1.0.0' is used instead.
const packageVersion: string = (() => {
  const fallbackVersion = '1.0.0';
  try {
    const manifestPath = path.join(__dirname, '../../package.json');
    if (!fs.existsSync(manifestPath)) {
      return fallbackVersion;
    }
    const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
    return manifest.version || fallbackVersion;
  } catch {
    // Best effort only: any failure while reading package.json keeps the default.
    return fallbackVersion;
  }
})();

// Captured once when this module is loaded; uptime is measured from this instant.
const serverStartTime = Date.now();
|
|
|
|
// Types
|
|
/** Every status value a health payload in this module can carry. */
type HealthState = 'ok' | 'degraded' | 'error' | 'stale';

/** Base shape shared by all health payloads. */
interface HealthStatus {
  status: HealthState;
}

/** Payload of the quick API check. */
interface ApiHealth extends HealthStatus {
  /** Whole seconds elapsed since the module-level start time was captured. */
  uptime: number;
  /** ISO-8601 time at which the payload was built. */
  timestamp: string;
  /** Version string read from package.json (or its default). */
  version: string;
  /** Value of APP_GIT_SHA, or null when unset or 'unknown'. */
  build_sha: string | null;
  /** Value of APP_BUILD_TIME, or null when unset or 'unknown'. */
  build_time: string | null;
}

/** Result of a connectivity probe; shared by the Postgres and Redis checks. */
interface ConnectivityHealth extends HealthStatus {
  connected: boolean;
  /** Milliseconds spent on the probe (0 when Redis is not configured). */
  latency_ms: number;
  /** Human-readable detail, set whenever the probe did not connect. */
  error?: string;
}

/** Postgres probe result. */
type DbHealth = ConnectivityHealth;

/** Redis probe result. */
type RedisHealth = ConnectivityHealth;

/** Job counters for one queue (one job type). */
interface QueueInfo {
  name: string;
  waiting: number;
  active: number;
  completed: number;
  failed: number;
  paused: boolean;
}

/** One worker entry as reported by the workers probe. */
interface WorkerInfo {
  id: string;
  queue: string;
  status: string;
  /** ISO-8601 timestamp; omitted when no value is available. */
  last_heartbeat?: string;
}

/** Queue and worker overview. */
interface WorkersHealth extends HealthStatus {
  queues: QueueInfo[];
  workers: WorkerInfo[];
}

/** Crawl activity summary. */
interface CrawlsHealth extends HealthStatus {
  /** ISO-8601 completion time of the latest successful crawl job, if any. */
  last_run: string | null;
  runs_last_24h: number;
  stores_with_recent_crawl: number;
  stores_total: number;
  stale_stores: number;
}

/** Analytics/aggregate job summary. */
interface AnalyticsHealth extends HealthStatus {
  /** ISO-8601 completion time of the latest successful analytics job, if any. */
  last_aggregate: string | null;
  daily_runs_last_7d: number;
  missing_days: number;
}

/** Aggregated view: one overall status plus every subsystem payload. */
interface FullHealth extends HealthStatus {
  api: ApiHealth;
  db: DbHealth;
  redis: RedisHealth;
  workers: WorkersHealth;
  crawls: CrawlsHealth;
  analytics: AnalyticsHealth;
}
|
|
|
|
// ============================================================
|
|
// Helper Functions
|
|
// ============================================================
|
|
|
|
/**
 * Build the quick API health payload.
 *
 * No external system is contacted, so the status is always 'ok'.
 *
 * @returns Uptime in whole seconds since the module-level start time, the current
 *          ISO timestamp, the package version, and build metadata taken from the
 *          APP_GIT_SHA / APP_BUILD_TIME environment variables.
 */
async function getApiHealth(): Promise<ApiHealth> {
  // An unset or empty variable and the literal 'unknown' both mean "no build info".
  const buildValue = (raw: string | undefined): string | null =>
    raw && raw !== 'unknown' ? raw : null;

  const uptimeSeconds = Math.floor((Date.now() - serverStartTime) / 1000);

  return {
    status: 'ok',
    uptime: uptimeSeconds,
    timestamp: new Date().toISOString(),
    version: packageVersion,
    build_sha: buildValue(process.env.APP_GIT_SHA),
    build_time: buildValue(process.env.APP_BUILD_TIME),
  };
}
|
|
|
|
/**
 * Probe Postgres by running `SELECT 1` through the shared pool.
 *
 * This function never rejects: any failure is folded into the returned payload,
 * which matters because the route handlers await it without a try/catch.
 *
 * @returns status 'ok' with connected=true on success; status 'error' with
 *          connected=false and an `error` message on failure. `latency_ms` is the
 *          elapsed time of the probe in both cases.
 */
async function getDbHealth(): Promise<DbHealth> {
  const start = Date.now();
  try {
    await pool.query('SELECT 1');
    return {
      status: 'ok',
      connected: true,
      latency_ms: Date.now() - start,
    };
  } catch (err: unknown) {
    // The rejection value is not guaranteed to be an Error (it may even be
    // null/undefined), so read `message` defensively instead of dereferencing it.
    const rawMessage = (err as { message?: unknown } | null | undefined)?.message;
    const message =
      typeof rawMessage === 'string' && rawMessage !== '' ? rawMessage : 'Database connection failed';
    return {
      status: 'error',
      connected: false,
      latency_ms: Date.now() - start,
      error: message,
    };
  }
}
|
|
|
|
/**
 * Probe Redis with a PING, bounded by a timeout.
 *
 * When neither REDIS_URL nor REDIS_HOST is set, no connection is attempted:
 * the result is 'ok' in local environments (NODE_ENV unset, 'development' or
 * 'local') and 'error' otherwise, with `connected` false and `latency_ms` 0.
 *
 * This function never rejects; a failed or timed-out ping is reported as
 * 'degraded' with the failure message in `error`.
 *
 * @param timeoutMs Maximum time to wait for the PING reply, in milliseconds.
 *                  Defaults to 3000.
 * @returns The Redis health payload.
 */
async function getRedisHealth(timeoutMs: number = 3000): Promise<RedisHealth> {
  const start = Date.now();
  const nodeEnv = process.env.NODE_ENV;
  const isLocal = !nodeEnv || nodeEnv === 'development' || nodeEnv === 'local';

  // Redis is optional in local dev, required in prod/staging.
  if (!process.env.REDIS_URL && !process.env.REDIS_HOST) {
    return {
      status: isLocal ? 'ok' : 'error',
      connected: false,
      latency_ms: 0,
      error: isLocal ? 'Redis not configured (optional in local)' : 'Redis not configured (required in production)',
    };
  }

  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
  try {
    const redis = getRedis();
    const pingPromise = redis.ping();
    // Race the ping against a timer so an unresponsive server cannot hang the check.
    const timeoutPromise = new Promise<never>((_, reject) => {
      timeoutHandle = setTimeout(() => reject(new Error('Redis ping timeout')), timeoutMs);
    });

    await Promise.race([pingPromise, timeoutPromise]);
    return {
      status: 'ok',
      connected: true,
      latency_ms: Date.now() - start,
    };
  } catch (err: unknown) {
    // The rejection value may not be an Error (or may be null), so read it defensively.
    const rawMessage = (err as { message?: unknown } | null | undefined)?.message;
    const message = typeof rawMessage === 'string' && rawMessage !== '' ? rawMessage : 'Redis ping failed';
    return {
      status: 'degraded',
      connected: false,
      latency_ms: Date.now() - start,
      error: message,
    };
  } finally {
    // Always cancel the timer; otherwise every call leaves a live timer behind
    // for the full timeout period even after the ping has already settled.
    if (timeoutHandle !== undefined) {
      clearTimeout(timeoutHandle);
    }
  }
}
|
|
|
|
/**
 * Summarise crawl-job queues and scheduled workers from Postgres.
 *
 * Queue counters come from `dispensary_crawl_jobs` rows created in the last
 * 7 days, grouped by job type. Workers are the enabled rows of `job_schedules`
 * (at most 20), most recently run first.
 *
 * @returns 'ok' only when at least one worker row exists, no queue has failed
 *          jobs, and no queue has more than 5 running jobs; otherwise 'degraded'.
 *          If anything throws (for example a failed query) the result is 'error'
 *          with empty lists.
 */
async function getWorkersHealth(): Promise<WorkersHealth> {
  // Counters are passed through parseInt; anything unparsable counts as 0.
  const toCount = (raw: any): number => parseInt(raw) || 0;

  try {
    const { rows: queueRows } = await pool.query(`
      SELECT
        job_type as name,
        COUNT(*) FILTER (WHERE status = 'pending') as waiting,
        COUNT(*) FILTER (WHERE status = 'running') as active,
        COUNT(*) FILTER (WHERE status = 'success') as completed,
        COUNT(*) FILTER (WHERE status IN ('error', 'failed')) as failed,
        false as paused
      FROM dispensary_crawl_jobs
      WHERE created_at > NOW() - INTERVAL '7 days'
      GROUP BY job_type
    `);

    const queues: QueueInfo[] = [];
    for (const row of queueRows) {
      queues.push({
        name: row.name || 'unknown',
        waiting: toCount(row.waiting),
        active: toCount(row.active),
        completed: toCount(row.completed),
        failed: toCount(row.failed),
        paused: row.paused || false,
      });
    }

    const { rows: workerRows } = await pool.query(`
      SELECT
        COALESCE(job_config->>'worker_name', job_name) as id,
        job_name as queue,
        CASE WHEN enabled THEN 'connected' ELSE 'disconnected' END as status,
        last_run_at as last_heartbeat
      FROM job_schedules
      WHERE enabled = true
      ORDER BY last_run_at DESC NULLS LAST
      LIMIT 20
    `);

    const workers: WorkerInfo[] = [];
    for (const row of workerRows) {
      workers.push({
        id: row.id,
        queue: row.queue,
        status: row.status,
        last_heartbeat: row.last_heartbeat?.toISOString() || undefined,
      });
    }

    // Healthy means: some worker is present, nothing failed, and no queue
    // exceeds the (arbitrary) threshold of 5 concurrently running jobs.
    const anyWorker = workers.length > 0;
    const anyFailed = queues.some((queue) => queue.failed > 0);
    const anyStuck = queues.some((queue) => queue.active > 5);
    const status: 'ok' | 'degraded' | 'error' = anyWorker && !anyFailed && !anyStuck ? 'ok' : 'degraded';

    return { status, queues, workers };
  } catch {
    return { status: 'error', queues: [], workers: [] };
  }
}
|
|
|
|
/**
 * Summarise crawl freshness from Postgres.
 *
 * A single query returns: the latest successful crawl completion time, the
 * number of successful crawls completed in the last 24 hours, the number of
 * dispensaries crawled in the last 24 hours, the number of dispensaries that
 * have both a menu type and a platform id, and the number of 'dutchie'
 * dispensaries with a platform id that were never crawled or were last crawled
 * more than 24 hours ago.
 *
 * NOTE(review): the "recent", "total" and "stale" counters use different
 * filters, so they do not necessarily add up — confirm this is intended.
 *
 * @returns 'ok' when at least 90% of the total was crawled recently, 'degraded'
 *          from 50% up to 90%, 'stale' below 50% (including when the total is 0).
 *          If anything throws, the result is 'error' with zeroed counters.
 */
async function getCrawlsHealth(): Promise<CrawlsHealth> {
  // Counters are passed through parseInt; anything unparsable counts as 0.
  const toCount = (raw: any): number => parseInt(raw) || 0;

  try {
    const { rows } = await pool.query(`
      SELECT
        (SELECT MAX(completed_at) FROM dispensary_crawl_jobs WHERE status = 'success') as last_run,
        (SELECT COUNT(*) FROM dispensary_crawl_jobs WHERE status = 'success' AND completed_at > NOW() - INTERVAL '24 hours') as runs_24h,
        (SELECT COUNT(*) FROM dispensaries WHERE last_crawl_at > NOW() - INTERVAL '24 hours') as stores_recent,
        (SELECT COUNT(*) FROM dispensaries WHERE menu_type IS NOT NULL AND platform_dispensary_id IS NOT NULL) as stores_total,
        (SELECT COUNT(*) FROM dispensaries WHERE menu_type = 'dutchie' AND platform_dispensary_id IS NOT NULL AND (last_crawl_at IS NULL OR last_crawl_at < NOW() - INTERVAL '24 hours')) as stores_stale
    `);

    const row = rows[0] || {};
    const total = toCount(row.stores_total);
    const recent = toCount(row.stores_recent);
    const stale = toCount(row.stores_stale);

    // Share of stores crawled in the last 24 hours; 0 when there are no stores.
    const freshPercent = total > 0 ? (recent / total) * 100 : 0;

    let status: 'ok' | 'degraded' | 'stale' | 'error';
    if (freshPercent < 50) {
      status = 'stale';
    } else if (freshPercent < 90) {
      status = 'degraded';
    } else {
      status = 'ok';
    }

    return {
      status,
      last_run: row.last_run?.toISOString() || null,
      runs_last_24h: toCount(row.runs_24h),
      stores_with_recent_crawl: recent,
      stores_total: total,
      stale_stores: stale,
    };
  } catch {
    return {
      status: 'error',
      last_run: null,
      runs_last_24h: 0,
      stores_with_recent_crawl: 0,
      stores_total: 0,
      stale_stores: 0,
    };
  }
}
|
|
|
|
/**
 * Summarise analytics/aggregate job activity from `job_run_logs`.
 *
 * Looks at successful runs whose job name contains 'analytics': the latest
 * completion time, and the number of distinct start dates within the last
 * 7 days. Missing days are computed as 7 minus that number, floored at 0.
 *
 * @returns 'ok' with no missing days, 'degraded' with 1-2 missing days,
 *          'stale' with more. If anything throws, the result is 'error' with
 *          7 missing days.
 */
async function getAnalyticsHealth(): Promise<AnalyticsHealth> {
  try {
    const { rows } = await pool.query(`
      SELECT
        (SELECT MAX(completed_at) FROM job_run_logs WHERE job_name LIKE '%analytics%' AND status = 'success') as last_aggregate,
        (SELECT COUNT(DISTINCT DATE(started_at)) FROM job_run_logs WHERE job_name LIKE '%analytics%' AND status = 'success' AND started_at > NOW() - INTERVAL '7 days') as runs_7d
    `);

    const row = rows[0] || {};
    const daysWithRuns = parseInt(row.runs_7d) || 0;
    const daysMissing = Math.max(0, 7 - daysWithRuns);

    let status: 'ok' | 'degraded' | 'stale' | 'error';
    if (daysMissing > 2) {
      status = 'stale';
    } else if (daysMissing > 0) {
      status = 'degraded';
    } else {
      status = 'ok';
    }

    return {
      status,
      last_aggregate: row.last_aggregate?.toISOString() || null,
      daily_runs_last_7d: daysWithRuns,
      missing_days: daysMissing,
    };
  } catch {
    return {
      status: 'error',
      last_aggregate: null,
      daily_runs_last_7d: 0,
      missing_days: 7,
    };
  }
}
|
|
|
|
/**
 * Collapse the six subsystem statuses into one overall status.
 *
 * Precedence: any 'error' wins; otherwise any 'degraded' or 'stale' yields
 * 'degraded'; otherwise 'ok'.
 *
 * @returns The overall status; 'stale' is never returned, it is reported as 'degraded'.
 */
function determineOverallStatus(
  api: ApiHealth,
  db: DbHealth,
  redis: RedisHealth,
  workers: WorkersHealth,
  crawls: CrawlsHealth,
  analytics: AnalyticsHealth
): 'ok' | 'degraded' | 'error' {
  const subsystems = [api, db, redis, workers, crawls, analytics];

  if (subsystems.some((subsystem) => subsystem.status === 'error')) {
    return 'error';
  }

  const impaired = subsystems.some(
    (subsystem) => subsystem.status === 'degraded' || subsystem.status === 'stale'
  );
  return impaired ? 'degraded' : 'ok';
}
|
|
|
|
// ============================================================
|
|
// Routes
|
|
// ============================================================
|
|
|
|
/**
 * HTTP status code for a subsystem payload: 'ok' and 'degraded' answer 200,
 * every other status ('error', 'stale') answers 503.
 */
const httpStatusFor = (status: HealthStatus['status']): number =>
  status === 'ok' || status === 'degraded' ? 200 : 503;

/**
 * GET /api/health - Quick API health check (no auth required)
 */
router.get('/', async (_req: Request, res: Response) => {
  res.json(await getApiHealth());
});

/**
 * GET /api/health/db - Postgres health
 *
 * Stricter than the other endpoints: anything other than 'ok' answers 503.
 */
router.get('/db', async (_req: Request, res: Response) => {
  const dbHealth = await getDbHealth();
  res.status(dbHealth.status === 'ok' ? 200 : 503).json(dbHealth);
});

/**
 * GET /api/health/redis - Redis health
 */
router.get('/redis', async (_req: Request, res: Response) => {
  const redisHealth = await getRedisHealth();
  res.status(httpStatusFor(redisHealth.status)).json(redisHealth);
});

/**
 * GET /api/health/workers - Queue and worker status
 */
router.get('/workers', async (_req: Request, res: Response) => {
  const workersHealth = await getWorkersHealth();
  res.status(httpStatusFor(workersHealth.status)).json(workersHealth);
});

/**
 * GET /api/health/crawls - Crawl activity summary
 */
router.get('/crawls', async (_req: Request, res: Response) => {
  const crawlsHealth = await getCrawlsHealth();
  res.status(httpStatusFor(crawlsHealth.status)).json(crawlsHealth);
});

/**
 * GET /api/health/analytics - Analytics/aggregates status
 */
router.get('/analytics', async (_req: Request, res: Response) => {
  const analyticsHealth = await getAnalyticsHealth();
  res.status(httpStatusFor(analyticsHealth.status)).json(analyticsHealth);
});

/**
 * GET /api/health/full - Aggregated view of all subsystems
 *
 * Runs every probe concurrently and answers with the combined payload.
 */
router.get('/full', async (_req: Request, res: Response) => {
  const [api, db, redis, workers, crawls, analytics] = await Promise.all([
    getApiHealth(),
    getDbHealth(),
    getRedisHealth(),
    getWorkersHealth(),
    getCrawlsHealth(),
    getAnalyticsHealth(),
  ]);

  const status = determineOverallStatus(api, db, redis, workers, crawls, analytics);
  const payload: FullHealth = { status, api, db, redis, workers, crawls, analytics };

  res.status(httpStatusFor(status)).json(payload);
});
|
|
|
|
// Default export: the router carrying the health endpoints registered above.
export default router;
|
|
|
|
// Named exports: the individual health probes, for reuse in other modules.
export { getApiHealth, getDbHealth, getRedisHealth };
export { getWorkersHealth, getCrawlsHealth, getAnalyticsHealth };
|