/**
 * Orchestrator Admin Routes
 *
 * Admin API endpoints for the CannaiQ Orchestrator Dashboard.
 * Most routes are read-only observability endpoints. The stage, alert
 * acknowledgement and crawl-outcome routes further down DO write to the
 * database.
 */

import { Router, Request, Response } from 'express';
import { pool } from '../db/pool';
import { getLatestTrace, getTracesForDispensary, getTraceById } from '../services/orchestrator-trace';
import { getProviderDisplayName } from '../utils/provider-display';
import * as fs from 'fs';
import * as path from 'path';

const router = Router();

// ============================================================
// SHARED HELPERS
// ============================================================

/** Extracts a printable message from whatever was thrown. */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === 'object' && error !== null && 'message' in error) {
    return String((error as { message: unknown }).message);
  }
  return String(error);
}

/** Returns the `code` property of a thrown value (e.g. 'ENOENT'), if it has one. */
function errorCode(error: unknown): string | undefined {
  if (typeof error === 'object' && error !== null && 'code' in error) {
    const code = (error as { code: unknown }).code;
    return typeof code === 'string' ? code : undefined;
  }
  return undefined;
}

/**
 * Logs the failure and answers with HTTP 500.
 * @param res    Express response to write to
 * @param action Verb phrase used in the log line, e.g. "fetching metrics"
 * @param error  The caught value
 */
function sendServerError(res: Response, action: string, error: unknown): void {
  const message = errorMessage(error);
  console.error(`[OrchestratorAdmin] Error ${action}:`, message);
  res.status(500).json({ error: message });
}

/**
 * Converts a COUNT(*)-style column to a number, treating missing or
 * unparsable values as 0. (Presumably the driver returns these as strings.)
 */
function toCount(value: unknown): number {
  const parsed = parseInt(String(value || '0'), 10);
  return Number.isNaN(parsed) ? 0 : parsed;
}

/**
 * Parses an integer id from a route/query/body value.
 * @returns the parsed integer, or null when the value is not numeric
 */
function parseId(value: unknown): number | null {
  const parsed = parseInt(String(value), 10);
  return Number.isNaN(parsed) ? null : parsed;
}

/**
 * Parses a pagination value (limit/offset). Missing, non-numeric or negative
 * input falls back to `fallback` instead of being handed to the database.
 */
function parseNonNegativeInt(value: unknown, fallback: number): number {
  if (value === undefined || value === null) {
    return fallback;
  }
  const parsed = parseInt(String(value), 10);
  return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
}

// ============================================================
// ORCHESTRATOR METRICS
// ============================================================

/**
 * GET /api/admin/orchestrator/metrics
 * Returns nationwide metrics for the orchestrator dashboard
 */
router.get('/metrics', async (_req: Request, res: Response): Promise<void> => {
  try {
    // Get aggregate metrics using 7-stage pipeline
    const { rows: metrics } = await pool.query(`
      SELECT
        (SELECT COUNT(*) FROM store_products) as total_products,
        (SELECT COUNT(DISTINCT brand_name_raw) FROM store_products WHERE brand_name_raw IS NOT NULL) as total_brands,
        (SELECT COUNT(*) FROM dispensaries WHERE menu_type = 'dutchie' AND crawl_enabled = true) as total_stores,
        -- Stage counts from dispensaries table (7-stage pipeline)
        (SELECT COUNT(*) FROM dispensaries WHERE stage = 'discovered') as discovered_count,
        (SELECT COUNT(*) FROM dispensaries WHERE stage = 'validated') as validated_count,
        (SELECT COUNT(*) FROM dispensaries WHERE stage = 'promoted') as promoted_count,
        (SELECT COUNT(*) FROM dispensaries WHERE stage = 'sandbox') as sandbox_count,
        (SELECT COUNT(*) FROM dispensaries WHERE stage = 'hydrating') as hydrating_count,
        (SELECT COUNT(*) FROM dispensaries WHERE stage = 'production') as production_count,
        (SELECT COUNT(*) FROM dispensaries WHERE stage = 'failing') as failing_count,
        -- Discovery pipeline counts
        (SELECT COUNT(*) FROM dutchie_discovery_locations WHERE stage = 'discovered' AND active = true) as discovery_pending
    `);

    const row = metrics[0] || {};

    res.json({
      total_products: toCount(row.total_products),
      total_brands: toCount(row.total_brands),
      total_stores: toCount(row.total_stores),
      // 7-Stage Pipeline Counts
      stages: {
        discovered: toCount(row.discovered_count),
        validated: toCount(row.validated_count),
        promoted: toCount(row.promoted_count),
        sandbox: toCount(row.sandbox_count),
        hydrating: toCount(row.hydrating_count),
        production: toCount(row.production_count),
        failing: toCount(row.failing_count),
      },
      // Discovery pipeline
      discovery_pending: toCount(row.discovery_pending),
      // Legacy compatibility: older dashboard fields mapped onto stage counts
      healthy_count: toCount(row.production_count),
      sandbox_count: toCount(row.sandbox_count),
      needs_manual_count: toCount(row.failing_count),
      failing_count: toCount(row.failing_count),
    });
  } catch (error: unknown) {
    sendServerError(res, 'fetching metrics', error);
  }
});

// ============================================================
// STATES LIST
// ============================================================

/**
 * GET /api/admin/orchestrator/states
 * Returns array of states with at least one crawl-enabled dispensary
 */
router.get('/states', async (_req: Request, res: Response): Promise<void> => {
  try {
    // GROUP BY already yields one row per state, so no DISTINCT is needed.
    const { rows } = await pool.query(`
      SELECT state, COUNT(*) as store_count
      FROM dispensaries
      WHERE state IS NOT NULL AND crawl_enabled = true
      GROUP BY state
      ORDER BY state
    `);

    res.json({
      states: rows.map((r: any) => ({
        state: r.state,
        storeCount: toCount(r.store_count),
      })),
    });
  } catch (error: unknown) {
    sendServerError(res, 'fetching states', error);
  }
});

// ============================================================
// STORES LIST
// ============================================================

/**
 * GET /api/admin/orchestrator/stores
 * Returns list of stores with orchestrator status info
 * Query params:
 *   - state: Filter by state (e.g., "AZ")
 *   - crawl_enabled: Filter by crawl status (default: true, use "all" to show all, "false" for disabled only)
 *   - limit: Max results (default 100)
 *   - offset: Pagination offset
 */
router.get('/stores', async (req: Request, res: Response): Promise<void> => {
  try {
    const { state, crawl_enabled } = req.query;
    const limit = parseNonNegativeInt(req.query.limit, 100);
    const offset = parseNonNegativeInt(req.query.offset, 0);

    // Filter conditions are shared by the page query and the count query.
    const conditions: string[] = [];
    const filterParams: unknown[] = [];

    if (state && state !== 'all') {
      filterParams.push(state);
      conditions.push(`d.state = $${filterParams.length}`);
    }

    // Filter by crawl_enabled - defaults to showing only enabled
    if (crawl_enabled === 'false' || crawl_enabled === '0') {
      conditions.push('(d.crawl_enabled = false OR d.crawl_enabled IS NULL)');
    } else if (crawl_enabled !== 'all') {
      // Default: show only enabled ("all" adds no filter)
      conditions.push('d.crawl_enabled = true');
    }

    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
    const limitIndex = filterParams.length + 1;

    const { rows } = await pool.query(`
      SELECT
        d.id,
        d.name,
        d.city,
        d.state,
        d.menu_type as provider,
        d.platform_dispensary_id,
        d.last_crawl_at,
        d.crawl_enabled,
        d.stage,
        d.stage_changed_at,
        d.first_crawl_at,
        d.last_successful_crawl_at,
        dcp.id as profile_id,
        dcp.profile_key,
        dcp.consecutive_successes,
        dcp.consecutive_failures,
        (
          SELECT MAX(cot.completed_at)
          FROM crawl_orchestration_traces cot
          WHERE cot.dispensary_id = d.id AND cot.success = true
        ) as last_success_at,
        (
          SELECT MAX(cot.completed_at)
          FROM crawl_orchestration_traces cot
          WHERE cot.dispensary_id = d.id AND cot.success = false
        ) as last_failure_at,
        (
          SELECT COUNT(*)
          FROM store_products sp
          WHERE sp.dispensary_id = d.id
        ) as product_count
      FROM dispensaries d
      LEFT JOIN dispensary_crawler_profiles dcp
        ON dcp.dispensary_id = d.id AND dcp.enabled = true
      ${whereClause}
      ORDER BY d.name
      LIMIT $${limitIndex} OFFSET $${limitIndex + 1}
    `, [...filterParams, limit, offset]);

    // Get total count (same filters, no pagination)
    const { rows: countRows } = await pool.query(
      `SELECT COUNT(*) as total FROM dispensaries d ${whereClause}`,
      filterParams
    );

    res.json({
      stores: rows.map((r: any) => ({
        id: r.id,
        name: r.name,
        city: r.city,
        state: r.state,
        provider: r.provider || 'unknown',
        provider_raw: r.provider || null,
        // Admin routes show actual provider names (not anonymized)
        provider_display: r.provider || 'Unknown',
        platformDispensaryId: r.platform_dispensary_id,
        crawlEnabled: r.crawl_enabled ?? false,
        // Stage comes from the dispensaries table, not from the profile
        stage: r.stage || 'discovered',
        stageChangedAt: r.stage_changed_at,
        firstCrawlAt: r.first_crawl_at,
        lastSuccessfulCrawlAt: r.last_successful_crawl_at,
        consecutiveSuccesses: r.consecutive_successes || 0,
        consecutiveFailures: r.consecutive_failures || 0,
        profileId: r.profile_id,
        profileKey: r.profile_key,
        lastCrawlAt: r.last_crawl_at,
        lastSuccessAt: r.last_success_at,
        lastFailureAt: r.last_failure_at,
        productCount: toCount(r.product_count),
      })),
      total: toCount(countRows[0]?.total),
      limit,
      offset,
    });
  } catch (error: unknown) {
    sendServerError(res, 'fetching stores', error);
  }
});

// ============================================================
// DISPENSARY TRACE (already exists but adding here for clarity)
// ============================================================

/**
 * GET /api/admin/dispensaries/:id/crawl-trace/latest
 * Returns the latest orchestrator trace for a dispensary
 */
router.get('/dispensaries/:id/crawl-trace/latest', async (req: Request, res: Response): Promise<void> => {
  try {
    const dispensaryId = parseId(req.params.id);
    if (dispensaryId === null) {
      res.status(400).json({ error: 'Invalid dispensary id' });
      return;
    }

    const trace = await getLatestTrace(dispensaryId);
    if (!trace) {
      res.status(404).json({ error: 'No trace found for this dispensary' });
      return;
    }

    res.json(trace);
  } catch (error: unknown) {
    sendServerError(res, 'fetching trace', error);
  }
});

/**
 * GET /api/admin/dispensaries/:id/crawl-traces
 * Returns paginated list of traces for a dispensary
 * Query params:
 *   - limit: Max results (default 20)
 *   - offset: Pagination offset (default 0)
 */
router.get('/dispensaries/:id/crawl-traces', async (req: Request, res: Response): Promise<void> => {
  try {
    const dispensaryId = parseId(req.params.id);
    if (dispensaryId === null) {
      res.status(400).json({ error: 'Invalid dispensary id' });
      return;
    }

    const result = await getTracesForDispensary(
      dispensaryId,
      parseNonNegativeInt(req.query.limit, 20),
      parseNonNegativeInt(req.query.offset, 0)
    );

    res.json(result);
  } catch (error: unknown) {
    sendServerError(res, 'fetching traces', error);
  }
});

// ============================================================
// DISPENSARY PROFILE
// ============================================================

/**
 * GET /api/admin/dispensaries/:id/profile
 * Returns the crawler profile for a dispensary. When the dispensary exists
 * but has no enabled profile, basic dispensary info is returned with
 * hasProfile: false.
 */
router.get('/dispensaries/:id/profile', async (req: Request, res: Response): Promise<void> => {
  try {
    const dispensaryId = parseId(req.params.id);
    if (dispensaryId === null) {
      res.status(400).json({ error: 'Invalid dispensary id' });
      return;
    }

    const { rows } = await pool.query(`
      SELECT
        dcp.id,
        dcp.dispensary_id,
        dcp.profile_key,
        dcp.profile_name,
        dcp.crawler_type,
        dcp.version,
        dcp.status,
        dcp.config,
        dcp.enabled,
        dcp.sandbox_attempts,
        dcp.created_at,
        dcp.updated_at,
        d.name as dispensary_name,
        d.active_crawler_profile_id
      FROM dispensary_crawler_profiles dcp
      JOIN dispensaries d ON d.id = dcp.dispensary_id
      WHERE dcp.dispensary_id = $1 AND dcp.enabled = true
      ORDER BY dcp.updated_at DESC
      LIMIT 1
    `, [dispensaryId]);

    if (rows.length === 0) {
      // Return basic dispensary info even if no profile
      const { rows: dispRows } = await pool.query(`
        SELECT id, name, active_crawler_profile_id, menu_type, platform_dispensary_id
        FROM dispensaries
        WHERE id = $1
      `, [dispensaryId]);

      if (dispRows.length === 0) {
        res.status(404).json({ error: 'Dispensary not found' });
        return;
      }

      const dispensary = dispRows[0];
      res.json({
        dispensaryId: dispensary.id,
        dispensaryName: dispensary.name,
        hasProfile: false,
        activeProfileId: dispensary.active_crawler_profile_id,
        menuType: dispensary.menu_type,
        platformDispensaryId: dispensary.platform_dispensary_id,
      });
      return;
    }

    const profile = rows[0];
    res.json({
      dispensaryId: profile.dispensary_id,
      dispensaryName: profile.dispensary_name,
      hasProfile: true,
      activeProfileId: profile.active_crawler_profile_id,
      profile: {
        id: profile.id,
        profileKey: profile.profile_key,
        profileName: profile.profile_name,
        crawlerType: profile.crawler_type,
        version: profile.version,
        // Column value wins; fall back to a status embedded in the config
        status: profile.status || profile.config?.status || 'unknown',
        config: profile.config,
        enabled: profile.enabled,
        sandboxAttempts: profile.sandbox_attempts || [],
        createdAt: profile.created_at,
        updatedAt: profile.updated_at,
      },
    });
  } catch (error: unknown) {
    sendServerError(res, 'fetching profile', error);
  }
});

// ============================================================
// CRAWLER MODULE PREVIEW
// ============================================================

/**
 * GET /api/admin/dispensaries/:id/crawler-module
 * Returns the raw .ts file content for the per-store crawler
 */
router.get('/dispensaries/:id/crawler-module', async (req: Request, res: Response): Promise<void> => {
  try {
    const dispensaryId = parseId(req.params.id);
    if (dispensaryId === null) {
      res.status(400).json({ error: 'Invalid dispensary id' });
      return;
    }

    // Get the profile key for this dispensary
    const { rows } = await pool.query(`
      SELECT profile_key, crawler_type
      FROM dispensary_crawler_profiles
      WHERE dispensary_id = $1 AND enabled = true
      ORDER BY updated_at DESC
      LIMIT 1
    `, [dispensaryId]);

    if (rows.length === 0 || !rows[0].profile_key) {
      res.status(404).json({
        error: 'No per-store crawler module found for this dispensary',
        hasModule: false,
      });
      return;
    }

    const profileKey = rows[0].profile_key;
    const crawlerType = rows[0].crawler_type || 'dutchie';
    const fileName = `${profileKey}.ts`;
    const relativePath = `crawlers/${crawlerType}/stores/${profileKey}.ts`;

    // Construct file path
    const crawlersRoot = path.resolve(__dirname, '..', 'crawlers');
    const modulePath = path.resolve(crawlersRoot, String(crawlerType), 'stores', fileName);

    // The path segments come from the database; refuse anything that would
    // resolve outside the crawlers directory (e.g. values containing "..").
    if (!modulePath.startsWith(crawlersRoot + path.sep)) {
      res.status(400).json({
        error: 'Invalid crawler module path',
        hasModule: false,
      });
      return;
    }

    // Read file content; a missing file is reported as 404 rather than 500
    let content: string;
    try {
      content = await fs.promises.readFile(modulePath, 'utf-8');
    } catch (readError: unknown) {
      const code = errorCode(readError);
      if (code === 'ENOENT' || code === 'ENOTDIR') {
        res.status(404).json({
          error: `Crawler module file not found: ${profileKey}.ts`,
          hasModule: false,
          expectedPath: relativePath,
        });
        return;
      }
      throw readError;
    }

    res.json({
      hasModule: true,
      profileKey,
      crawlerType,
      fileName,
      filePath: relativePath,
      content,
      lines: content.split('\n').length,
    });
  } catch (error: unknown) {
    sendServerError(res, 'fetching crawler module', error);
  }
});

// ============================================================
// TRACE BY ID
// ============================================================

/**
 * GET /api/admin/crawl-traces/:traceId
 * Returns a specific trace by ID
 */
router.get('/crawl-traces/:traceId', async (req: Request, res: Response): Promise<void> => {
  try {
    const traceId = parseId(req.params.traceId);
    if (traceId === null) {
      res.status(400).json({ error: 'Invalid trace id' });
      return;
    }

    const trace = await getTraceById(traceId);
    if (!trace) {
      res.status(404).json({ error: 'Trace not found' });
      return;
    }

    res.json(trace);
  } catch (error: unknown) {
    sendServerError(res, 'fetching trace', error);
  }
});

// ============================================================
// STATUS MANAGEMENT
// ============================================================

// Stages accepted by the manual stage endpoint.
// NOTE(review): /metrics also counts a 'hydrating' stage and /crawl-outcome
// writes 'needs_manual'; neither is accepted here - confirm that is intended.
const VALID_STAGES = ['discovered', 'validated', 'promoted', 'sandbox', 'production', 'failing'] as const;

/**
 * POST /api/admin/orchestrator/stores/:id/stage
 * Manually update the stage for a store (use /api/pipeline for proper transitions).
 * Updates the status of the store's enabled crawler profile and records a
 * status_change alert.
 * Body: { stage: 'discovered' | 'validated' | 'promoted' | 'sandbox' | 'production' | 'failing', reason?: string }
 */
router.post('/stores/:id/stage', async (req: Request, res: Response): Promise<void> => {
  try {
    const { stage: status, reason } = req.body ?? {};

    if (!status || !VALID_STAGES.includes(status)) {
      res.status(400).json({
        error: `Invalid stage. Must be one of: ${VALID_STAGES.join(', ')}`,
      });
      return;
    }

    const dispensaryId = parseId(req.params.id);
    if (dispensaryId === null) {
      res.status(400).json({ error: 'Invalid store id' });
      return;
    }

    // Get current profile and status
    const { rows: profileRows } = await pool.query(`
      SELECT dcp.id, dcp.status as current_status, d.name as dispensary_name
      FROM dispensary_crawler_profiles dcp
      JOIN dispensaries d ON d.id = dcp.dispensary_id
      WHERE dcp.dispensary_id = $1 AND dcp.enabled = true
      ORDER BY dcp.updated_at DESC
      LIMIT 1
    `, [dispensaryId]);

    if (profileRows.length === 0) {
      res.status(404).json({ error: 'No crawler profile found for this store' });
      return;
    }

    const profileId = profileRows[0].id;
    const currentStatus = profileRows[0].current_status;
    const dispensaryName = profileRows[0].dispensary_name;

    // Update the status
    await pool.query(`
      UPDATE dispensary_crawler_profiles
      SET
        status = $1,
        status_reason = $2,
        status_changed_at = CURRENT_TIMESTAMP,
        updated_at = CURRENT_TIMESTAMP
      WHERE id = $3
    `, [status, reason || `Manual status change to ${status}`, profileId]);

    // Create status alert. Of the accepted stages only 'failing' is an error;
    // every other manual change is informational.
    const severity = status === 'failing' ? 'error' : 'info';

    await pool.query(`
      INSERT INTO crawler_status_alerts
        (dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata)
      VALUES ($1, $2, 'status_change', $3, $4, $5, $6, $7)
    `, [
      dispensaryId,
      profileId,
      severity,
      `${dispensaryName}: Status changed from ${currentStatus || 'unknown'} to ${status}`,
      currentStatus,
      status,
      JSON.stringify({ reason, changedBy: 'admin_api' }),
    ]);

    res.json({
      success: true,
      dispensaryId,
      profileId,
      previousStatus: currentStatus,
      newStatus: status,
      message: `Status updated to ${status}`,
    });
  } catch (error: unknown) {
    sendServerError(res, 'updating status', error);
  }
});

/**
 * GET /api/admin/orchestrator/alerts
 * Get recent status alerts for the dashboard
 * Query params:
 *   - severity: Filter by severity (info, warning, error, critical)
 *   - acknowledged: Filter by acknowledged status (true/false)
 *   - dispensary_id: Filter by dispensary
 *   - limit: Max results (default 50)
 */
router.get('/alerts', async (req: Request, res: Response): Promise<void> => {
  try {
    const { severity, acknowledged, dispensary_id } = req.query;
    const limit = parseNonNegativeInt(req.query.limit, 50);

    const conditions: string[] = [];
    const params: unknown[] = [];

    if (severity) {
      params.push(severity);
      conditions.push(`csa.severity = $${params.length}`);
    }

    if (acknowledged === 'true') {
      conditions.push('csa.acknowledged = true');
    } else if (acknowledged === 'false') {
      conditions.push('csa.acknowledged = false');
    }

    if (dispensary_id) {
      const dispensaryId = parseId(dispensary_id);
      if (dispensaryId === null) {
        res.status(400).json({ error: 'Invalid dispensary_id' });
        return;
      }
      params.push(dispensaryId);
      conditions.push(`csa.dispensary_id = $${params.length}`);
    }

    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
    params.push(limit);

    const { rows } = await pool.query(`
      SELECT
        csa.*,
        d.name as dispensary_name,
        d.city,
        d.state
      FROM crawler_status_alerts csa
      LEFT JOIN dispensaries d ON csa.dispensary_id = d.id
      ${whereClause}
      ORDER BY csa.created_at DESC
      LIMIT $${params.length}
    `, params);

    // Get unacknowledged count by severity (ignores the filters above)
    const { rows: countRows } = await pool.query(`
      SELECT severity, COUNT(*) as count
      FROM crawler_status_alerts
      WHERE acknowledged = false
      GROUP BY severity
    `);

    const unacknowledgedCounts: Record<string, number> = {};
    for (const countRow of countRows) {
      unacknowledgedCounts[countRow.severity] = toCount(countRow.count);
    }

    res.json({
      alerts: rows.map((r: any) => ({
        id: r.id,
        dispensaryId: r.dispensary_id,
        dispensaryName: r.dispensary_name,
        city: r.city,
        state: r.state,
        profileId: r.profile_id,
        alertType: r.alert_type,
        severity: r.severity,
        message: r.message,
        previousStatus: r.previous_status,
        newStatus: r.new_status,
        errorDetails: r.error_details,
        metadata: r.metadata,
        acknowledged: r.acknowledged,
        acknowledgedAt: r.acknowledged_at,
        acknowledgedBy: r.acknowledged_by,
        createdAt: r.created_at,
      })),
      unacknowledgedCounts,
    });
  } catch (error: unknown) {
    sendServerError(res, 'fetching alerts', error);
  }
});

/**
 * POST /api/admin/orchestrator/alerts/:id/acknowledge
 * Acknowledge an alert
 * Body: { acknowledgedBy?: string } (default "admin")
 */
router.post('/alerts/:id/acknowledge', async (req: Request, res: Response): Promise<void> => {
  try {
    const alertId = parseId(req.params.id);
    if (alertId === null) {
      res.status(400).json({ error: 'Invalid alert id' });
      return;
    }

    const { acknowledgedBy = 'admin' } = req.body ?? {};

    await pool.query(`
      UPDATE crawler_status_alerts
      SET acknowledged = true, acknowledged_at = CURRENT_TIMESTAMP, acknowledged_by = $1
      WHERE id = $2
    `, [acknowledgedBy, alertId]);

    res.json({ success: true, alertId });
  } catch (error: unknown) {
    sendServerError(res, 'acknowledging alert', error);
  }
});

/**
 * POST /api/admin/orchestrator/alerts/acknowledge-all
 * Acknowledge all unacknowledged alerts (optionally filtered)
 * Body: { severity?: string, dispensaryId?: number, acknowledgedBy?: string }
 */
router.post('/alerts/acknowledge-all', async (req: Request, res: Response): Promise<void> => {
  try {
    const { severity, dispensaryId, acknowledgedBy = 'admin' } = req.body ?? {};

    // $1 is always acknowledged_by; optional filters are appended after it.
    const conditions: string[] = ['acknowledged = false'];
    const params: unknown[] = [acknowledgedBy];

    if (severity) {
      params.push(severity);
      conditions.push(`severity = $${params.length}`);
    }

    if (dispensaryId) {
      params.push(dispensaryId);
      conditions.push(`dispensary_id = $${params.length}`);
    }

    const result = await pool.query(`
      UPDATE crawler_status_alerts
      SET acknowledged = true, acknowledged_at = CURRENT_TIMESTAMP, acknowledged_by = $1
      WHERE ${conditions.join(' AND ')}
    `, params);

    res.json({ success: true, acknowledgedCount: result.rowCount });
  } catch (error: unknown) {
    sendServerError(res, 'acknowledging alerts', error);
  }
});

/** Result of applying one crawl outcome to a profile's counters and status. */
interface CrawlOutcomeTransition {
  newStatus: string;
  statusChanged: boolean;
  consecutiveSuccesses: number;
  consecutiveFailures: number;
}

/**
 * Computes the new counters and status after one crawl attempt.
 *
 * Success: sandbox -> production after 3 consecutive successes;
 *          needs_manual/failing -> production after 2.
 * Failure: production -> needs_manual after 2 consecutive failures;
 *          needs_manual -> failing after 5;
 *          sandbox -> needs_manual after 3.
 * Any other combination leaves the status untouched.
 */
function computeCrawlOutcome(
  currentStatus: string,
  success: boolean,
  previousSuccesses: number,
  previousFailures: number
): CrawlOutcomeTransition {
  if (success) {
    const consecutiveSuccesses = previousSuccesses + 1;
    const recovering = currentStatus === 'needs_manual' || currentStatus === 'failing';
    const promote =
      (currentStatus === 'sandbox' && consecutiveSuccesses >= 3) ||
      (recovering && consecutiveSuccesses >= 2);

    return {
      newStatus: promote ? 'production' : currentStatus,
      statusChanged: promote,
      consecutiveSuccesses,
      consecutiveFailures: 0,
    };
  }

  const consecutiveFailures = previousFailures + 1;
  let newStatus = currentStatus;

  if (currentStatus === 'production' && consecutiveFailures >= 2) {
    newStatus = 'needs_manual';
  } else if (currentStatus === 'needs_manual' && consecutiveFailures >= 5) {
    newStatus = 'failing';
  } else if (currentStatus === 'sandbox' && consecutiveFailures >= 3) {
    // A sandbox profile that keeps failing is handed over for manual attention
    newStatus = 'needs_manual';
  }

  return {
    newStatus,
    statusChanged: newStatus !== currentStatus,
    consecutiveSuccesses: 0,
    consecutiveFailures,
  };
}

/**
 * POST /api/admin/orchestrator/crawl-outcome
 * Record a crawl outcome and update status based on success/failure
 * This endpoint is called by the crawler after each crawl attempt
 * Body: { dispensaryId, success, productsFound?, error?, metadata? }
 *
 * NOTE(review): the profile update and the alert insert are separate
 * statements, not one transaction.
 */
router.post('/crawl-outcome', async (req: Request, res: Response): Promise<void> => {
  try {
    const {
      dispensaryId,
      success,
      productsFound = 0,
      error: crawlError,
      metadata = {},
    } = req.body ?? {};

    if (!dispensaryId) {
      res.status(400).json({ error: 'dispensaryId is required' });
      return;
    }

    // Get current profile
    const { rows: profileRows } = await pool.query(`
      SELECT
        dcp.id,
        dcp.status,
        dcp.consecutive_successes,
        dcp.consecutive_failures,
        d.name as dispensary_name
      FROM dispensary_crawler_profiles dcp
      JOIN dispensaries d ON d.id = dcp.dispensary_id
      WHERE dcp.dispensary_id = $1 AND dcp.enabled = true
      ORDER BY dcp.updated_at DESC
      LIMIT 1
    `, [dispensaryId]);

    if (profileRows.length === 0) {
      res.status(404).json({ error: 'No crawler profile found' });
      return;
    }

    const profile = profileRows[0];
    const currentStatus = profile.status;
    const { newStatus, statusChanged, consecutiveSuccesses, consecutiveFailures } = computeCrawlOutcome(
      currentStatus,
      Boolean(success),
      profile.consecutive_successes || 0,
      profile.consecutive_failures || 0
    );

    let statusReason: string | null = null;
    if (statusChanged) {
      statusReason = success
        ? 'Auto-promoted after consecutive successes'
        : `Auto-demoted after ${consecutiveFailures} consecutive failures`;
    }

    // Update profile; reason and timestamp are only touched when the status changed
    await pool.query(`
      UPDATE dispensary_crawler_profiles
      SET
        consecutive_successes = $1,
        consecutive_failures = $2,
        status = $3,
        status_reason = CASE WHEN $4 THEN $5 ELSE status_reason END,
        status_changed_at = CASE WHEN $4 THEN CURRENT_TIMESTAMP ELSE status_changed_at END,
        updated_at = CURRENT_TIMESTAMP
      WHERE id = $6
    `, [
      consecutiveSuccesses,
      consecutiveFailures,
      newStatus,
      statusChanged,
      statusReason,
      profile.id,
    ]);

    // Create alert if status changed or error occurred
    if (statusChanged) {
      const severity =
        newStatus === 'production' ? 'info' :
        newStatus === 'needs_manual' ? 'warning' : 'error';

      await pool.query(`
        INSERT INTO crawler_status_alerts
          (dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata)
        VALUES ($1, $2, 'status_change', $3, $4, $5, $6, $7)
      `, [
        dispensaryId,
        profile.id,
        severity,
        `${profile.dispensary_name}: ${success ? 'Promoted' : 'Demoted'} from ${currentStatus} to ${newStatus}`,
        currentStatus,
        newStatus,
        JSON.stringify({ productsFound, consecutiveSuccesses, consecutiveFailures, ...metadata }),
      ]);
    } else if (!success && crawlError) {
      // Log crawl error as alert
      await pool.query(`
        INSERT INTO crawler_status_alerts
          (dispensary_id, profile_id, alert_type, severity, message, error_details, metadata)
        VALUES ($1, $2, 'crawl_error', $3, $4, $5, $6)
      `, [
        dispensaryId,
        profile.id,
        consecutiveFailures >= 2 ? 'warning' : 'info',
        `${profile.dispensary_name}: Crawl failed - ${crawlError}`,
        JSON.stringify({ error: crawlError, stack: metadata?.stack }),
        JSON.stringify({ consecutiveFailures, ...metadata }),
      ]);
    }

    res.json({
      success: true,
      dispensaryId,
      profileId: profile.id,
      statusChanged,
      previousStatus: currentStatus,
      newStatus,
      consecutiveSuccesses,
      consecutiveFailures,
    });
  } catch (error: unknown) {
    sendServerError(res, 'recording crawl outcome', error);
  }
});

export default router;