feat: Add v2 architecture with multi-state support and orchestrator services

Major additions:
- Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare
- Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator
- Discovery system: dutchie discovery service, geo validation, city seeding scripts
- Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages
- Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram)
- Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata

Frontend pages added:
- Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores
- StateHeatmap, CrossStateCompare, SyncInfoPanel

Components added:
- StateSelector, OrchestratorTraceModal, WorkflowStepper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-07 11:30:57 -07:00
parent 8ac64ba077
commit b4a2fb7d03
248 changed files with 60714 additions and 666 deletions

View File

@@ -0,0 +1,430 @@
/**
* Orchestrator Admin Routes
*
* Read-only admin API endpoints for the CannaiQ Orchestrator Dashboard.
* Provides OBSERVABILITY ONLY - no state changes.
*/
import { Router, Request, Response } from 'express';
import { pool } from '../db/pool';
import { getLatestTrace, getTracesForDispensary, getTraceById } from '../services/orchestrator-trace';
import { getProviderDisplayName } from '../utils/provider-display';
import * as fs from 'fs';
import * as path from 'path';
// Express router that the GET handlers in this module are registered on; it is this module's default export.
const router = Router();
// ============================================================
// ORCHESTRATOR METRICS
// ============================================================
/**
 * GET /api/admin/orchestrator/metrics
 *
 * Responds with aggregate counts for the orchestrator dashboard, all read by
 * one query:
 * - total_products / total_brands: counted over all rows of dutchie_products
 * - total_stores: dispensaries with state = 'AZ' only
 * - healthy_count / sandbox_count / needs_manual_count: enabled crawler
 *   profiles whose status column or config->>'status' equals 'production',
 *   'sandbox' or 'needs_manual' respectively (no state filter)
 * - failing_count: profiles with status = 'needs_manual' joined to
 *   dispensaries with state = 'AZ' (no enabled filter)
 *
 * market_sentiment and market_direction are fixed placeholder strings.
 * On failure the error message is logged and returned with HTTP 500.
 */
router.get('/metrics', async (_req: Request, res: Response) => {
  // Counts are run through parseInt; a missing/empty value is reported as 0.
  const toCount = (raw: string | undefined): number => parseInt(raw || '0', 10);

  const metricsSql = `
SELECT
(SELECT COUNT(*) FROM dutchie_products) as total_products,
(SELECT COUNT(DISTINCT brand_name) FROM dutchie_products WHERE brand_name IS NOT NULL) as total_brands,
(SELECT COUNT(*) FROM dispensaries WHERE state = 'AZ') as total_stores,
(
SELECT COUNT(*)
FROM dispensary_crawler_profiles dcp
WHERE dcp.enabled = true
AND (dcp.status = 'production' OR (dcp.config->>'status')::text = 'production')
) as healthy_count,
(
SELECT COUNT(*)
FROM dispensary_crawler_profiles dcp
WHERE dcp.enabled = true
AND (dcp.status = 'sandbox' OR (dcp.config->>'status')::text = 'sandbox')
) as sandbox_count,
(
SELECT COUNT(*)
FROM dispensary_crawler_profiles dcp
WHERE dcp.enabled = true
AND (dcp.status = 'needs_manual' OR (dcp.config->>'status')::text = 'needs_manual')
) as needs_manual_count,
(
SELECT COUNT(*)
FROM dispensary_crawler_profiles dcp
JOIN dispensaries d ON d.id = dcp.dispensary_id
WHERE d.state = 'AZ'
AND dcp.status = 'needs_manual'
) as failing_count
`;

  try {
    const result = await pool.query(metricsSql);
    // The query yields a single row; fall back to an empty object so every count reads as 0.
    const counts = result.rows[0] || {};

    res.json({
      total_products: toCount(counts.total_products),
      total_brands: toCount(counts.total_brands),
      total_stores: toCount(counts.total_stores),
      // Sentiment fields are placeholders, not derived from data.
      market_sentiment: 'neutral',
      market_direction: 'stable',
      // Crawler profile health buckets
      healthy_count: toCount(counts.healthy_count),
      sandbox_count: toCount(counts.sandbox_count),
      needs_manual_count: toCount(counts.needs_manual_count),
      failing_count: toCount(counts.failing_count),
    });
  } catch (error: any) {
    console.error('[OrchestratorAdmin] Error fetching metrics:', error.message);
    res.status(500).json({ error: error.message });
  }
});
// ============================================================
// STATES LIST
// ============================================================
/**
 * GET /api/admin/orchestrator/states
 *
 * Lists every non-null state value present in the dispensaries table with the
 * number of dispensary rows carrying it, ordered by state.
 *
 * Response body: { states: [{ state, storeCount }] }.
 * On failure the error message is logged and returned with HTTP 500.
 */
router.get('/states', async (_req: Request, res: Response) => {
  try {
    const result = await pool.query(`
SELECT DISTINCT state, COUNT(*) as store_count
FROM dispensaries
WHERE state IS NOT NULL
GROUP BY state
ORDER BY state
`);

    // store_count is run through parseInt; a missing value is reported as 0.
    const states = result.rows.map(({ state, store_count }: any) => ({
      state,
      storeCount: parseInt(store_count || '0', 10),
    }));

    res.json({ states });
  } catch (error: any) {
    console.error('[OrchestratorAdmin] Error fetching states:', error.message);
    res.status(500).json({ error: error.message });
  }
});
// ============================================================
// STORES LIST
// ============================================================
/**
 * GET /api/admin/orchestrator/stores
 *
 * Returns one page of dispensaries ordered by name. Each entry carries its
 * enabled crawler profile (if any), the latest successful / failed trace
 * timestamps and its product count. The response also includes the total
 * number of dispensaries matching the filter.
 *
 * Query params:
 * - state:  optional state filter (e.g. "AZ"); omitted, empty or "all" = no filter
 * - limit:  page size, non-negative integer (default 100)
 * - offset: rows to skip, non-negative integer (default 0)
 *
 * A missing or malformed limit/offset falls back to its default so NaN or a
 * negative number is never bound into the query. A repeated or structured
 * `state` value (anything other than a single string) is rejected with 400.
 * Unexpected failures are logged and answered with HTTP 500.
 */
router.get('/stores', async (req: Request, res: Response) => {
  // req.query values can be strings, arrays or nested objects; only a plain
  // string that parses to a non-negative integer is accepted.
  const toPageNumber = (raw: unknown, fallback: number): number => {
    if (typeof raw !== 'string') {
      return fallback;
    }
    const parsed = parseInt(raw, 10);
    return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
  };

  try {
    const { state } = req.query;
    if (state !== undefined && typeof state !== 'string') {
      return res.status(400).json({ error: 'state must be a single string value' });
    }

    const limit = toPageNumber(req.query.limit, 100);
    const offset = toPageNumber(req.query.offset, 0);

    // Parameters shared by the page query and the count query.
    const filterParams: string[] = [];
    let whereClause = 'WHERE 1=1';
    if (state && state !== 'all') {
      filterParams.push(state);
      whereClause += ` AND d.state = $${filterParams.length}`;
    }

    // LIMIT / OFFSET placeholders follow the filter placeholders.
    const limitIndex = filterParams.length + 1;

    // crawler_status is NULL when no enabled profile supplies a status, which
    // lets the response mapping below choose between 'legacy' and 'pending'.
    // NOTE(review): if a dispensary can have more than one enabled profile, the
    // LEFT JOIN yields one row per profile while `total` counts dispensaries —
    // confirm that at most one enabled profile exists per dispensary.
    const storesSql = `
SELECT
d.id,
d.name,
d.city,
d.state,
d.menu_type as provider,
d.platform_dispensary_id,
d.last_crawl_at,
dcp.id as profile_id,
dcp.profile_key,
COALESCE(dcp.status, dcp.config->>'status') as crawler_status,
(
SELECT MAX(cot.completed_at)
FROM crawl_orchestration_traces cot
WHERE cot.dispensary_id = d.id AND cot.success = true
) as last_success_at,
(
SELECT MAX(cot.completed_at)
FROM crawl_orchestration_traces cot
WHERE cot.dispensary_id = d.id AND cot.success = false
) as last_failure_at,
(
SELECT COUNT(*)
FROM dutchie_products dp
WHERE dp.dispensary_id = d.id
) as product_count
FROM dispensaries d
LEFT JOIN dispensary_crawler_profiles dcp
ON dcp.dispensary_id = d.id AND dcp.enabled = true
${whereClause}
ORDER BY d.name
LIMIT $${limitIndex} OFFSET $${limitIndex + 1}
`;
    const countSql = `SELECT COUNT(*) as total FROM dispensaries d ${whereClause}`;

    // The page and the total are independent reads, so run them concurrently.
    const [storesResult, countResult] = await Promise.all([
      pool.query(storesSql, [...filterParams, limit, offset]),
      pool.query(countSql, filterParams),
    ]);

    res.json({
      stores: storesResult.rows.map((r: any) => ({
        id: r.id,
        name: r.name,
        city: r.city,
        state: r.state,
        provider: r.provider || 'unknown',
        provider_raw: r.provider || null,
        provider_display: getProviderDisplayName(r.provider),
        platformDispensaryId: r.platform_dispensary_id,
        // No profile status: stores with a platform id are 'legacy', the rest 'pending'.
        status: r.crawler_status || (r.platform_dispensary_id ? 'legacy' : 'pending'),
        profileId: r.profile_id,
        profileKey: r.profile_key,
        lastCrawlAt: r.last_crawl_at,
        lastSuccessAt: r.last_success_at,
        lastFailureAt: r.last_failure_at,
        productCount: parseInt(r.product_count || '0', 10),
      })),
      total: parseInt(countResult.rows[0]?.total || '0', 10),
      limit,
      offset,
    });
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('[OrchestratorAdmin] Error fetching stores:', message);
    res.status(500).json({ error: message });
  }
});
// ============================================================
// DISPENSARY TRACE (already exists but adding here for clarity)
// ============================================================
/**
 * GET …/dispensaries/:id/crawl-trace/latest
 * (path is relative to wherever this router is mounted)
 *
 * Returns the value produced by getLatestTrace for the dispensary.
 *
 * Responses:
 * - 400 when :id does not parse to an integer
 * - 404 when getLatestTrace yields nothing for the dispensary
 * - 500 (with the error message) on unexpected failures
 */
router.get('/dispensaries/:id/crawl-trace/latest', async (req: Request, res: Response) => {
  try {
    const dispensaryId = parseInt(req.params.id, 10);
    // Reject non-numeric ids up front instead of handing NaN to the trace service.
    if (Number.isNaN(dispensaryId)) {
      return res.status(400).json({ error: 'Invalid dispensary id' });
    }

    const trace = await getLatestTrace(dispensaryId);
    if (!trace) {
      return res.status(404).json({ error: 'No trace found for this dispensary' });
    }

    res.json(trace);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('[OrchestratorAdmin] Error fetching trace:', message);
    res.status(500).json({ error: message });
  }
});
/**
 * GET …/dispensaries/:id/crawl-traces
 * (path is relative to wherever this router is mounted)
 *
 * Returns whatever getTracesForDispensary produces for the dispensary and
 * the requested page.
 *
 * Query params:
 * - limit:  page size, non-negative integer (default 20)
 * - offset: rows to skip, non-negative integer (default 0)
 *
 * Responses:
 * - 400 when :id does not parse to an integer
 * - 500 (with the error message) on unexpected failures
 * A missing or malformed limit/offset falls back to its default.
 */
router.get('/dispensaries/:id/crawl-traces', async (req: Request, res: Response) => {
  // req.query values can be strings, arrays or nested objects; only a plain
  // string that parses to a non-negative integer is accepted.
  const toPageNumber = (raw: unknown, fallback: number): number => {
    if (typeof raw !== 'string') {
      return fallback;
    }
    const parsed = parseInt(raw, 10);
    return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
  };

  try {
    const dispensaryId = parseInt(req.params.id, 10);
    // Reject non-numeric ids up front instead of handing NaN to the trace service.
    if (Number.isNaN(dispensaryId)) {
      return res.status(400).json({ error: 'Invalid dispensary id' });
    }

    const result = await getTracesForDispensary(
      dispensaryId,
      toPageNumber(req.query.limit, 20),
      toPageNumber(req.query.offset, 0)
    );
    res.json(result);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('[OrchestratorAdmin] Error fetching traces:', message);
    res.status(500).json({ error: message });
  }
});
// ============================================================
// DISPENSARY PROFILE
// ============================================================
/**
 * GET …/dispensaries/:id/profile
 * (path is relative to wherever this router is mounted)
 *
 * Returns the most recently updated enabled crawler profile of a dispensary.
 * When the dispensary exists but has no enabled profile, a reduced payload
 * with hasProfile: false and basic dispensary fields is returned instead.
 *
 * Responses:
 * - 400 when :id does not parse to an integer
 * - 404 when no dispensary with that id exists
 * - 500 (with the error message) on unexpected failures
 */
router.get('/dispensaries/:id/profile', async (req: Request, res: Response) => {
  try {
    const dispensaryId = parseInt(req.params.id, 10);
    // Reject non-numeric ids up front so NaN is never bound into the queries.
    if (Number.isNaN(dispensaryId)) {
      return res.status(400).json({ error: 'Invalid dispensary id' });
    }

    const { rows } = await pool.query(`
SELECT
dcp.id,
dcp.dispensary_id,
dcp.profile_key,
dcp.profile_name,
dcp.platform,
dcp.version,
dcp.status,
dcp.config,
dcp.enabled,
dcp.sandbox_attempt_count,
dcp.next_retry_at,
dcp.created_at,
dcp.updated_at,
d.name as dispensary_name,
d.active_crawler_profile_id
FROM dispensary_crawler_profiles dcp
JOIN dispensaries d ON d.id = dcp.dispensary_id
WHERE dcp.dispensary_id = $1 AND dcp.enabled = true
ORDER BY dcp.updated_at DESC
LIMIT 1
`, [dispensaryId]);

    if (rows.length === 0) {
      // No enabled profile: still report basic dispensary info if the dispensary exists.
      const { rows: dispRows } = await pool.query(`
SELECT id, name, active_crawler_profile_id, menu_type, platform_dispensary_id
FROM dispensaries WHERE id = $1
`, [dispensaryId]);

      if (dispRows.length === 0) {
        return res.status(404).json({ error: 'Dispensary not found' });
      }

      const dispensary = dispRows[0];
      return res.json({
        dispensaryId: dispensary.id,
        dispensaryName: dispensary.name,
        hasProfile: false,
        activeProfileId: dispensary.active_crawler_profile_id,
        menuType: dispensary.menu_type,
        platformDispensaryId: dispensary.platform_dispensary_id,
      });
    }

    const profile = rows[0];
    res.json({
      dispensaryId: profile.dispensary_id,
      dispensaryName: profile.dispensary_name,
      hasProfile: true,
      activeProfileId: profile.active_crawler_profile_id,
      profile: {
        id: profile.id,
        profileKey: profile.profile_key,
        profileName: profile.profile_name,
        platform: profile.platform,
        version: profile.version,
        // Prefer the status column, then the status stored inside config.
        status: profile.status || profile.config?.status || 'unknown',
        config: profile.config,
        enabled: profile.enabled,
        sandboxAttemptCount: profile.sandbox_attempt_count,
        nextRetryAt: profile.next_retry_at,
        createdAt: profile.created_at,
        updatedAt: profile.updated_at,
      },
    });
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('[OrchestratorAdmin] Error fetching profile:', message);
    res.status(500).json({ error: message });
  }
});
// ============================================================
// CRAWLER MODULE PREVIEW
// ============================================================
/**
 * GET …/dispensaries/:id/crawler-module
 * (path is relative to wherever this router is mounted)
 *
 * Looks up the most recently updated enabled crawler profile of a dispensary
 * and returns the text of `<this dir>/../crawlers/<platform>/stores/<profile_key>.ts`,
 * where platform defaults to 'dutchie' when the profile has none.
 *
 * Responses:
 * - 400 when :id does not parse to an integer
 * - 404 (hasModule: false) when there is no enabled profile with a
 *   profile_key, when the resolved path would leave the crawlers directory,
 *   or when the file does not exist
 * - 500 (with the error message) on unexpected failures
 *
 * NOTE(review): the path is built from __dirname and a `.ts` extension —
 * confirm the source files are present next to the running build.
 */
router.get('/dispensaries/:id/crawler-module', async (req: Request, res: Response) => {
  try {
    const dispensaryId = parseInt(req.params.id, 10);
    // Reject non-numeric ids up front so NaN is never bound into the query.
    if (Number.isNaN(dispensaryId)) {
      return res.status(400).json({ error: 'Invalid dispensary id' });
    }

    // Get the profile key for this dispensary
    const { rows } = await pool.query(`
SELECT profile_key, platform
FROM dispensary_crawler_profiles
WHERE dispensary_id = $1 AND enabled = true
ORDER BY updated_at DESC
LIMIT 1
`, [dispensaryId]);

    if (rows.length === 0 || !rows[0].profile_key) {
      return res.status(404).json({
        error: 'No per-store crawler module found for this dispensary',
        hasModule: false,
      });
    }

    const profileKey = rows[0].profile_key;
    const platform = rows[0].platform || 'dutchie';
    const fileName = `${profileKey}.ts`;
    const displayPath = `crawlers/${platform}/stores/${fileName}`;

    const crawlersRoot = path.join(__dirname, '..', 'crawlers');
    const modulePath = path.join(crawlersRoot, platform, 'stores', fileName);

    // platform / profile_key come from stored data; path.join normalises any
    // ".." segments, so make sure the result is still inside crawlersRoot
    // before reading anything from disk.
    if (!modulePath.startsWith(crawlersRoot + path.sep)) {
      return res.status(404).json({
        error: 'Crawler module path is outside the crawlers directory',
        hasModule: false,
      });
    }

    // Single async read instead of existsSync + readFileSync: it does not block
    // the event loop and has no check-then-read race.
    let content: string;
    try {
      content = await fs.promises.readFile(modulePath, 'utf-8');
    } catch (readError: unknown) {
      const code =
        typeof readError === 'object' && readError !== null && 'code' in readError
          ? (readError as { code?: unknown }).code
          : undefined;
      if (code === 'ENOENT' || code === 'ENOTDIR') {
        return res.status(404).json({
          error: `Crawler module file not found: ${fileName}`,
          hasModule: false,
          expectedPath: displayPath,
        });
      }
      // Anything else (permissions, path is a directory, …) is a server error.
      throw readError;
    }

    res.json({
      hasModule: true,
      profileKey,
      platform,
      fileName,
      filePath: displayPath,
      content,
      lines: content.split('\n').length,
    });
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('[OrchestratorAdmin] Error fetching crawler module:', message);
    res.status(500).json({ error: message });
  }
});
// ============================================================
// TRACE BY ID
// ============================================================
/**
 * GET …/crawl-traces/:traceId
 * (path is relative to wherever this router is mounted)
 *
 * Returns the value produced by getTraceById for the given trace id.
 *
 * Responses:
 * - 400 when :traceId does not parse to an integer
 * - 404 when getTraceById yields nothing
 * - 500 (with the error message) on unexpected failures
 */
router.get('/crawl-traces/:traceId', async (req: Request, res: Response) => {
  try {
    const traceId = parseInt(req.params.traceId, 10);
    // Reject non-numeric ids up front instead of handing NaN to the trace service.
    if (Number.isNaN(traceId)) {
      return res.status(400).json({ error: 'Invalid trace id' });
    }

    const trace = await getTraceById(traceId);
    if (!trace) {
      return res.status(404).json({ error: 'Trace not found' });
    }

    res.json(trace);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('[OrchestratorAdmin] Error fetching trace:', message);
    res.status(500).json({ error: message });
  }
});
// Default export: the router with every handler above attached.
export default router;