feat: Add v2 architecture with multi-state support and orchestrator services

Major additions:
- Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare
- Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator
- Discovery system: dutchie discovery service, geo validation, city seeding scripts
- Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages
- Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram)
- Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata

Frontend pages added:
- Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores
- StateHeatmap, CrossStateCompare, SyncInfoPanel

Components added:
- StateSelector, OrchestratorTraceModal, WorkflowStepper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-07 11:30:57 -07:00
parent 8ac64ba077
commit b4a2fb7d03
248 changed files with 60714 additions and 666 deletions

View File

@@ -0,0 +1,73 @@
-- Migration: crawl_orchestration_traces
-- Stores one row per crawl orchestration run for a dispensary. Each row
-- carries the ordered step-by-step trace of the run plus summary columns,
-- giving visibility into how the crawler behaved for an individual store.
CREATE TABLE IF NOT EXISTS crawl_orchestration_traces (
    id              SERIAL PRIMARY KEY,

    -- Owning dispensary; deleting the dispensary deletes its traces.
    dispensary_id   INTEGER NOT NULL
                    REFERENCES dispensaries (id) ON DELETE CASCADE,

    -- Identifier of the crawl run (UUID or job ID).
    run_id          VARCHAR(255),

    -- Crawler profile used for the run; set to NULL if the profile is deleted.
    profile_id      INTEGER
                    REFERENCES dispensary_crawler_profiles (id) ON DELETE SET NULL,
    profile_key     VARCHAR(255),   -- e.g. "trulieve-scottsdale"
    crawler_module  VARCHAR(255),   -- full path to the .ts file that was loaded

    -- Crawler state before and after the run.
    state_at_start  VARCHAR(50),    -- sandbox, production, legacy, disabled
    state_at_end    VARCHAR(50),    -- sandbox, production, needs_manual, etc.

    -- The trace itself: an ordered JSON array of step objects (empty by default).
    trace           JSONB NOT NULL DEFAULT '[]'::jsonb,

    -- Summary columns, kept alongside the trace so they can be queried quickly.
    total_steps     INTEGER DEFAULT 0,
    duration_ms     INTEGER,
    success         BOOLEAN,
    error_message   TEXT,
    products_found  INTEGER,

    -- Timestamps. started_at and created_at default to the insert time.
    started_at      TIMESTAMPTZ DEFAULT NOW(),
    completed_at    TIMESTAMPTZ,
    created_at      TIMESTAMPTZ DEFAULT NOW()
);
-- Supporting indexes for crawl_orchestration_traces.

-- All traces that belong to a given dispensary.
-- NOTE(review): the composite index below also leads with dispensary_id, so
-- this single-column index may be redundant -- confirm before dropping it.
CREATE INDEX IF NOT EXISTS idx_traces_dispensary_id
    ON crawl_orchestration_traces USING btree (dispensary_id);

-- Newest-first traces per dispensary (used to find the latest trace).
CREATE INDEX IF NOT EXISTS idx_traces_dispensary_created
    ON crawl_orchestration_traces USING btree (dispensary_id, created_at DESC);

-- Traces for a specific crawl run; partial, rows without a run_id are skipped.
CREATE INDEX IF NOT EXISTS idx_traces_run_id
    ON crawl_orchestration_traces USING btree (run_id)
    WHERE run_id IS NOT NULL;

-- Traces for a specific crawler profile; partial, rows without a profile are skipped.
CREATE INDEX IF NOT EXISTS idx_traces_profile_id
    ON crawl_orchestration_traces USING btree (profile_id)
    WHERE profile_id IS NOT NULL;
-- Catalog documentation for the trace column: describes the shape of one step
-- object in the JSONB array. Written as a dollar-quoted constant so the JSON
-- example needs no quote escaping; the stored text is unchanged.
COMMENT ON COLUMN crawl_orchestration_traces.trace IS
$trace_doc$Ordered array of step objects. Each step has:
{
"step": 1,
"action": "load_profile",
"description": "Loading crawler profile for dispensary",
"timestamp": 1701234567890,
"duration_ms": 45,
"input": { ... },
"output": { ... },
"what": "Description of what happened",
"why": "Reason this step was taken",
"where": "Code location / module",
"how": "Method or approach used",
"when": "ISO timestamp"
}$trace_doc$;
-- View: v_latest_crawl_traces
-- Returns one row per dispensary: its most recent crawl trace (newest
-- created_at), together with the dispensary's name and city.
--
-- DISTINCT ON keeps the first row of each dispensary_id group in ORDER BY
-- order, so the ORDER BY must fully determine which row comes first:
--   * cot.id DESC breaks ties between traces that share the same created_at,
--     which would otherwise make the chosen row arbitrary.
--   * dispensary_id is qualified with cot. so the reference stays unambiguous
--     regardless of which columns dispensaries has.
-- NOTE: created_at is nullable; PostgreSQL sorts NULLs first under DESC, so a
-- row with a NULL created_at would be picked as "latest". Rows normally get
-- the NOW() default, and the ordering is kept as-is so it still matches
-- idx_traces_dispensary_created (dispensary_id, created_at DESC).
CREATE OR REPLACE VIEW v_latest_crawl_traces AS
SELECT DISTINCT ON (cot.dispensary_id)
    cot.*,
    d.name AS dispensary_name,
    d.city AS dispensary_city
FROM crawl_orchestration_traces AS cot
INNER JOIN dispensaries AS d
    ON d.id = cot.dispensary_id
ORDER BY
    cot.dispensary_id,
    cot.created_at DESC,
    cot.id DESC;