Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
74 lines
2.7 KiB
SQL
74 lines
2.7 KiB
SQL
-- Migration: Create crawl_orchestration_traces table
|
|
-- Purpose: Store detailed step-by-step traces for every crawl orchestration run
|
|
-- This enables full visibility into per-store crawler behavior
|
|
|
|
CREATE TABLE IF NOT EXISTS crawl_orchestration_traces (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
|
run_id VARCHAR(255), -- UUID or job ID for this crawl run
|
|
profile_id INTEGER REFERENCES dispensary_crawler_profiles(id) ON DELETE SET NULL,
|
|
profile_key VARCHAR(255), -- e.g. "trulieve-scottsdale"
|
|
crawler_module VARCHAR(255), -- Full path to .ts file loaded
|
|
state_at_start VARCHAR(50), -- sandbox, production, legacy, disabled
|
|
state_at_end VARCHAR(50), -- sandbox, production, needs_manual, etc.
|
|
|
|
-- The trace: ordered array of step objects
|
|
trace JSONB NOT NULL DEFAULT '[]'::jsonb,
|
|
|
|
-- Summary metrics for quick querying
|
|
total_steps INTEGER DEFAULT 0,
|
|
duration_ms INTEGER,
|
|
success BOOLEAN,
|
|
error_message TEXT,
|
|
products_found INTEGER,
|
|
|
|
-- Timestamps
|
|
started_at TIMESTAMPTZ DEFAULT NOW(),
|
|
completed_at TIMESTAMPTZ,
|
|
created_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
-- Index for quick lookup by dispensary
|
|
CREATE INDEX IF NOT EXISTS idx_traces_dispensary_id
|
|
ON crawl_orchestration_traces(dispensary_id);
|
|
|
|
-- Index for finding latest trace per dispensary
|
|
CREATE INDEX IF NOT EXISTS idx_traces_dispensary_created
|
|
ON crawl_orchestration_traces(dispensary_id, created_at DESC);
|
|
|
|
-- Index for finding traces by run_id
|
|
CREATE INDEX IF NOT EXISTS idx_traces_run_id
|
|
ON crawl_orchestration_traces(run_id) WHERE run_id IS NOT NULL;
|
|
|
|
-- Index for finding traces by profile
|
|
CREATE INDEX IF NOT EXISTS idx_traces_profile_id
|
|
ON crawl_orchestration_traces(profile_id) WHERE profile_id IS NOT NULL;
|
|
|
|
-- Comment explaining trace structure
|
|
COMMENT ON COLUMN crawl_orchestration_traces.trace IS
|
|
'Ordered array of step objects. Each step has:
|
|
{
|
|
"step": 1,
|
|
"action": "load_profile",
|
|
"description": "Loading crawler profile for dispensary",
|
|
"timestamp": 1701234567890,
|
|
"duration_ms": 45,
|
|
"input": { ... },
|
|
"output": { ... },
|
|
"what": "Description of what happened",
|
|
"why": "Reason this step was taken",
|
|
"where": "Code location / module",
|
|
"how": "Method or approach used",
|
|
"when": "ISO timestamp"
|
|
}';
|
|
|
|
-- View for easy access to latest traces
|
|
CREATE OR REPLACE VIEW v_latest_crawl_traces AS
|
|
SELECT DISTINCT ON (dispensary_id)
|
|
cot.*,
|
|
d.name AS dispensary_name,
|
|
d.city AS dispensary_city
|
|
FROM crawl_orchestration_traces cot
|
|
JOIN dispensaries d ON d.id = cot.dispensary_id
|
|
ORDER BY dispensary_id, cot.created_at DESC;
|