-- Migration: Create crawl_orchestration_traces table -- Purpose: Store detailed step-by-step traces for every crawl orchestration run -- This enables full visibility into per-store crawler behavior CREATE TABLE IF NOT EXISTS crawl_orchestration_traces ( id SERIAL PRIMARY KEY, dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE, run_id VARCHAR(255), -- UUID or job ID for this crawl run profile_id INTEGER REFERENCES dispensary_crawler_profiles(id) ON DELETE SET NULL, profile_key VARCHAR(255), -- e.g. "trulieve-scottsdale" crawler_module VARCHAR(255), -- Full path to .ts file loaded state_at_start VARCHAR(50), -- sandbox, production, legacy, disabled state_at_end VARCHAR(50), -- sandbox, production, needs_manual, etc. -- The trace: ordered array of step objects trace JSONB NOT NULL DEFAULT '[]'::jsonb, -- Summary metrics for quick querying total_steps INTEGER DEFAULT 0, duration_ms INTEGER, success BOOLEAN, error_message TEXT, products_found INTEGER, -- Timestamps started_at TIMESTAMPTZ DEFAULT NOW(), completed_at TIMESTAMPTZ, created_at TIMESTAMPTZ DEFAULT NOW() ); -- Index for quick lookup by dispensary CREATE INDEX IF NOT EXISTS idx_traces_dispensary_id ON crawl_orchestration_traces(dispensary_id); -- Index for finding latest trace per dispensary CREATE INDEX IF NOT EXISTS idx_traces_dispensary_created ON crawl_orchestration_traces(dispensary_id, created_at DESC); -- Index for finding traces by run_id CREATE INDEX IF NOT EXISTS idx_traces_run_id ON crawl_orchestration_traces(run_id) WHERE run_id IS NOT NULL; -- Index for finding traces by profile CREATE INDEX IF NOT EXISTS idx_traces_profile_id ON crawl_orchestration_traces(profile_id) WHERE profile_id IS NOT NULL; -- Comment explaining trace structure COMMENT ON COLUMN crawl_orchestration_traces.trace IS 'Ordered array of step objects. Each step has: { "step": 1, "action": "load_profile", "description": "Loading crawler profile for dispensary", "timestamp": 1701234567890, "duration_ms": 45, "input": { ... }, "output": { ... }, "what": "Description of what happened", "why": "Reason this step was taken", "where": "Code location / module", "how": "Method or approach used", "when": "ISO timestamp" }'; -- View for easy access to latest traces CREATE OR REPLACE VIEW v_latest_crawl_traces AS SELECT DISTINCT ON (dispensary_id) cot.*, d.name AS dispensary_name, d.city AS dispensary_city FROM crawl_orchestration_traces cot JOIN dispensaries d ON d.id = cot.dispensary_id ORDER BY dispensary_id, cot.created_at DESC;