-- Migration: Add status field to dispensary_crawler_profiles -- This adds a proper status column for crawler state machine -- Status values: 'production', 'sandbox', 'needs_manual', 'disabled' -- Add status column with default 'production' for existing profiles ALTER TABLE dispensary_crawler_profiles ADD COLUMN IF NOT EXISTS status VARCHAR(50) DEFAULT 'production'; -- Add next_retry_at column for sandbox retry scheduling ALTER TABLE dispensary_crawler_profiles ADD COLUMN IF NOT EXISTS next_retry_at TIMESTAMPTZ; -- Add sandbox_attempt_count for quick lookup ALTER TABLE dispensary_crawler_profiles ADD COLUMN IF NOT EXISTS sandbox_attempt_count INTEGER DEFAULT 0; -- Add last_sandbox_at for tracking ALTER TABLE dispensary_crawler_profiles ADD COLUMN IF NOT EXISTS last_sandbox_at TIMESTAMPTZ; -- Create index for finding profiles by status CREATE INDEX IF NOT EXISTS idx_crawler_profiles_status ON dispensary_crawler_profiles(status) WHERE enabled = true; -- Create index for finding profiles needing retry CREATE INDEX IF NOT EXISTS idx_crawler_profiles_next_retry ON dispensary_crawler_profiles(next_retry_at) WHERE enabled = true AND status = 'sandbox'; -- Add comment explaining status values COMMENT ON COLUMN dispensary_crawler_profiles.status IS 'Crawler status: production (ready for regular crawls), sandbox (discovery mode), needs_manual (max retries exceeded), disabled (turned off)'; -- Update existing profiles to have status based on config if present UPDATE dispensary_crawler_profiles SET status = COALESCE(config->>'status', 'production') WHERE status IS NULL OR status = ''; -- Backfill sandbox_attempt_count from config UPDATE dispensary_crawler_profiles SET sandbox_attempt_count = COALESCE( jsonb_array_length(config->'sandboxAttempts'), 0 ) WHERE config->'sandboxAttempts' IS NOT NULL; -- Backfill next_retry_at from config UPDATE dispensary_crawler_profiles SET next_retry_at = (config->>'nextRetryAt')::timestamptz WHERE config->>'nextRetryAt' IS NOT NULL; -- Create view for crawler profile summary CREATE OR REPLACE VIEW v_crawler_profile_summary AS SELECT dcp.id, dcp.dispensary_id, d.name AS dispensary_name, d.city, d.menu_type, dcp.profile_name, dcp.profile_key, dcp.crawler_type, dcp.status, dcp.enabled, dcp.sandbox_attempt_count, dcp.next_retry_at, dcp.last_sandbox_at, dcp.created_at, dcp.updated_at, CASE WHEN dcp.profile_key IS NOT NULL THEN 'per-store' ELSE 'legacy' END AS crawler_mode, CASE WHEN dcp.status = 'production' THEN 'Ready' WHEN dcp.status = 'sandbox' AND dcp.next_retry_at <= NOW() THEN 'Retry Due' WHEN dcp.status = 'sandbox' THEN 'Waiting' WHEN dcp.status = 'needs_manual' THEN 'Needs Manual' WHEN dcp.status = 'disabled' THEN 'Disabled' ELSE 'Unknown' END AS status_display FROM dispensary_crawler_profiles dcp JOIN dispensaries d ON d.id = dcp.dispensary_id WHERE dcp.enabled = true ORDER BY dcp.status, dcp.updated_at DESC;