Files
cannaiq/backend/migrations/038_profile_status_field.sql
Kelly b4a2fb7d03 feat: Add v2 architecture with multi-state support and orchestrator services
Major additions:
- Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare
- Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator
- Discovery system: dutchie discovery service, geo validation, city seeding scripts
- Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages
- Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram)
- Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata

Frontend pages added:
- Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores
- StateHeatmap, CrossStateCompare, SyncInfoPanel

Components added:
- StateSelector, OrchestratorTraceModal, WorkflowStepper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-07 11:30:57 -07:00

85 lines
3.0 KiB
SQL

-- Migration: Add status field to dispensary_crawler_profiles
-- This adds a proper status column for crawler state machine
-- Status values: 'production', 'sandbox', 'needs_manual', 'disabled'
-- Add status column with default 'production' for existing profiles
ALTER TABLE dispensary_crawler_profiles
ADD COLUMN IF NOT EXISTS status VARCHAR(50) DEFAULT 'production';
-- Add next_retry_at column for sandbox retry scheduling
ALTER TABLE dispensary_crawler_profiles
ADD COLUMN IF NOT EXISTS next_retry_at TIMESTAMPTZ;
-- Add sandbox_attempt_count for quick lookup
ALTER TABLE dispensary_crawler_profiles
ADD COLUMN IF NOT EXISTS sandbox_attempt_count INTEGER DEFAULT 0;
-- Add last_sandbox_at for tracking
ALTER TABLE dispensary_crawler_profiles
ADD COLUMN IF NOT EXISTS last_sandbox_at TIMESTAMPTZ;
-- Create index for finding profiles by status
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_status
ON dispensary_crawler_profiles(status) WHERE enabled = true;
-- Create index for finding profiles needing retry
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_next_retry
ON dispensary_crawler_profiles(next_retry_at) WHERE enabled = true AND status = 'sandbox';
-- Add comment explaining status values
COMMENT ON COLUMN dispensary_crawler_profiles.status IS
'Crawler status: production (ready for regular crawls), sandbox (discovery mode), needs_manual (max retries exceeded), disabled (turned off)';
-- Update existing profiles to have status based on config if present
UPDATE dispensary_crawler_profiles
SET status = COALESCE(config->>'status', 'production')
WHERE status IS NULL OR status = '';
-- Backfill sandbox_attempt_count from config
UPDATE dispensary_crawler_profiles
SET sandbox_attempt_count = COALESCE(
jsonb_array_length(config->'sandboxAttempts'),
0
)
WHERE config->'sandboxAttempts' IS NOT NULL;
-- Backfill next_retry_at from config
UPDATE dispensary_crawler_profiles
SET next_retry_at = (config->>'nextRetryAt')::timestamptz
WHERE config->>'nextRetryAt' IS NOT NULL;
-- Create view for crawler profile summary
CREATE OR REPLACE VIEW v_crawler_profile_summary AS
SELECT
dcp.id,
dcp.dispensary_id,
d.name AS dispensary_name,
d.city,
d.menu_type,
dcp.profile_name,
dcp.profile_key,
dcp.crawler_type,
dcp.status,
dcp.enabled,
dcp.sandbox_attempt_count,
dcp.next_retry_at,
dcp.last_sandbox_at,
dcp.created_at,
dcp.updated_at,
CASE
WHEN dcp.profile_key IS NOT NULL THEN 'per-store'
ELSE 'legacy'
END AS crawler_mode,
CASE
WHEN dcp.status = 'production' THEN 'Ready'
WHEN dcp.status = 'sandbox' AND dcp.next_retry_at <= NOW() THEN 'Retry Due'
WHEN dcp.status = 'sandbox' THEN 'Waiting'
WHEN dcp.status = 'needs_manual' THEN 'Needs Manual'
WHEN dcp.status = 'disabled' THEN 'Disabled'
ELSE 'Unknown'
END AS status_display
FROM dispensary_crawler_profiles dcp
JOIN dispensaries d ON d.id = dcp.dispensary_id
WHERE dcp.enabled = true
ORDER BY dcp.status, dcp.updated_at DESC;