Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
85 lines
3.0 KiB
SQL
85 lines
3.0 KiB
SQL
-- Migration: Add status field to dispensary_crawler_profiles
|
|
-- This adds a proper status column for crawler state machine
|
|
-- Status values: 'production', 'sandbox', 'needs_manual', 'disabled'
|
|
|
|
-- Add status column with default 'production' for existing profiles
|
|
ALTER TABLE dispensary_crawler_profiles
|
|
ADD COLUMN IF NOT EXISTS status VARCHAR(50) DEFAULT 'production';
|
|
|
|
-- Add next_retry_at column for sandbox retry scheduling
|
|
ALTER TABLE dispensary_crawler_profiles
|
|
ADD COLUMN IF NOT EXISTS next_retry_at TIMESTAMPTZ;
|
|
|
|
-- Add sandbox_attempt_count for quick lookup
|
|
ALTER TABLE dispensary_crawler_profiles
|
|
ADD COLUMN IF NOT EXISTS sandbox_attempt_count INTEGER DEFAULT 0;
|
|
|
|
-- Add last_sandbox_at for tracking
|
|
ALTER TABLE dispensary_crawler_profiles
|
|
ADD COLUMN IF NOT EXISTS last_sandbox_at TIMESTAMPTZ;
|
|
|
|
-- Create index for finding profiles by status
|
|
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_status
|
|
ON dispensary_crawler_profiles(status) WHERE enabled = true;
|
|
|
|
-- Create index for finding profiles needing retry
|
|
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_next_retry
|
|
ON dispensary_crawler_profiles(next_retry_at) WHERE enabled = true AND status = 'sandbox';
|
|
|
|
-- Add comment explaining status values
|
|
COMMENT ON COLUMN dispensary_crawler_profiles.status IS
|
|
'Crawler status: production (ready for regular crawls), sandbox (discovery mode), needs_manual (max retries exceeded), disabled (turned off)';
|
|
|
|
-- Update existing profiles to have status based on config if present
|
|
UPDATE dispensary_crawler_profiles
|
|
SET status = COALESCE(config->>'status', 'production')
|
|
WHERE status IS NULL OR status = '';
|
|
|
|
-- Backfill sandbox_attempt_count from config
|
|
UPDATE dispensary_crawler_profiles
|
|
SET sandbox_attempt_count = COALESCE(
|
|
jsonb_array_length(config->'sandboxAttempts'),
|
|
0
|
|
)
|
|
WHERE config->'sandboxAttempts' IS NOT NULL;
|
|
|
|
-- Backfill next_retry_at from config
|
|
UPDATE dispensary_crawler_profiles
|
|
SET next_retry_at = (config->>'nextRetryAt')::timestamptz
|
|
WHERE config->>'nextRetryAt' IS NOT NULL;
|
|
|
|
-- Create view for crawler profile summary
|
|
CREATE OR REPLACE VIEW v_crawler_profile_summary AS
|
|
SELECT
|
|
dcp.id,
|
|
dcp.dispensary_id,
|
|
d.name AS dispensary_name,
|
|
d.city,
|
|
d.menu_type,
|
|
dcp.profile_name,
|
|
dcp.profile_key,
|
|
dcp.crawler_type,
|
|
dcp.status,
|
|
dcp.enabled,
|
|
dcp.sandbox_attempt_count,
|
|
dcp.next_retry_at,
|
|
dcp.last_sandbox_at,
|
|
dcp.created_at,
|
|
dcp.updated_at,
|
|
CASE
|
|
WHEN dcp.profile_key IS NOT NULL THEN 'per-store'
|
|
ELSE 'legacy'
|
|
END AS crawler_mode,
|
|
CASE
|
|
WHEN dcp.status = 'production' THEN 'Ready'
|
|
WHEN dcp.status = 'sandbox' AND dcp.next_retry_at <= NOW() THEN 'Retry Due'
|
|
WHEN dcp.status = 'sandbox' THEN 'Waiting'
|
|
WHEN dcp.status = 'needs_manual' THEN 'Needs Manual'
|
|
WHEN dcp.status = 'disabled' THEN 'Disabled'
|
|
ELSE 'Unknown'
|
|
END AS status_display
|
|
FROM dispensary_crawler_profiles dcp
|
|
JOIN dispensaries d ON d.id = dcp.dispensary_id
|
|
WHERE dcp.enabled = true
|
|
ORDER BY dcp.status, dcp.updated_at DESC;
|