feat: Add v2 architecture with multi-state support and orchestrator services
Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
84
backend/migrations/038_profile_status_field.sql
Normal file
84
backend/migrations/038_profile_status_field.sql
Normal file
@@ -0,0 +1,84 @@
|
||||
-- Migration: Add status field to dispensary_crawler_profiles
|
||||
-- This adds a proper status column for crawler state machine
|
||||
-- Status values: 'production', 'sandbox', 'needs_manual', 'disabled'
|
||||
|
||||
-- Add status column with default 'production' for existing profiles
|
||||
ALTER TABLE dispensary_crawler_profiles
|
||||
ADD COLUMN IF NOT EXISTS status VARCHAR(50) DEFAULT 'production';
|
||||
|
||||
-- Add next_retry_at column for sandbox retry scheduling
|
||||
ALTER TABLE dispensary_crawler_profiles
|
||||
ADD COLUMN IF NOT EXISTS next_retry_at TIMESTAMPTZ;
|
||||
|
||||
-- Add sandbox_attempt_count for quick lookup
|
||||
ALTER TABLE dispensary_crawler_profiles
|
||||
ADD COLUMN IF NOT EXISTS sandbox_attempt_count INTEGER DEFAULT 0;
|
||||
|
||||
-- Add last_sandbox_at for tracking
|
||||
ALTER TABLE dispensary_crawler_profiles
|
||||
ADD COLUMN IF NOT EXISTS last_sandbox_at TIMESTAMPTZ;
|
||||
|
||||
-- Create index for finding profiles by status
|
||||
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_status
|
||||
ON dispensary_crawler_profiles(status) WHERE enabled = true;
|
||||
|
||||
-- Create index for finding profiles needing retry
|
||||
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_next_retry
|
||||
ON dispensary_crawler_profiles(next_retry_at) WHERE enabled = true AND status = 'sandbox';
|
||||
|
||||
-- Add comment explaining status values
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.status IS
|
||||
'Crawler status: production (ready for regular crawls), sandbox (discovery mode), needs_manual (max retries exceeded), disabled (turned off)';
|
||||
|
||||
-- Update existing profiles to have status based on config if present
|
||||
UPDATE dispensary_crawler_profiles
|
||||
SET status = COALESCE(config->>'status', 'production')
|
||||
WHERE status IS NULL OR status = '';
|
||||
|
||||
-- Backfill sandbox_attempt_count from config
|
||||
UPDATE dispensary_crawler_profiles
|
||||
SET sandbox_attempt_count = COALESCE(
|
||||
jsonb_array_length(config->'sandboxAttempts'),
|
||||
0
|
||||
)
|
||||
WHERE config->'sandboxAttempts' IS NOT NULL;
|
||||
|
||||
-- Backfill next_retry_at from config
|
||||
UPDATE dispensary_crawler_profiles
|
||||
SET next_retry_at = (config->>'nextRetryAt')::timestamptz
|
||||
WHERE config->>'nextRetryAt' IS NOT NULL;
|
||||
|
||||
-- Create view for crawler profile summary
|
||||
CREATE OR REPLACE VIEW v_crawler_profile_summary AS
|
||||
SELECT
|
||||
dcp.id,
|
||||
dcp.dispensary_id,
|
||||
d.name AS dispensary_name,
|
||||
d.city,
|
||||
d.menu_type,
|
||||
dcp.profile_name,
|
||||
dcp.profile_key,
|
||||
dcp.crawler_type,
|
||||
dcp.status,
|
||||
dcp.enabled,
|
||||
dcp.sandbox_attempt_count,
|
||||
dcp.next_retry_at,
|
||||
dcp.last_sandbox_at,
|
||||
dcp.created_at,
|
||||
dcp.updated_at,
|
||||
CASE
|
||||
WHEN dcp.profile_key IS NOT NULL THEN 'per-store'
|
||||
ELSE 'legacy'
|
||||
END AS crawler_mode,
|
||||
CASE
|
||||
WHEN dcp.status = 'production' THEN 'Ready'
|
||||
WHEN dcp.status = 'sandbox' AND dcp.next_retry_at <= NOW() THEN 'Retry Due'
|
||||
WHEN dcp.status = 'sandbox' THEN 'Waiting'
|
||||
WHEN dcp.status = 'needs_manual' THEN 'Needs Manual'
|
||||
WHEN dcp.status = 'disabled' THEN 'Disabled'
|
||||
ELSE 'Unknown'
|
||||
END AS status_display
|
||||
FROM dispensary_crawler_profiles dcp
|
||||
JOIN dispensaries d ON d.id = dcp.dispensary_id
|
||||
WHERE dcp.enabled = true
|
||||
ORDER BY dcp.status, dcp.updated_at DESC;
|
||||
Reference in New Issue
Block a user