Files
cannaiq/backend/migrations/052_hydration_schema_alignment.sql
Kelly b4a2fb7d03 feat: Add v2 architecture with multi-state support and orchestrator services
Major additions:
- Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare
- Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator
- Discovery system: dutchie discovery service, geo validation, city seeding scripts
- Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages
- Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram)
- Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata

Frontend pages added:
- Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores
- StateHeatmap, CrossStateCompare, SyncInfoPanel

Components added:
- StateSelector, OrchestratorTraceModal, WorkflowStepper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-07 11:30:57 -07:00

250 lines
10 KiB
SQL

-- ============================================================================
-- Migration 052: Hydration Schema Alignment
-- ============================================================================
--
-- Purpose: Add columns to canonical tables needed for hydration from
-- dutchie_products and dutchie_product_snapshots.
--
-- This migration ensures store_products and store_product_snapshots can
-- receive all data from the legacy dutchie_* tables.
--
-- SAFETY RULES:
-- - ALL columns use ADD COLUMN IF NOT EXISTS
-- - NO DROP, DELETE, TRUNCATE, or destructive operations
-- - Fully idempotent - safe to run multiple times
--
-- Run with:
-- psql "postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
-- -f migrations/052_hydration_schema_alignment.sql
--
-- ============================================================================
-- ============================================================================
-- SECTION 1: store_products - Additional columns from dutchie_products
-- ============================================================================
-- Brand ID from Dutchie GraphQL (brandId field)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS provider_brand_id VARCHAR(100);
-- Legacy dutchie_products.id for cross-reference during migration
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS legacy_dutchie_product_id INTEGER;
-- THC/CBD content as text (from dutchie_products.thc_content/cbd_content)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS thc_content_text VARCHAR(50);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cbd_content_text VARCHAR(50);
-- Full cannabinoid data
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cannabinoids JSONB;
-- Effects array
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS effects TEXT[];
-- Type (Flower, Edible, etc.) - maps to category in legacy
-- Already have category VARCHAR(100), but type may differ
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS product_type VARCHAR(100);
-- Additional images array
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS additional_images TEXT[];
-- Local image paths (from 032 migration)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS local_image_url TEXT;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS local_image_thumb_url TEXT;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS local_image_medium_url TEXT;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS original_image_url TEXT;
-- Status from Dutchie (Active/Inactive)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS platform_status VARCHAR(20);
-- Threshold flags
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS is_below_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS is_below_kiosk_threshold BOOLEAN DEFAULT FALSE;
-- cName / slug from Dutchie
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS c_name VARCHAR(255);
-- Coming soon flag
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS is_coming_soon BOOLEAN DEFAULT FALSE;
-- Provider column already exists, ensure we have provider_dispensary_id
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS provider_dispensary_id VARCHAR(100);
-- Enterprise product ID (cross-store product linking)
-- Already exists from migration 051
-- Total quantity available (from POSMetaData.children)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS total_quantity_available INTEGER;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS total_kiosk_quantity_available INTEGER;
-- Weight
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS weight VARCHAR(50);
-- Options array (size/weight options)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS options TEXT[];
-- Measurements
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS measurements JSONB;
-- Raw data from last crawl
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS raw_data JSONB;
-- Source timestamps from Dutchie
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS source_created_at TIMESTAMPTZ;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS source_updated_at TIMESTAMPTZ;
-- ============================================================================
-- SECTION 2: store_product_snapshots - Additional columns for hydration
-- ============================================================================
-- Legacy dutchie_product_snapshot.id for cross-reference
ALTER TABLE store_product_snapshots ADD COLUMN IF NOT EXISTS legacy_snapshot_id INTEGER;
-- Legacy dutchie_product_id reference
ALTER TABLE store_product_snapshots ADD COLUMN IF NOT EXISTS legacy_dutchie_product_id INTEGER;
-- Options JSONB from dutchie_product_snapshots
ALTER TABLE store_product_snapshots ADD COLUMN IF NOT EXISTS options JSONB;
-- Provider dispensary ID
ALTER TABLE store_product_snapshots ADD COLUMN IF NOT EXISTS provider_dispensary_id VARCHAR(100);
-- Inventory details
ALTER TABLE store_product_snapshots ADD COLUMN IF NOT EXISTS total_quantity_available INTEGER;
ALTER TABLE store_product_snapshots ADD COLUMN IF NOT EXISTS total_kiosk_quantity_available INTEGER;
-- Platform status at time of snapshot
ALTER TABLE store_product_snapshots ADD COLUMN IF NOT EXISTS platform_status VARCHAR(20);
-- Threshold flags at time of snapshot
ALTER TABLE store_product_snapshots ADD COLUMN IF NOT EXISTS is_below_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_product_snapshots ADD COLUMN IF NOT EXISTS is_below_kiosk_threshold BOOLEAN DEFAULT FALSE;
-- Special data
ALTER TABLE store_product_snapshots ADD COLUMN IF NOT EXISTS special_data JSONB;
ALTER TABLE store_product_snapshots ADD COLUMN IF NOT EXISTS special_name TEXT;
-- Pricing mode (rec/med)
ALTER TABLE store_product_snapshots ADD COLUMN IF NOT EXISTS pricing_type VARCHAR(10);
-- Crawl mode (mode_a/mode_b)
ALTER TABLE store_product_snapshots ADD COLUMN IF NOT EXISTS crawl_mode VARCHAR(20);
-- ============================================================================
-- SECTION 3: crawl_runs - Additional columns for hydration
-- ============================================================================
-- Legacy job ID references
ALTER TABLE crawl_runs ADD COLUMN IF NOT EXISTS legacy_dispensary_crawl_job_id INTEGER;
ALTER TABLE crawl_runs ADD COLUMN IF NOT EXISTS legacy_job_run_log_id INTEGER;
-- Schedule reference
ALTER TABLE crawl_runs ADD COLUMN IF NOT EXISTS schedule_id INTEGER;
-- Job type
ALTER TABLE crawl_runs ADD COLUMN IF NOT EXISTS job_type VARCHAR(50);
-- Brands found count
ALTER TABLE crawl_runs ADD COLUMN IF NOT EXISTS brands_found INTEGER DEFAULT 0;
-- Retry count
ALTER TABLE crawl_runs ADD COLUMN IF NOT EXISTS retry_count INTEGER DEFAULT 0;
-- ============================================================================
-- SECTION 4: INDEXES for hydration queries
-- ============================================================================
-- Index on legacy IDs for migration lookups
CREATE INDEX IF NOT EXISTS idx_store_products_legacy_id
ON store_products(legacy_dutchie_product_id)
WHERE legacy_dutchie_product_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_snapshots_legacy_id
ON store_product_snapshots(legacy_snapshot_id)
WHERE legacy_snapshot_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_snapshots_legacy_product_id
ON store_product_snapshots(legacy_dutchie_product_id)
WHERE legacy_dutchie_product_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_crawl_runs_legacy_job_id
ON crawl_runs(legacy_dispensary_crawl_job_id)
WHERE legacy_dispensary_crawl_job_id IS NOT NULL;
-- Index on provider_product_id for upserts
CREATE INDEX IF NOT EXISTS idx_store_products_provider_id
ON store_products(provider_product_id);
-- Composite index for canonical key lookup
CREATE INDEX IF NOT EXISTS idx_store_products_canonical_key
ON store_products(dispensary_id, provider, provider_product_id);
-- ============================================================================
-- SECTION 5: Unique constraint for idempotent hydration
-- ============================================================================
-- Ensure unique snapshots per product per crawl
-- This prevents duplicate snapshots during re-runs
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_constraint
WHERE conname = 'store_product_snapshots_unique_per_crawl'
) THEN
-- Can't add unique constraint on nullable columns directly,
-- so we use a partial unique index instead
CREATE UNIQUE INDEX IF NOT EXISTS idx_snapshots_unique_per_crawl
ON store_product_snapshots(store_product_id, crawl_run_id)
WHERE store_product_id IS NOT NULL AND crawl_run_id IS NOT NULL;
END IF;
EXCEPTION
WHEN duplicate_object THEN NULL;
WHEN OTHERS THEN NULL;
END $$;
-- ============================================================================
-- SECTION 6: View for hydration status monitoring
-- ============================================================================
CREATE OR REPLACE VIEW v_hydration_status AS
SELECT
'dutchie_products' AS source_table,
(SELECT COUNT(*) FROM dutchie_products) AS source_count,
(SELECT COUNT(*) FROM store_products WHERE legacy_dutchie_product_id IS NOT NULL) AS hydrated_count,
ROUND(
100.0 * (SELECT COUNT(*) FROM store_products WHERE legacy_dutchie_product_id IS NOT NULL) /
NULLIF((SELECT COUNT(*) FROM dutchie_products), 0),
2
) AS hydration_pct
UNION ALL
SELECT
'dutchie_product_snapshots' AS source_table,
(SELECT COUNT(*) FROM dutchie_product_snapshots) AS source_count,
(SELECT COUNT(*) FROM store_product_snapshots WHERE legacy_snapshot_id IS NOT NULL) AS hydrated_count,
ROUND(
100.0 * (SELECT COUNT(*) FROM store_product_snapshots WHERE legacy_snapshot_id IS NOT NULL) /
NULLIF((SELECT COUNT(*) FROM dutchie_product_snapshots), 0),
2
) AS hydration_pct
UNION ALL
SELECT
'dispensary_crawl_jobs' AS source_table,
(SELECT COUNT(*) FROM dispensary_crawl_jobs WHERE status = 'completed') AS source_count,
(SELECT COUNT(*) FROM crawl_runs WHERE legacy_dispensary_crawl_job_id IS NOT NULL) AS hydrated_count,
ROUND(
100.0 * (SELECT COUNT(*) FROM crawl_runs WHERE legacy_dispensary_crawl_job_id IS NOT NULL) /
NULLIF((SELECT COUNT(*) FROM dispensary_crawl_jobs WHERE status = 'completed'), 0),
2
) AS hydration_pct;
-- ============================================================================
-- DONE
-- ============================================================================
SELECT 'Migration 052 completed successfully. Hydration schema aligned.' AS status;