Files
cannaiq/backend/migrations/051_create_mv_state_metrics.sql
Kelly b4a2fb7d03 feat: Add v2 architecture with multi-state support and orchestrator services
Major additions:
- Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare
- Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator
- Discovery system: dutchie discovery service, geo validation, city seeding scripts
- Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages
- Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram)
- Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata

Frontend pages added:
- Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores
- StateHeatmap, CrossStateCompare, SyncInfoPanel

Components added:
- StateSelector, OrchestratorTraceModal, WorkflowStepper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-07 11:30:57 -07:00

99 lines
4.1 KiB
SQL

-- Migration 051: Create materialized view for state metrics
-- Used by Analytics V2 state endpoints for fast aggregated queries
-- Canonical tables: states, dispensaries, store_products, store_product_snapshots, brands
-- Drop existing view if it exists (for clean recreation)
DROP MATERIALIZED VIEW IF EXISTS mv_state_metrics;
-- Create materialized view with comprehensive state metrics
-- Schema verified via information_schema on 2025-12-06
-- Real columns used:
-- states: id, code, name, recreational_legal, medical_legal, rec_year, med_year
-- dispensaries: id, state_id (NO is_active column)
-- store_products: id, dispensary_id, brand_id, category_raw, price_rec, price_med, is_in_stock
-- store_product_snapshots: id, store_product_id, captured_at
-- brands: id (joined via sp.brand_id)
CREATE MATERIALIZED VIEW mv_state_metrics AS
SELECT
s.id AS state_id,
s.code AS state,
s.name AS state_name,
COALESCE(s.recreational_legal, FALSE) AS recreational_legal,
COALESCE(s.medical_legal, FALSE) AS medical_legal,
s.rec_year,
s.med_year,
-- Dispensary metrics
COUNT(DISTINCT d.id) AS dispensary_count,
-- Product metrics
COUNT(DISTINCT sp.id) AS total_products,
COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock = TRUE) AS in_stock_products,
COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock = FALSE) AS out_of_stock_products,
-- Brand metrics (using brand_id FK, not brand_name)
COUNT(DISTINCT sp.brand_id) FILTER (WHERE sp.brand_id IS NOT NULL) AS unique_brands,
-- Category metrics (using category_raw, not category)
COUNT(DISTINCT sp.category_raw) FILTER (WHERE sp.category_raw IS NOT NULL) AS unique_categories,
-- Pricing metrics (recreational)
AVG(sp.price_rec) FILTER (WHERE sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE) AS avg_price_rec,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)
FILTER (WHERE sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE) AS median_price_rec,
MIN(sp.price_rec) FILTER (WHERE sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE) AS min_price_rec,
MAX(sp.price_rec) FILTER (WHERE sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE) AS max_price_rec,
-- Pricing metrics (medical)
AVG(sp.price_med) FILTER (WHERE sp.price_med IS NOT NULL AND sp.is_in_stock = TRUE) AS avg_price_med,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_med)
FILTER (WHERE sp.price_med IS NOT NULL AND sp.is_in_stock = TRUE) AS median_price_med,
-- Snapshot/crawl metrics
COUNT(sps.id) AS total_snapshots,
MAX(sps.captured_at) AS last_crawl_at,
MIN(sps.captured_at) AS first_crawl_at,
-- Data freshness
CASE
WHEN MAX(sps.captured_at) > NOW() - INTERVAL '24 hours' THEN 'fresh'
WHEN MAX(sps.captured_at) > NOW() - INTERVAL '7 days' THEN 'recent'
WHEN MAX(sps.captured_at) IS NOT NULL THEN 'stale'
ELSE 'no_data'
END AS data_freshness,
-- Metadata
NOW() AS refreshed_at
FROM states s
LEFT JOIN dispensaries d ON d.state_id = s.id
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
LEFT JOIN store_product_snapshots sps ON sps.store_product_id = sp.id
GROUP BY s.id, s.code, s.name, s.recreational_legal, s.medical_legal, s.rec_year, s.med_year;
-- Create unique index on state code for fast lookups
CREATE UNIQUE INDEX IF NOT EXISTS mv_state_metrics_state_idx
ON mv_state_metrics (state);
-- Create index on state_id for joins
CREATE INDEX IF NOT EXISTS mv_state_metrics_state_id_idx
ON mv_state_metrics (state_id);
-- Create index for legal status filtering
CREATE INDEX IF NOT EXISTS mv_state_metrics_legal_idx
ON mv_state_metrics (recreational_legal, medical_legal);
-- Create index for data freshness queries
CREATE INDEX IF NOT EXISTS mv_state_metrics_freshness_idx
ON mv_state_metrics (data_freshness);
-- Comment on the view
COMMENT ON MATERIALIZED VIEW mv_state_metrics IS
'Aggregated state-level metrics for Analytics V2 endpoints. Refresh periodically with: REFRESH MATERIALIZED VIEW CONCURRENTLY mv_state_metrics;';
-- Record migration
INSERT INTO schema_migrations (version, name, applied_at)
VALUES ('051', 'create_mv_state_metrics', NOW())
ON CONFLICT (version) DO NOTHING;