Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
99 lines
4.1 KiB
SQL
99 lines
4.1 KiB
SQL
-- Migration 051: Create materialized view for state metrics
|
|
-- Used by Analytics V2 state endpoints for fast aggregated queries
|
|
-- Canonical tables: states, dispensaries, store_products, store_product_snapshots, brands
|
|
|
|
-- Drop existing view if it exists (for clean recreation)
|
|
DROP MATERIALIZED VIEW IF EXISTS mv_state_metrics;
|
|
|
|
-- Create materialized view with comprehensive state metrics
|
|
-- Schema verified via information_schema on 2025-12-06
|
|
-- Real columns used:
|
|
-- states: id, code, name, recreational_legal, medical_legal, rec_year, med_year
|
|
-- dispensaries: id, state_id (NO is_active column)
|
|
-- store_products: id, dispensary_id, brand_id, category_raw, price_rec, price_med, is_in_stock
|
|
-- store_product_snapshots: id, store_product_id, captured_at
|
|
-- brands: id (joined via sp.brand_id)
|
|
|
|
CREATE MATERIALIZED VIEW mv_state_metrics AS
|
|
SELECT
|
|
s.id AS state_id,
|
|
s.code AS state,
|
|
s.name AS state_name,
|
|
COALESCE(s.recreational_legal, FALSE) AS recreational_legal,
|
|
COALESCE(s.medical_legal, FALSE) AS medical_legal,
|
|
s.rec_year,
|
|
s.med_year,
|
|
|
|
-- Dispensary metrics
|
|
COUNT(DISTINCT d.id) AS dispensary_count,
|
|
|
|
-- Product metrics
|
|
COUNT(DISTINCT sp.id) AS total_products,
|
|
COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock = TRUE) AS in_stock_products,
|
|
COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock = FALSE) AS out_of_stock_products,
|
|
|
|
-- Brand metrics (using brand_id FK, not brand_name)
|
|
COUNT(DISTINCT sp.brand_id) FILTER (WHERE sp.brand_id IS NOT NULL) AS unique_brands,
|
|
|
|
-- Category metrics (using category_raw, not category)
|
|
COUNT(DISTINCT sp.category_raw) FILTER (WHERE sp.category_raw IS NOT NULL) AS unique_categories,
|
|
|
|
-- Pricing metrics (recreational)
|
|
AVG(sp.price_rec) FILTER (WHERE sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE) AS avg_price_rec,
|
|
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)
|
|
FILTER (WHERE sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE) AS median_price_rec,
|
|
MIN(sp.price_rec) FILTER (WHERE sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE) AS min_price_rec,
|
|
MAX(sp.price_rec) FILTER (WHERE sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE) AS max_price_rec,
|
|
|
|
-- Pricing metrics (medical)
|
|
AVG(sp.price_med) FILTER (WHERE sp.price_med IS NOT NULL AND sp.is_in_stock = TRUE) AS avg_price_med,
|
|
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_med)
|
|
FILTER (WHERE sp.price_med IS NOT NULL AND sp.is_in_stock = TRUE) AS median_price_med,
|
|
|
|
-- Snapshot/crawl metrics
|
|
COUNT(sps.id) AS total_snapshots,
|
|
MAX(sps.captured_at) AS last_crawl_at,
|
|
MIN(sps.captured_at) AS first_crawl_at,
|
|
|
|
-- Data freshness
|
|
CASE
|
|
WHEN MAX(sps.captured_at) > NOW() - INTERVAL '24 hours' THEN 'fresh'
|
|
WHEN MAX(sps.captured_at) > NOW() - INTERVAL '7 days' THEN 'recent'
|
|
WHEN MAX(sps.captured_at) IS NOT NULL THEN 'stale'
|
|
ELSE 'no_data'
|
|
END AS data_freshness,
|
|
|
|
-- Metadata
|
|
NOW() AS refreshed_at
|
|
|
|
FROM states s
|
|
LEFT JOIN dispensaries d ON d.state_id = s.id
|
|
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
|
|
LEFT JOIN store_product_snapshots sps ON sps.store_product_id = sp.id
|
|
GROUP BY s.id, s.code, s.name, s.recreational_legal, s.medical_legal, s.rec_year, s.med_year;
|
|
|
|
-- Create unique index on state code for fast lookups
|
|
CREATE UNIQUE INDEX IF NOT EXISTS mv_state_metrics_state_idx
|
|
ON mv_state_metrics (state);
|
|
|
|
-- Create index on state_id for joins
|
|
CREATE INDEX IF NOT EXISTS mv_state_metrics_state_id_idx
|
|
ON mv_state_metrics (state_id);
|
|
|
|
-- Create index for legal status filtering
|
|
CREATE INDEX IF NOT EXISTS mv_state_metrics_legal_idx
|
|
ON mv_state_metrics (recreational_legal, medical_legal);
|
|
|
|
-- Create index for data freshness queries
|
|
CREATE INDEX IF NOT EXISTS mv_state_metrics_freshness_idx
|
|
ON mv_state_metrics (data_freshness);
|
|
|
|
-- Comment on the view
|
|
COMMENT ON MATERIALIZED VIEW mv_state_metrics IS
|
|
'Aggregated state-level metrics for Analytics V2 endpoints. Refresh periodically with: REFRESH MATERIALIZED VIEW CONCURRENTLY mv_state_metrics;';
|
|
|
|
-- Record migration
|
|
INSERT INTO schema_migrations (version, name, applied_at)
|
|
VALUES ('051', 'create_mv_state_metrics', NOW())
|
|
ON CONFLICT (version) DO NOTHING;
|