Files
cannaiq/backend/migrations/047_analytics_infrastructure.sql
Kelly b4a2fb7d03 feat: Add v2 architecture with multi-state support and orchestrator services
Major additions:
- Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare
- Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator
- Discovery system: dutchie discovery service, geo validation, city seeding scripts
- Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages
- Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram)
- Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata

Frontend pages added:
- Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores
- StateHeatmap, CrossStateCompare, SyncInfoPanel

Components added:
- StateSelector, OrchestratorTraceModal, WorkflowStepper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-07 11:30:57 -07:00

474 lines
17 KiB
PL/PgSQL

-- Migration 047: Analytics Infrastructure
-- Phase 3: Analytics Dashboards for CannaiQ
-- Creates views, functions, and tables for price trends, brand penetration, category growth, etc.
-- ============================================================
-- ANALYTICS CACHE TABLE (for expensive query results)
-- ============================================================
CREATE TABLE IF NOT EXISTS analytics_cache (
id SERIAL PRIMARY KEY,
cache_key VARCHAR(255) NOT NULL UNIQUE,
cache_data JSONB NOT NULL,
computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
expires_at TIMESTAMPTZ NOT NULL,
query_time_ms INTEGER,
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_analytics_cache_key ON analytics_cache(cache_key);
CREATE INDEX IF NOT EXISTS idx_analytics_cache_expires ON analytics_cache(expires_at);
-- ============================================================
-- PRICE EXTRACTION HELPER FUNCTION
-- Extracts pricing from JSONB latest_raw_payload
-- ============================================================
CREATE OR REPLACE FUNCTION extract_min_price(payload JSONB)
RETURNS NUMERIC AS $$
DECLARE
prices JSONB;
min_val NUMERIC;
BEGIN
-- Try recPrices first (retail prices)
prices := payload->'recPrices';
IF prices IS NOT NULL AND jsonb_array_length(prices) > 0 THEN
SELECT MIN(value::NUMERIC) INTO min_val FROM jsonb_array_elements_text(prices) AS value WHERE value ~ '^[0-9.]+$';
IF min_val IS NOT NULL THEN RETURN min_val; END IF;
END IF;
-- Try Prices array
prices := payload->'Prices';
IF prices IS NOT NULL AND jsonb_array_length(prices) > 0 THEN
SELECT MIN(value::NUMERIC) INTO min_val FROM jsonb_array_elements_text(prices) AS value WHERE value ~ '^[0-9.]+$';
IF min_val IS NOT NULL THEN RETURN min_val; END IF;
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION extract_max_price(payload JSONB)
RETURNS NUMERIC AS $$
DECLARE
prices JSONB;
max_val NUMERIC;
BEGIN
prices := payload->'recPrices';
IF prices IS NOT NULL AND jsonb_array_length(prices) > 0 THEN
SELECT MAX(value::NUMERIC) INTO max_val FROM jsonb_array_elements_text(prices) AS value WHERE value ~ '^[0-9.]+$';
IF max_val IS NOT NULL THEN RETURN max_val; END IF;
END IF;
prices := payload->'Prices';
IF prices IS NOT NULL AND jsonb_array_length(prices) > 0 THEN
SELECT MAX(value::NUMERIC) INTO max_val FROM jsonb_array_elements_text(prices) AS value WHERE value ~ '^[0-9.]+$';
IF max_val IS NOT NULL THEN RETURN max_val; END IF;
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION extract_wholesale_price(payload JSONB)
RETURNS NUMERIC AS $$
DECLARE
prices JSONB;
min_val NUMERIC;
BEGIN
prices := payload->'wholesalePrices';
IF prices IS NOT NULL AND jsonb_array_length(prices) > 0 THEN
SELECT MIN(value::NUMERIC) INTO min_val FROM jsonb_array_elements_text(prices) AS value WHERE value ~ '^[0-9.]+$';
RETURN min_val;
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- ============================================================
-- VIEW: v_product_pricing
-- Flattened view of products with extracted pricing
-- ============================================================
CREATE OR REPLACE VIEW v_product_pricing AS
SELECT
dp.id,
dp.dispensary_id,
dp.name,
dp.brand_name,
dp.brand_id,
dp.type as category,
dp.subcategory,
dp.strain_type,
dp.stock_status,
dp.status,
d.name as store_name,
d.city,
d.state,
extract_min_price(dp.latest_raw_payload) as min_price,
extract_max_price(dp.latest_raw_payload) as max_price,
extract_wholesale_price(dp.latest_raw_payload) as wholesale_price,
dp.thc,
dp.cbd,
dp.updated_at,
dp.created_at
FROM dutchie_products dp
JOIN dispensaries d ON dp.dispensary_id = d.id;
-- ============================================================
-- VIEW: v_brand_store_presence
-- Which brands are in which stores
-- ============================================================
CREATE OR REPLACE VIEW v_brand_store_presence AS
SELECT
dp.brand_name,
dp.brand_id,
dp.dispensary_id,
d.name as store_name,
d.city,
d.state,
dp.type as category,
COUNT(*) as sku_count,
AVG(extract_min_price(dp.latest_raw_payload)) as avg_price,
SUM(CASE WHEN dp.stock_status = 'in_stock' THEN 1 ELSE 0 END) as in_stock_count,
MAX(dp.updated_at) as last_updated
FROM dutchie_products dp
JOIN dispensaries d ON dp.dispensary_id = d.id
WHERE dp.brand_name IS NOT NULL
GROUP BY dp.brand_name, dp.brand_id, dp.dispensary_id, d.name, d.city, d.state, dp.type;
-- ============================================================
-- VIEW: v_category_store_summary
-- Category breakdown per store
-- ============================================================
CREATE OR REPLACE VIEW v_category_store_summary AS
SELECT
dp.dispensary_id,
d.name as store_name,
d.city,
d.state,
dp.type as category,
COUNT(*) as sku_count,
COUNT(DISTINCT dp.brand_name) as brand_count,
AVG(extract_min_price(dp.latest_raw_payload)) as avg_price,
MIN(extract_min_price(dp.latest_raw_payload)) as min_price,
MAX(extract_max_price(dp.latest_raw_payload)) as max_price,
SUM(CASE WHEN dp.stock_status = 'in_stock' THEN 1 ELSE 0 END) as in_stock_count
FROM dutchie_products dp
JOIN dispensaries d ON dp.dispensary_id = d.id
WHERE dp.type IS NOT NULL
GROUP BY dp.dispensary_id, d.name, d.city, d.state, dp.type;
-- ============================================================
-- VIEW: v_brand_summary
-- Global brand statistics
-- ============================================================
CREATE OR REPLACE VIEW v_brand_summary AS
SELECT
dp.brand_name,
dp.brand_id,
COUNT(*) as total_skus,
COUNT(DISTINCT dp.dispensary_id) as store_count,
COUNT(DISTINCT dp.type) as category_count,
AVG(extract_min_price(dp.latest_raw_payload)) as avg_price,
MIN(extract_min_price(dp.latest_raw_payload)) as min_price,
MAX(extract_max_price(dp.latest_raw_payload)) as max_price,
SUM(CASE WHEN dp.stock_status = 'in_stock' THEN 1 ELSE 0 END) as in_stock_skus,
ARRAY_AGG(DISTINCT dp.type) FILTER (WHERE dp.type IS NOT NULL) as categories,
MAX(dp.updated_at) as last_updated
FROM dutchie_products dp
WHERE dp.brand_name IS NOT NULL
GROUP BY dp.brand_name, dp.brand_id
ORDER BY total_skus DESC;
-- ============================================================
-- VIEW: v_category_summary
-- Global category statistics
-- ============================================================
CREATE OR REPLACE VIEW v_category_summary AS
SELECT
dp.type as category,
COUNT(*) as total_skus,
COUNT(DISTINCT dp.brand_name) as brand_count,
COUNT(DISTINCT dp.dispensary_id) as store_count,
AVG(extract_min_price(dp.latest_raw_payload)) as avg_price,
MIN(extract_min_price(dp.latest_raw_payload)) as min_price,
MAX(extract_max_price(dp.latest_raw_payload)) as max_price,
SUM(CASE WHEN dp.stock_status = 'in_stock' THEN 1 ELSE 0 END) as in_stock_skus
FROM dutchie_products dp
WHERE dp.type IS NOT NULL
GROUP BY dp.type
ORDER BY total_skus DESC;
-- ============================================================
-- VIEW: v_store_summary
-- Store-level statistics
-- ============================================================
CREATE OR REPLACE VIEW v_store_summary AS
SELECT
d.id as store_id,
d.name as store_name,
d.city,
d.state,
d.chain_id,
COUNT(dp.id) as total_skus,
COUNT(DISTINCT dp.brand_name) as brand_count,
COUNT(DISTINCT dp.type) as category_count,
AVG(extract_min_price(dp.latest_raw_payload)) as avg_price,
SUM(CASE WHEN dp.stock_status = 'in_stock' THEN 1 ELSE 0 END) as in_stock_skus,
d.last_crawl_at,
d.product_count
FROM dispensaries d
LEFT JOIN dutchie_products dp ON d.id = dp.dispensary_id
GROUP BY d.id, d.name, d.city, d.state, d.chain_id, d.last_crawl_at, d.product_count;
-- ============================================================
-- TABLE: brand_snapshots (for historical brand tracking)
-- ============================================================
CREATE TABLE IF NOT EXISTS brand_snapshots (
id SERIAL PRIMARY KEY,
brand_name VARCHAR(255) NOT NULL,
brand_id VARCHAR(255),
snapshot_date DATE NOT NULL,
store_count INTEGER NOT NULL DEFAULT 0,
total_skus INTEGER NOT NULL DEFAULT 0,
avg_price NUMERIC(10,2),
in_stock_skus INTEGER NOT NULL DEFAULT 0,
categories TEXT[],
created_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(brand_name, snapshot_date)
);
CREATE INDEX IF NOT EXISTS idx_brand_snapshots_brand ON brand_snapshots(brand_name);
CREATE INDEX IF NOT EXISTS idx_brand_snapshots_date ON brand_snapshots(snapshot_date);
-- ============================================================
-- TABLE: category_snapshots (for historical category tracking)
-- ============================================================
CREATE TABLE IF NOT EXISTS category_snapshots (
id SERIAL PRIMARY KEY,
category VARCHAR(255) NOT NULL,
snapshot_date DATE NOT NULL,
store_count INTEGER NOT NULL DEFAULT 0,
brand_count INTEGER NOT NULL DEFAULT 0,
total_skus INTEGER NOT NULL DEFAULT 0,
avg_price NUMERIC(10,2),
in_stock_skus INTEGER NOT NULL DEFAULT 0,
created_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(category, snapshot_date)
);
CREATE INDEX IF NOT EXISTS idx_category_snapshots_cat ON category_snapshots(category);
CREATE INDEX IF NOT EXISTS idx_category_snapshots_date ON category_snapshots(snapshot_date);
-- ============================================================
-- TABLE: store_change_events (for tracking store changes)
-- ============================================================
CREATE TABLE IF NOT EXISTS store_change_events (
id SERIAL PRIMARY KEY,
store_id INTEGER NOT NULL REFERENCES dispensaries(id),
event_type VARCHAR(50) NOT NULL, -- brand_added, brand_removed, product_added, product_removed, price_change, stock_change
event_date DATE NOT NULL,
brand_name VARCHAR(255),
product_id INTEGER,
product_name VARCHAR(500),
category VARCHAR(255),
old_value TEXT,
new_value TEXT,
metadata JSONB,
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_store_events_store ON store_change_events(store_id);
CREATE INDEX IF NOT EXISTS idx_store_events_type ON store_change_events(event_type);
CREATE INDEX IF NOT EXISTS idx_store_events_date ON store_change_events(event_date);
CREATE INDEX IF NOT EXISTS idx_store_events_brand ON store_change_events(brand_name);
-- ============================================================
-- TABLE: analytics_alerts
-- ============================================================
CREATE TABLE IF NOT EXISTS analytics_alerts (
id SERIAL PRIMARY KEY,
alert_type VARCHAR(50) NOT NULL, -- price_warning, brand_dropped, competitive_intrusion, restock_event
severity VARCHAR(20) NOT NULL DEFAULT 'info', -- info, warning, critical
title VARCHAR(255) NOT NULL,
description TEXT,
store_id INTEGER REFERENCES dispensaries(id),
brand_name VARCHAR(255),
product_id INTEGER,
category VARCHAR(255),
metadata JSONB,
is_read BOOLEAN DEFAULT FALSE,
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_analytics_alerts_type ON analytics_alerts(alert_type);
CREATE INDEX IF NOT EXISTS idx_analytics_alerts_read ON analytics_alerts(is_read);
CREATE INDEX IF NOT EXISTS idx_analytics_alerts_created ON analytics_alerts(created_at DESC);
-- ============================================================
-- FUNCTION: Capture daily brand snapshots
-- ============================================================
CREATE OR REPLACE FUNCTION capture_brand_snapshots()
RETURNS INTEGER AS $$
DECLARE
inserted_count INTEGER;
BEGIN
INSERT INTO brand_snapshots (brand_name, brand_id, snapshot_date, store_count, total_skus, avg_price, in_stock_skus, categories)
SELECT
brand_name,
brand_id,
CURRENT_DATE,
COUNT(DISTINCT dispensary_id),
COUNT(*),
AVG(extract_min_price(latest_raw_payload)),
SUM(CASE WHEN stock_status = 'in_stock' THEN 1 ELSE 0 END),
ARRAY_AGG(DISTINCT type) FILTER (WHERE type IS NOT NULL)
FROM dutchie_products
WHERE brand_name IS NOT NULL
GROUP BY brand_name, brand_id
ON CONFLICT (brand_name, snapshot_date)
DO UPDATE SET
store_count = EXCLUDED.store_count,
total_skus = EXCLUDED.total_skus,
avg_price = EXCLUDED.avg_price,
in_stock_skus = EXCLUDED.in_stock_skus,
categories = EXCLUDED.categories;
GET DIAGNOSTICS inserted_count = ROW_COUNT;
RETURN inserted_count;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- FUNCTION: Capture daily category snapshots
-- ============================================================
CREATE OR REPLACE FUNCTION capture_category_snapshots()
RETURNS INTEGER AS $$
DECLARE
inserted_count INTEGER;
BEGIN
INSERT INTO category_snapshots (category, snapshot_date, store_count, brand_count, total_skus, avg_price, in_stock_skus)
SELECT
type,
CURRENT_DATE,
COUNT(DISTINCT dispensary_id),
COUNT(DISTINCT brand_name),
COUNT(*),
AVG(extract_min_price(latest_raw_payload)),
SUM(CASE WHEN stock_status = 'in_stock' THEN 1 ELSE 0 END)
FROM dutchie_products
WHERE type IS NOT NULL
GROUP BY type
ON CONFLICT (category, snapshot_date)
DO UPDATE SET
store_count = EXCLUDED.store_count,
brand_count = EXCLUDED.brand_count,
total_skus = EXCLUDED.total_skus,
avg_price = EXCLUDED.avg_price,
in_stock_skus = EXCLUDED.in_stock_skus;
GET DIAGNOSTICS inserted_count = ROW_COUNT;
RETURN inserted_count;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- FUNCTION: Calculate price volatility for a product
-- ============================================================
CREATE OR REPLACE FUNCTION calculate_price_volatility(
p_product_id INTEGER,
p_days INTEGER DEFAULT 30
)
RETURNS NUMERIC AS $$
DECLARE
std_dev NUMERIC;
avg_price NUMERIC;
BEGIN
-- Using dutchie_product_snapshots if available
SELECT
STDDEV(rec_min_price_cents / 100.0),
AVG(rec_min_price_cents / 100.0)
INTO std_dev, avg_price
FROM dutchie_product_snapshots
WHERE dutchie_product_id = p_product_id
AND crawled_at >= NOW() - (p_days || ' days')::INTERVAL
AND rec_min_price_cents IS NOT NULL;
IF avg_price IS NULL OR avg_price = 0 THEN
RETURN NULL;
END IF;
-- Return coefficient of variation (CV)
RETURN ROUND((std_dev / avg_price) * 100, 2);
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- FUNCTION: Get brand penetration stats
-- ============================================================
CREATE OR REPLACE FUNCTION get_brand_penetration(
p_brand_name VARCHAR,
p_state VARCHAR DEFAULT NULL
)
RETURNS TABLE (
total_stores BIGINT,
stores_carrying BIGINT,
penetration_pct NUMERIC,
total_skus BIGINT,
avg_skus_per_store NUMERIC,
shelf_share_pct NUMERIC
) AS $$
BEGIN
RETURN QUERY
WITH store_counts AS (
SELECT
COUNT(DISTINCT d.id) as total,
COUNT(DISTINCT CASE WHEN dp.brand_name = p_brand_name THEN dp.dispensary_id END) as carrying
FROM dispensaries d
LEFT JOIN dutchie_products dp ON d.id = dp.dispensary_id
WHERE (p_state IS NULL OR d.state = p_state)
),
sku_counts AS (
SELECT
COUNT(*) as brand_skus,
COUNT(DISTINCT dispensary_id) as stores_with_brand
FROM dutchie_products
WHERE brand_name = p_brand_name
),
total_skus AS (
SELECT COUNT(*) as total FROM dutchie_products dp
JOIN dispensaries d ON dp.dispensary_id = d.id
WHERE (p_state IS NULL OR d.state = p_state)
)
SELECT
sc.total,
sc.carrying,
ROUND((sc.carrying::NUMERIC / NULLIF(sc.total, 0)) * 100, 2),
skc.brand_skus,
ROUND(skc.brand_skus::NUMERIC / NULLIF(skc.stores_with_brand, 0), 2),
ROUND((skc.brand_skus::NUMERIC / NULLIF(ts.total, 0)) * 100, 2)
FROM store_counts sc, sku_counts skc, total_skus ts;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- Initial snapshot capture (run manually if needed)
-- ============================================================
-- Note: Run these after migration to capture initial snapshots:
-- SELECT capture_brand_snapshots();
-- SELECT capture_category_snapshots();
-- ============================================================
-- Grant permissions
-- ============================================================
-- Views are accessible to all roles by default
COMMENT ON VIEW v_product_pricing IS 'Flattened product view with extracted pricing from JSONB';
COMMENT ON VIEW v_brand_store_presence IS 'Brand presence across stores with SKU counts';
COMMENT ON VIEW v_brand_summary IS 'Global brand statistics';
COMMENT ON VIEW v_category_summary IS 'Global category statistics';
COMMENT ON VIEW v_store_summary IS 'Store-level statistics';
COMMENT ON TABLE analytics_cache IS 'Cache for expensive analytics queries';
COMMENT ON TABLE brand_snapshots IS 'Historical daily snapshots of brand metrics';
COMMENT ON TABLE category_snapshots IS 'Historical daily snapshots of category metrics';
COMMENT ON TABLE store_change_events IS 'Log of brand/product changes at stores';
COMMENT ON TABLE analytics_alerts IS 'Analytics-generated alerts and notifications';