-- Migration 047: Analytics Infrastructure -- Phase 3: Analytics Dashboards for CannaiQ -- Creates views, functions, and tables for price trends, brand penetration, category growth, etc. -- ============================================================ -- ANALYTICS CACHE TABLE (for expensive query results) -- ============================================================ CREATE TABLE IF NOT EXISTS analytics_cache ( id SERIAL PRIMARY KEY, cache_key VARCHAR(255) NOT NULL UNIQUE, cache_data JSONB NOT NULL, computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), expires_at TIMESTAMPTZ NOT NULL, query_time_ms INTEGER, created_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_analytics_cache_key ON analytics_cache(cache_key); CREATE INDEX IF NOT EXISTS idx_analytics_cache_expires ON analytics_cache(expires_at); -- ============================================================ -- PRICE EXTRACTION HELPER FUNCTION -- Extracts pricing from JSONB latest_raw_payload -- ============================================================ CREATE OR REPLACE FUNCTION extract_min_price(payload JSONB) RETURNS NUMERIC AS $$ DECLARE prices JSONB; min_val NUMERIC; BEGIN -- Try recPrices first (retail prices) prices := payload->'recPrices'; IF prices IS NOT NULL AND jsonb_array_length(prices) > 0 THEN SELECT MIN(value::NUMERIC) INTO min_val FROM jsonb_array_elements_text(prices) AS value WHERE value ~ '^[0-9.]+$'; IF min_val IS NOT NULL THEN RETURN min_val; END IF; END IF; -- Try Prices array prices := payload->'Prices'; IF prices IS NOT NULL AND jsonb_array_length(prices) > 0 THEN SELECT MIN(value::NUMERIC) INTO min_val FROM jsonb_array_elements_text(prices) AS value WHERE value ~ '^[0-9.]+$'; IF min_val IS NOT NULL THEN RETURN min_val; END IF; END IF; RETURN NULL; END; $$ LANGUAGE plpgsql IMMUTABLE; CREATE OR REPLACE FUNCTION extract_max_price(payload JSONB) RETURNS NUMERIC AS $$ DECLARE prices JSONB; max_val NUMERIC; BEGIN prices := payload->'recPrices'; IF prices IS NOT NULL AND jsonb_array_length(prices) > 0 THEN SELECT MAX(value::NUMERIC) INTO max_val FROM jsonb_array_elements_text(prices) AS value WHERE value ~ '^[0-9.]+$'; IF max_val IS NOT NULL THEN RETURN max_val; END IF; END IF; prices := payload->'Prices'; IF prices IS NOT NULL AND jsonb_array_length(prices) > 0 THEN SELECT MAX(value::NUMERIC) INTO max_val FROM jsonb_array_elements_text(prices) AS value WHERE value ~ '^[0-9.]+$'; IF max_val IS NOT NULL THEN RETURN max_val; END IF; END IF; RETURN NULL; END; $$ LANGUAGE plpgsql IMMUTABLE; CREATE OR REPLACE FUNCTION extract_wholesale_price(payload JSONB) RETURNS NUMERIC AS $$ DECLARE prices JSONB; min_val NUMERIC; BEGIN prices := payload->'wholesalePrices'; IF prices IS NOT NULL AND jsonb_array_length(prices) > 0 THEN SELECT MIN(value::NUMERIC) INTO min_val FROM jsonb_array_elements_text(prices) AS value WHERE value ~ '^[0-9.]+$'; RETURN min_val; END IF; RETURN NULL; END; $$ LANGUAGE plpgsql IMMUTABLE; -- ============================================================ -- VIEW: v_product_pricing -- Flattened view of products with extracted pricing -- ============================================================ CREATE OR REPLACE VIEW v_product_pricing AS SELECT dp.id, dp.dispensary_id, dp.name, dp.brand_name, dp.brand_id, dp.type as category, dp.subcategory, dp.strain_type, dp.stock_status, dp.status, d.name as store_name, d.city, d.state, extract_min_price(dp.latest_raw_payload) as min_price, extract_max_price(dp.latest_raw_payload) as max_price, extract_wholesale_price(dp.latest_raw_payload) as wholesale_price, dp.thc, dp.cbd, dp.updated_at, dp.created_at FROM dutchie_products dp JOIN dispensaries d ON dp.dispensary_id = d.id; -- ============================================================ -- VIEW: v_brand_store_presence -- Which brands are in which stores -- ============================================================ CREATE OR REPLACE VIEW v_brand_store_presence AS SELECT dp.brand_name, dp.brand_id, dp.dispensary_id, d.name as store_name, d.city, d.state, dp.type as category, COUNT(*) as sku_count, AVG(extract_min_price(dp.latest_raw_payload)) as avg_price, SUM(CASE WHEN dp.stock_status = 'in_stock' THEN 1 ELSE 0 END) as in_stock_count, MAX(dp.updated_at) as last_updated FROM dutchie_products dp JOIN dispensaries d ON dp.dispensary_id = d.id WHERE dp.brand_name IS NOT NULL GROUP BY dp.brand_name, dp.brand_id, dp.dispensary_id, d.name, d.city, d.state, dp.type; -- ============================================================ -- VIEW: v_category_store_summary -- Category breakdown per store -- ============================================================ CREATE OR REPLACE VIEW v_category_store_summary AS SELECT dp.dispensary_id, d.name as store_name, d.city, d.state, dp.type as category, COUNT(*) as sku_count, COUNT(DISTINCT dp.brand_name) as brand_count, AVG(extract_min_price(dp.latest_raw_payload)) as avg_price, MIN(extract_min_price(dp.latest_raw_payload)) as min_price, MAX(extract_max_price(dp.latest_raw_payload)) as max_price, SUM(CASE WHEN dp.stock_status = 'in_stock' THEN 1 ELSE 0 END) as in_stock_count FROM dutchie_products dp JOIN dispensaries d ON dp.dispensary_id = d.id WHERE dp.type IS NOT NULL GROUP BY dp.dispensary_id, d.name, d.city, d.state, dp.type; -- ============================================================ -- VIEW: v_brand_summary -- Global brand statistics -- ============================================================ CREATE OR REPLACE VIEW v_brand_summary AS SELECT dp.brand_name, dp.brand_id, COUNT(*) as total_skus, COUNT(DISTINCT dp.dispensary_id) as store_count, COUNT(DISTINCT dp.type) as category_count, AVG(extract_min_price(dp.latest_raw_payload)) as avg_price, MIN(extract_min_price(dp.latest_raw_payload)) as min_price, MAX(extract_max_price(dp.latest_raw_payload)) as max_price, SUM(CASE WHEN dp.stock_status = 'in_stock' THEN 1 ELSE 0 END) as in_stock_skus, ARRAY_AGG(DISTINCT dp.type) FILTER (WHERE dp.type IS NOT NULL) as categories, MAX(dp.updated_at) as last_updated FROM dutchie_products dp WHERE dp.brand_name IS NOT NULL GROUP BY dp.brand_name, dp.brand_id ORDER BY total_skus DESC; -- ============================================================ -- VIEW: v_category_summary -- Global category statistics -- ============================================================ CREATE OR REPLACE VIEW v_category_summary AS SELECT dp.type as category, COUNT(*) as total_skus, COUNT(DISTINCT dp.brand_name) as brand_count, COUNT(DISTINCT dp.dispensary_id) as store_count, AVG(extract_min_price(dp.latest_raw_payload)) as avg_price, MIN(extract_min_price(dp.latest_raw_payload)) as min_price, MAX(extract_max_price(dp.latest_raw_payload)) as max_price, SUM(CASE WHEN dp.stock_status = 'in_stock' THEN 1 ELSE 0 END) as in_stock_skus FROM dutchie_products dp WHERE dp.type IS NOT NULL GROUP BY dp.type ORDER BY total_skus DESC; -- ============================================================ -- VIEW: v_store_summary -- Store-level statistics -- ============================================================ CREATE OR REPLACE VIEW v_store_summary AS SELECT d.id as store_id, d.name as store_name, d.city, d.state, d.chain_id, COUNT(dp.id) as total_skus, COUNT(DISTINCT dp.brand_name) as brand_count, COUNT(DISTINCT dp.type) as category_count, AVG(extract_min_price(dp.latest_raw_payload)) as avg_price, SUM(CASE WHEN dp.stock_status = 'in_stock' THEN 1 ELSE 0 END) as in_stock_skus, d.last_crawl_at, d.product_count FROM dispensaries d LEFT JOIN dutchie_products dp ON d.id = dp.dispensary_id GROUP BY d.id, d.name, d.city, d.state, d.chain_id, d.last_crawl_at, d.product_count; -- ============================================================ -- TABLE: brand_snapshots (for historical brand tracking) -- ============================================================ CREATE TABLE IF NOT EXISTS brand_snapshots ( id SERIAL PRIMARY KEY, brand_name VARCHAR(255) NOT NULL, brand_id VARCHAR(255), snapshot_date DATE NOT NULL, store_count INTEGER NOT NULL DEFAULT 0, total_skus INTEGER NOT NULL DEFAULT 0, avg_price NUMERIC(10,2), in_stock_skus INTEGER NOT NULL DEFAULT 0, categories TEXT[], created_at TIMESTAMPTZ DEFAULT NOW(), UNIQUE(brand_name, snapshot_date) ); CREATE INDEX IF NOT EXISTS idx_brand_snapshots_brand ON brand_snapshots(brand_name); CREATE INDEX IF NOT EXISTS idx_brand_snapshots_date ON brand_snapshots(snapshot_date); -- ============================================================ -- TABLE: category_snapshots (for historical category tracking) -- ============================================================ CREATE TABLE IF NOT EXISTS category_snapshots ( id SERIAL PRIMARY KEY, category VARCHAR(255) NOT NULL, snapshot_date DATE NOT NULL, store_count INTEGER NOT NULL DEFAULT 0, brand_count INTEGER NOT NULL DEFAULT 0, total_skus INTEGER NOT NULL DEFAULT 0, avg_price NUMERIC(10,2), in_stock_skus INTEGER NOT NULL DEFAULT 0, created_at TIMESTAMPTZ DEFAULT NOW(), UNIQUE(category, snapshot_date) ); CREATE INDEX IF NOT EXISTS idx_category_snapshots_cat ON category_snapshots(category); CREATE INDEX IF NOT EXISTS idx_category_snapshots_date ON category_snapshots(snapshot_date); -- ============================================================ -- TABLE: store_change_events (for tracking store changes) -- ============================================================ CREATE TABLE IF NOT EXISTS store_change_events ( id SERIAL PRIMARY KEY, store_id INTEGER NOT NULL REFERENCES dispensaries(id), event_type VARCHAR(50) NOT NULL, -- brand_added, brand_removed, product_added, product_removed, price_change, stock_change event_date DATE NOT NULL, brand_name VARCHAR(255), product_id INTEGER, product_name VARCHAR(500), category VARCHAR(255), old_value TEXT, new_value TEXT, metadata JSONB, created_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_store_events_store ON store_change_events(store_id); CREATE INDEX IF NOT EXISTS idx_store_events_type ON store_change_events(event_type); CREATE INDEX IF NOT EXISTS idx_store_events_date ON store_change_events(event_date); CREATE INDEX IF NOT EXISTS idx_store_events_brand ON store_change_events(brand_name); -- ============================================================ -- TABLE: analytics_alerts -- ============================================================ CREATE TABLE IF NOT EXISTS analytics_alerts ( id SERIAL PRIMARY KEY, alert_type VARCHAR(50) NOT NULL, -- price_warning, brand_dropped, competitive_intrusion, restock_event severity VARCHAR(20) NOT NULL DEFAULT 'info', -- info, warning, critical title VARCHAR(255) NOT NULL, description TEXT, store_id INTEGER REFERENCES dispensaries(id), brand_name VARCHAR(255), product_id INTEGER, category VARCHAR(255), metadata JSONB, is_read BOOLEAN DEFAULT FALSE, created_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_analytics_alerts_type ON analytics_alerts(alert_type); CREATE INDEX IF NOT EXISTS idx_analytics_alerts_read ON analytics_alerts(is_read); CREATE INDEX IF NOT EXISTS idx_analytics_alerts_created ON analytics_alerts(created_at DESC); -- ============================================================ -- FUNCTION: Capture daily brand snapshots -- ============================================================ CREATE OR REPLACE FUNCTION capture_brand_snapshots() RETURNS INTEGER AS $$ DECLARE inserted_count INTEGER; BEGIN INSERT INTO brand_snapshots (brand_name, brand_id, snapshot_date, store_count, total_skus, avg_price, in_stock_skus, categories) SELECT brand_name, brand_id, CURRENT_DATE, COUNT(DISTINCT dispensary_id), COUNT(*), AVG(extract_min_price(latest_raw_payload)), SUM(CASE WHEN stock_status = 'in_stock' THEN 1 ELSE 0 END), ARRAY_AGG(DISTINCT type) FILTER (WHERE type IS NOT NULL) FROM dutchie_products WHERE brand_name IS NOT NULL GROUP BY brand_name, brand_id ON CONFLICT (brand_name, snapshot_date) DO UPDATE SET store_count = EXCLUDED.store_count, total_skus = EXCLUDED.total_skus, avg_price = EXCLUDED.avg_price, in_stock_skus = EXCLUDED.in_stock_skus, categories = EXCLUDED.categories; GET DIAGNOSTICS inserted_count = ROW_COUNT; RETURN inserted_count; END; $$ LANGUAGE plpgsql; -- ============================================================ -- FUNCTION: Capture daily category snapshots -- ============================================================ CREATE OR REPLACE FUNCTION capture_category_snapshots() RETURNS INTEGER AS $$ DECLARE inserted_count INTEGER; BEGIN INSERT INTO category_snapshots (category, snapshot_date, store_count, brand_count, total_skus, avg_price, in_stock_skus) SELECT type, CURRENT_DATE, COUNT(DISTINCT dispensary_id), COUNT(DISTINCT brand_name), COUNT(*), AVG(extract_min_price(latest_raw_payload)), SUM(CASE WHEN stock_status = 'in_stock' THEN 1 ELSE 0 END) FROM dutchie_products WHERE type IS NOT NULL GROUP BY type ON CONFLICT (category, snapshot_date) DO UPDATE SET store_count = EXCLUDED.store_count, brand_count = EXCLUDED.brand_count, total_skus = EXCLUDED.total_skus, avg_price = EXCLUDED.avg_price, in_stock_skus = EXCLUDED.in_stock_skus; GET DIAGNOSTICS inserted_count = ROW_COUNT; RETURN inserted_count; END; $$ LANGUAGE plpgsql; -- ============================================================ -- FUNCTION: Calculate price volatility for a product -- ============================================================ CREATE OR REPLACE FUNCTION calculate_price_volatility( p_product_id INTEGER, p_days INTEGER DEFAULT 30 ) RETURNS NUMERIC AS $$ DECLARE std_dev NUMERIC; avg_price NUMERIC; BEGIN -- Using dutchie_product_snapshots if available SELECT STDDEV(rec_min_price_cents / 100.0), AVG(rec_min_price_cents / 100.0) INTO std_dev, avg_price FROM dutchie_product_snapshots WHERE dutchie_product_id = p_product_id AND crawled_at >= NOW() - (p_days || ' days')::INTERVAL AND rec_min_price_cents IS NOT NULL; IF avg_price IS NULL OR avg_price = 0 THEN RETURN NULL; END IF; -- Return coefficient of variation (CV) RETURN ROUND((std_dev / avg_price) * 100, 2); END; $$ LANGUAGE plpgsql; -- ============================================================ -- FUNCTION: Get brand penetration stats -- ============================================================ CREATE OR REPLACE FUNCTION get_brand_penetration( p_brand_name VARCHAR, p_state VARCHAR DEFAULT NULL ) RETURNS TABLE ( total_stores BIGINT, stores_carrying BIGINT, penetration_pct NUMERIC, total_skus BIGINT, avg_skus_per_store NUMERIC, shelf_share_pct NUMERIC ) AS $$ BEGIN RETURN QUERY WITH store_counts AS ( SELECT COUNT(DISTINCT d.id) as total, COUNT(DISTINCT CASE WHEN dp.brand_name = p_brand_name THEN dp.dispensary_id END) as carrying FROM dispensaries d LEFT JOIN dutchie_products dp ON d.id = dp.dispensary_id WHERE (p_state IS NULL OR d.state = p_state) ), sku_counts AS ( SELECT COUNT(*) as brand_skus, COUNT(DISTINCT dispensary_id) as stores_with_brand FROM dutchie_products WHERE brand_name = p_brand_name ), total_skus AS ( SELECT COUNT(*) as total FROM dutchie_products dp JOIN dispensaries d ON dp.dispensary_id = d.id WHERE (p_state IS NULL OR d.state = p_state) ) SELECT sc.total, sc.carrying, ROUND((sc.carrying::NUMERIC / NULLIF(sc.total, 0)) * 100, 2), skc.brand_skus, ROUND(skc.brand_skus::NUMERIC / NULLIF(skc.stores_with_brand, 0), 2), ROUND((skc.brand_skus::NUMERIC / NULLIF(ts.total, 0)) * 100, 2) FROM store_counts sc, sku_counts skc, total_skus ts; END; $$ LANGUAGE plpgsql; -- ============================================================ -- Initial snapshot capture (run manually if needed) -- ============================================================ -- Note: Run these after migration to capture initial snapshots: -- SELECT capture_brand_snapshots(); -- SELECT capture_category_snapshots(); -- ============================================================ -- Grant permissions -- ============================================================ -- Views are accessible to all roles by default COMMENT ON VIEW v_product_pricing IS 'Flattened product view with extracted pricing from JSONB'; COMMENT ON VIEW v_brand_store_presence IS 'Brand presence across stores with SKU counts'; COMMENT ON VIEW v_brand_summary IS 'Global brand statistics'; COMMENT ON VIEW v_category_summary IS 'Global category statistics'; COMMENT ON VIEW v_store_summary IS 'Store-level statistics'; COMMENT ON TABLE analytics_cache IS 'Cache for expensive analytics queries'; COMMENT ON TABLE brand_snapshots IS 'Historical daily snapshots of brand metrics'; COMMENT ON TABLE category_snapshots IS 'Historical daily snapshots of category metrics'; COMMENT ON TABLE store_change_events IS 'Log of brand/product changes at stores'; COMMENT ON TABLE analytics_alerts IS 'Analytics-generated alerts and notifications';