-- Migration 121: Sales Analytics Materialized Views -- Pre-computed views for sales velocity, brand market share, and store performance -- ============================================================ -- VIEW 1: Daily Sales Estimates (per product/store) -- Calculates delta between consecutive snapshots -- ============================================================ CREATE MATERIALIZED VIEW IF NOT EXISTS mv_daily_sales_estimates AS WITH qty_deltas AS ( SELECT dispensary_id, product_id, brand_name, category, DATE(captured_at) AS sale_date, price_rec, quantity_available, LAG(quantity_available) OVER ( PARTITION BY dispensary_id, product_id ORDER BY captured_at ) AS prev_quantity FROM inventory_snapshots WHERE quantity_available IS NOT NULL AND captured_at >= NOW() - INTERVAL '30 days' ) SELECT dispensary_id, product_id, brand_name, category, sale_date, AVG(price_rec) AS avg_price, SUM(GREATEST(0, COALESCE(prev_quantity, 0) - quantity_available)) AS units_sold, SUM(GREATEST(0, quantity_available - COALESCE(prev_quantity, 0))) AS units_restocked, SUM(GREATEST(0, COALESCE(prev_quantity, 0) - quantity_available) * COALESCE(price_rec, 0)) AS revenue_estimate, COUNT(*) AS snapshot_count FROM qty_deltas WHERE prev_quantity IS NOT NULL GROUP BY dispensary_id, product_id, brand_name, category, sale_date; CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_daily_sales_pk ON mv_daily_sales_estimates(dispensary_id, product_id, sale_date); CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_brand ON mv_daily_sales_estimates(brand_name, sale_date); CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_category ON mv_daily_sales_estimates(category, sale_date); CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_date ON mv_daily_sales_estimates(sale_date DESC); -- ============================================================ -- VIEW 2: Brand Market Share by State -- Weighted distribution across stores -- ============================================================ CREATE MATERIALIZED VIEW IF NOT EXISTS mv_brand_market_share AS WITH brand_presence AS ( SELECT sp.brand AS brand_name, d.state AS state_code, COUNT(DISTINCT sp.dispensary_id) AS stores_carrying, COUNT(*) AS sku_count, SUM(CASE WHEN sp.is_in_stock THEN 1 ELSE 0 END) AS in_stock_skus, AVG(sp.price_rec) AS avg_price FROM store_products sp JOIN dispensaries d ON d.id = sp.dispensary_id WHERE sp.brand IS NOT NULL AND d.state IS NOT NULL GROUP BY sp.brand, d.state ), state_totals AS ( SELECT d.state AS state_code, COUNT(DISTINCT d.id) FILTER (WHERE d.crawl_enabled) AS total_stores FROM dispensaries d WHERE d.state IS NOT NULL GROUP BY d.state ) SELECT bp.brand_name, bp.state_code, bp.stores_carrying, st.total_stores, ROUND(bp.stores_carrying::NUMERIC * 100 / NULLIF(st.total_stores, 0), 2) AS penetration_pct, bp.sku_count, bp.in_stock_skus, bp.avg_price, NOW() AS calculated_at FROM brand_presence bp JOIN state_totals st ON st.state_code = bp.state_code; CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_brand_market_pk ON mv_brand_market_share(brand_name, state_code); CREATE INDEX IF NOT EXISTS idx_mv_brand_market_state ON mv_brand_market_share(state_code); CREATE INDEX IF NOT EXISTS idx_mv_brand_market_penetration ON mv_brand_market_share(penetration_pct DESC); -- ============================================================ -- VIEW 3: SKU Velocity (30-day rolling) -- Average daily units sold per SKU -- ============================================================ CREATE MATERIALIZED VIEW IF NOT EXISTS mv_sku_velocity AS SELECT dse.product_id, dse.brand_name, dse.category, dse.dispensary_id, d.name AS dispensary_name, d.state AS state_code, SUM(dse.units_sold) AS total_units_30d, SUM(dse.revenue_estimate) AS total_revenue_30d, COUNT(DISTINCT dse.sale_date) AS days_with_sales, ROUND(SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0), 2) AS avg_daily_units, AVG(dse.avg_price) AS avg_price, CASE WHEN SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0) >= 5 THEN 'hot' WHEN SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0) >= 1 THEN 'steady' WHEN SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0) >= 0.1 THEN 'slow' ELSE 'stale' END AS velocity_tier, NOW() AS calculated_at FROM mv_daily_sales_estimates dse JOIN dispensaries d ON d.id = dse.dispensary_id WHERE dse.sale_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY dse.product_id, dse.brand_name, dse.category, dse.dispensary_id, d.name, d.state; CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_sku_velocity_pk ON mv_sku_velocity(dispensary_id, product_id); CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_brand ON mv_sku_velocity(brand_name); CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_tier ON mv_sku_velocity(velocity_tier); CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_state ON mv_sku_velocity(state_code); CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_units ON mv_sku_velocity(total_units_30d DESC); -- ============================================================ -- VIEW 4: Store Performance Rankings -- Revenue estimates and brand diversity per store -- ============================================================ CREATE MATERIALIZED VIEW IF NOT EXISTS mv_store_performance AS SELECT d.id AS dispensary_id, d.name AS dispensary_name, d.city, d.state AS state_code, -- Revenue metrics from sales estimates COALESCE(sales.total_revenue_30d, 0) AS total_revenue_30d, COALESCE(sales.total_units_30d, 0) AS total_units_30d, -- Inventory metrics COUNT(DISTINCT sp.id) AS total_skus, COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock) AS in_stock_skus, -- Brand diversity COUNT(DISTINCT sp.brand) AS unique_brands, COUNT(DISTINCT sp.category) AS unique_categories, -- Pricing AVG(sp.price_rec) AS avg_price, -- Activity MAX(sp.updated_at) AS last_updated, NOW() AS calculated_at FROM dispensaries d LEFT JOIN store_products sp ON sp.dispensary_id = d.id LEFT JOIN ( SELECT dispensary_id, SUM(revenue_estimate) AS total_revenue_30d, SUM(units_sold) AS total_units_30d FROM mv_daily_sales_estimates WHERE sale_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY dispensary_id ) sales ON sales.dispensary_id = d.id WHERE d.crawl_enabled = TRUE GROUP BY d.id, d.name, d.city, d.state, sales.total_revenue_30d, sales.total_units_30d; CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_store_perf_pk ON mv_store_performance(dispensary_id); CREATE INDEX IF NOT EXISTS idx_mv_store_perf_state ON mv_store_performance(state_code); CREATE INDEX IF NOT EXISTS idx_mv_store_perf_revenue ON mv_store_performance(total_revenue_30d DESC); -- ============================================================ -- VIEW 5: Weekly Category Trends -- Category performance over time -- ============================================================ CREATE MATERIALIZED VIEW IF NOT EXISTS mv_category_weekly_trends AS SELECT dse.category, d.state AS state_code, DATE_TRUNC('week', dse.sale_date)::DATE AS week_start, COUNT(DISTINCT dse.product_id) AS sku_count, COUNT(DISTINCT dse.dispensary_id) AS store_count, SUM(dse.units_sold) AS total_units, SUM(dse.revenue_estimate) AS total_revenue, AVG(dse.avg_price) AS avg_price, NOW() AS calculated_at FROM mv_daily_sales_estimates dse JOIN dispensaries d ON d.id = dse.dispensary_id WHERE dse.category IS NOT NULL AND dse.sale_date >= CURRENT_DATE - INTERVAL '90 days' GROUP BY dse.category, d.state, DATE_TRUNC('week', dse.sale_date); CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_cat_weekly_pk ON mv_category_weekly_trends(category, state_code, week_start); CREATE INDEX IF NOT EXISTS idx_mv_cat_weekly_state ON mv_category_weekly_trends(state_code, week_start); CREATE INDEX IF NOT EXISTS idx_mv_cat_weekly_date ON mv_category_weekly_trends(week_start DESC); -- ============================================================ -- VIEW 6: Product Intelligence (Hoodie-style per-product metrics) -- Includes stock diff, days since OOS, days until stockout -- ============================================================ CREATE MATERIALIZED VIEW IF NOT EXISTS mv_product_intelligence AS WITH -- Calculate stock diff over 120 days stock_diff AS ( SELECT dispensary_id, product_id, -- Get oldest and newest quantity in last 120 days FIRST_VALUE(quantity_available) OVER ( PARTITION BY dispensary_id, product_id ORDER BY captured_at ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) AS qty_120d_ago, LAST_VALUE(quantity_available) OVER ( PARTITION BY dispensary_id, product_id ORDER BY captured_at ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) AS qty_current FROM inventory_snapshots WHERE captured_at >= NOW() - INTERVAL '120 days' ), stock_diff_calc AS ( SELECT DISTINCT dispensary_id, product_id, qty_current - COALESCE(qty_120d_ago, qty_current) AS stock_diff_120 FROM stock_diff ), -- Get days since last OOS event last_oos AS ( SELECT dispensary_id, product_id, MAX(detected_at) AS last_oos_date FROM product_visibility_events WHERE event_type = 'oos' GROUP BY dispensary_id, product_id ), -- Calculate avg daily units sold (from velocity view) velocity AS ( SELECT dispensary_id, product_id, avg_daily_units FROM mv_sku_velocity ) SELECT sp.dispensary_id, d.name AS dispensary_name, d.state AS state_code, d.city, sp.provider_product_id AS sku, sp.name_raw AS product_name, sp.brand_name_raw AS brand, sp.category_raw AS category, sp.is_in_stock, sp.stock_status, sp.stock_quantity, sp.price_rec AS price, sp.first_seen_at AS first_seen, sp.last_seen_at AS last_seen, -- Calculated fields COALESCE(sd.stock_diff_120, 0) AS stock_diff_120, CASE WHEN lo.last_oos_date IS NOT NULL THEN EXTRACT(DAY FROM NOW() - lo.last_oos_date)::INT ELSE NULL END AS days_since_oos, -- Days until stockout = current stock / daily burn rate CASE WHEN v.avg_daily_units > 0 AND sp.stock_quantity > 0 THEN ROUND(sp.stock_quantity::NUMERIC / v.avg_daily_units)::INT ELSE NULL END AS days_until_stock_out, v.avg_daily_units, NOW() AS calculated_at FROM store_products sp JOIN dispensaries d ON d.id = sp.dispensary_id LEFT JOIN stock_diff_calc sd ON sd.dispensary_id = sp.dispensary_id AND sd.product_id = sp.provider_product_id LEFT JOIN last_oos lo ON lo.dispensary_id = sp.dispensary_id AND lo.product_id = sp.provider_product_id LEFT JOIN velocity v ON v.dispensary_id = sp.dispensary_id AND v.product_id = sp.provider_product_id WHERE d.crawl_enabled = TRUE; CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_prod_intel_pk ON mv_product_intelligence(dispensary_id, sku); CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_brand ON mv_product_intelligence(brand); CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_state ON mv_product_intelligence(state_code); CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_stock_out ON mv_product_intelligence(days_until_stock_out ASC NULLS LAST); CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_oos ON mv_product_intelligence(days_since_oos DESC NULLS LAST); -- ============================================================ -- REFRESH FUNCTION -- ============================================================ CREATE OR REPLACE FUNCTION refresh_sales_analytics_views() RETURNS TABLE(view_name TEXT, rows_affected BIGINT) AS $$ DECLARE row_count BIGINT; BEGIN -- Must refresh in dependency order: -- 1. daily_sales (base view) -- 2. sku_velocity (depends on daily_sales) -- 3. product_intelligence (depends on sku_velocity) -- 4. others (independent) REFRESH MATERIALIZED VIEW CONCURRENTLY mv_daily_sales_estimates; SELECT COUNT(*) INTO row_count FROM mv_daily_sales_estimates; view_name := 'mv_daily_sales_estimates'; rows_affected := row_count; RETURN NEXT; REFRESH MATERIALIZED VIEW CONCURRENTLY mv_brand_market_share; SELECT COUNT(*) INTO row_count FROM mv_brand_market_share; view_name := 'mv_brand_market_share'; rows_affected := row_count; RETURN NEXT; REFRESH MATERIALIZED VIEW CONCURRENTLY mv_sku_velocity; SELECT COUNT(*) INTO row_count FROM mv_sku_velocity; view_name := 'mv_sku_velocity'; rows_affected := row_count; RETURN NEXT; REFRESH MATERIALIZED VIEW CONCURRENTLY mv_store_performance; SELECT COUNT(*) INTO row_count FROM mv_store_performance; view_name := 'mv_store_performance'; rows_affected := row_count; RETURN NEXT; REFRESH MATERIALIZED VIEW CONCURRENTLY mv_category_weekly_trends; SELECT COUNT(*) INTO row_count FROM mv_category_weekly_trends; view_name := 'mv_category_weekly_trends'; rows_affected := row_count; RETURN NEXT; -- Product intelligence depends on sku_velocity, so refresh last REFRESH MATERIALIZED VIEW CONCURRENTLY mv_product_intelligence; SELECT COUNT(*) INTO row_count FROM mv_product_intelligence; view_name := 'mv_product_intelligence'; rows_affected := row_count; RETURN NEXT; END; $$ LANGUAGE plpgsql; COMMENT ON FUNCTION refresh_sales_analytics_views IS 'Refresh all sales analytics materialized views. Call hourly via scheduler.'; -- ============================================================ -- INITIAL REFRESH (populate views) -- ============================================================ -- Note: Initial refresh must be non-concurrent (no unique index yet populated) -- Run these manually after migration: -- REFRESH MATERIALIZED VIEW mv_daily_sales_estimates; -- REFRESH MATERIALIZED VIEW mv_brand_market_share; -- REFRESH MATERIALIZED VIEW mv_sku_velocity; -- REFRESH MATERIALIZED VIEW mv_store_performance; -- REFRESH MATERIALIZED VIEW mv_category_weekly_trends;