Files
cannaiq/backend/migrations/121_sales_analytics_views.sql
Kelly 9f3bc8a843
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
fix: Worker task concurrency limit and inventory tracking
- Fix claim_task to enforce max 5 tasks per worker (was unlimited)
- Add session_task_count check before ANY claiming path
- Add triggers to auto-decrement count on task complete/release
- Update MAX_CONCURRENT_TASKS default from 3 to 5
- Update frontend fallback to show 5 task slots

- Add Wasabi S3 storage for payload archival
- Add inventory snapshots service (delta-only tracking)
- Add sales analytics views and routes
- Add high-frequency manager UI components
- Reset hardcoded AZ 5-minute intervals (use UI to configure)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-17 01:34:38 -07:00

384 lines
14 KiB
PL/PgSQL

-- Migration 121: Sales Analytics Materialized Views
-- Pre-computed views for sales velocity, brand market share, and store performance
-- ============================================================
-- VIEW 1: Daily Sales Estimates (per product/store)
-- Calculates delta between consecutive snapshots
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_daily_sales_estimates AS
WITH qty_deltas AS (
SELECT
dispensary_id,
product_id,
brand_name,
category,
DATE(captured_at) AS sale_date,
price_rec,
quantity_available,
LAG(quantity_available) OVER (
PARTITION BY dispensary_id, product_id
ORDER BY captured_at
) AS prev_quantity
FROM inventory_snapshots
WHERE quantity_available IS NOT NULL
AND captured_at >= NOW() - INTERVAL '30 days'
)
SELECT
dispensary_id,
product_id,
brand_name,
category,
sale_date,
AVG(price_rec) AS avg_price,
SUM(GREATEST(0, COALESCE(prev_quantity, 0) - quantity_available)) AS units_sold,
SUM(GREATEST(0, quantity_available - COALESCE(prev_quantity, 0))) AS units_restocked,
SUM(GREATEST(0, COALESCE(prev_quantity, 0) - quantity_available) * COALESCE(price_rec, 0)) AS revenue_estimate,
COUNT(*) AS snapshot_count
FROM qty_deltas
WHERE prev_quantity IS NOT NULL
GROUP BY dispensary_id, product_id, brand_name, category, sale_date;
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_daily_sales_pk
ON mv_daily_sales_estimates(dispensary_id, product_id, sale_date);
CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_brand
ON mv_daily_sales_estimates(brand_name, sale_date);
CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_category
ON mv_daily_sales_estimates(category, sale_date);
CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_date
ON mv_daily_sales_estimates(sale_date DESC);
-- ============================================================
-- VIEW 2: Brand Market Share by State
-- Weighted distribution across stores
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_brand_market_share AS
WITH brand_presence AS (
SELECT
sp.brand AS brand_name,
d.state AS state_code,
COUNT(DISTINCT sp.dispensary_id) AS stores_carrying,
COUNT(*) AS sku_count,
SUM(CASE WHEN sp.is_in_stock THEN 1 ELSE 0 END) AS in_stock_skus,
AVG(sp.price_rec) AS avg_price
FROM store_products sp
JOIN dispensaries d ON d.id = sp.dispensary_id
WHERE sp.brand IS NOT NULL
AND d.state IS NOT NULL
GROUP BY sp.brand, d.state
),
state_totals AS (
SELECT
d.state AS state_code,
COUNT(DISTINCT d.id) FILTER (WHERE d.crawl_enabled) AS total_stores
FROM dispensaries d
WHERE d.state IS NOT NULL
GROUP BY d.state
)
SELECT
bp.brand_name,
bp.state_code,
bp.stores_carrying,
st.total_stores,
ROUND(bp.stores_carrying::NUMERIC * 100 / NULLIF(st.total_stores, 0), 2) AS penetration_pct,
bp.sku_count,
bp.in_stock_skus,
bp.avg_price,
NOW() AS calculated_at
FROM brand_presence bp
JOIN state_totals st ON st.state_code = bp.state_code;
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_brand_market_pk
ON mv_brand_market_share(brand_name, state_code);
CREATE INDEX IF NOT EXISTS idx_mv_brand_market_state
ON mv_brand_market_share(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_brand_market_penetration
ON mv_brand_market_share(penetration_pct DESC);
-- ============================================================
-- VIEW 3: SKU Velocity (30-day rolling)
-- Average daily units sold per SKU
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_sku_velocity AS
SELECT
dse.product_id,
dse.brand_name,
dse.category,
dse.dispensary_id,
d.name AS dispensary_name,
d.state AS state_code,
SUM(dse.units_sold) AS total_units_30d,
SUM(dse.revenue_estimate) AS total_revenue_30d,
COUNT(DISTINCT dse.sale_date) AS days_with_sales,
ROUND(SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0), 2) AS avg_daily_units,
AVG(dse.avg_price) AS avg_price,
CASE
WHEN SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0) >= 5 THEN 'hot'
WHEN SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0) >= 1 THEN 'steady'
WHEN SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0) >= 0.1 THEN 'slow'
ELSE 'stale'
END AS velocity_tier,
NOW() AS calculated_at
FROM mv_daily_sales_estimates dse
JOIN dispensaries d ON d.id = dse.dispensary_id
WHERE dse.sale_date >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY dse.product_id, dse.brand_name, dse.category, dse.dispensary_id, d.name, d.state;
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_sku_velocity_pk
ON mv_sku_velocity(dispensary_id, product_id);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_brand
ON mv_sku_velocity(brand_name);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_tier
ON mv_sku_velocity(velocity_tier);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_state
ON mv_sku_velocity(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_units
ON mv_sku_velocity(total_units_30d DESC);
-- ============================================================
-- VIEW 4: Store Performance Rankings
-- Revenue estimates and brand diversity per store
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_store_performance AS
SELECT
d.id AS dispensary_id,
d.name AS dispensary_name,
d.city,
d.state AS state_code,
-- Revenue metrics from sales estimates
COALESCE(sales.total_revenue_30d, 0) AS total_revenue_30d,
COALESCE(sales.total_units_30d, 0) AS total_units_30d,
-- Inventory metrics
COUNT(DISTINCT sp.id) AS total_skus,
COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock) AS in_stock_skus,
-- Brand diversity
COUNT(DISTINCT sp.brand) AS unique_brands,
COUNT(DISTINCT sp.category) AS unique_categories,
-- Pricing
AVG(sp.price_rec) AS avg_price,
-- Activity
MAX(sp.updated_at) AS last_updated,
NOW() AS calculated_at
FROM dispensaries d
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
LEFT JOIN (
SELECT
dispensary_id,
SUM(revenue_estimate) AS total_revenue_30d,
SUM(units_sold) AS total_units_30d
FROM mv_daily_sales_estimates
WHERE sale_date >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY dispensary_id
) sales ON sales.dispensary_id = d.id
WHERE d.crawl_enabled = TRUE
GROUP BY d.id, d.name, d.city, d.state, sales.total_revenue_30d, sales.total_units_30d;
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_store_perf_pk
ON mv_store_performance(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_mv_store_perf_state
ON mv_store_performance(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_store_perf_revenue
ON mv_store_performance(total_revenue_30d DESC);
-- ============================================================
-- VIEW 5: Weekly Category Trends
-- Category performance over time
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_category_weekly_trends AS
SELECT
dse.category,
d.state AS state_code,
DATE_TRUNC('week', dse.sale_date)::DATE AS week_start,
COUNT(DISTINCT dse.product_id) AS sku_count,
COUNT(DISTINCT dse.dispensary_id) AS store_count,
SUM(dse.units_sold) AS total_units,
SUM(dse.revenue_estimate) AS total_revenue,
AVG(dse.avg_price) AS avg_price,
NOW() AS calculated_at
FROM mv_daily_sales_estimates dse
JOIN dispensaries d ON d.id = dse.dispensary_id
WHERE dse.category IS NOT NULL
AND dse.sale_date >= CURRENT_DATE - INTERVAL '90 days'
GROUP BY dse.category, d.state, DATE_TRUNC('week', dse.sale_date);
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_cat_weekly_pk
ON mv_category_weekly_trends(category, state_code, week_start);
CREATE INDEX IF NOT EXISTS idx_mv_cat_weekly_state
ON mv_category_weekly_trends(state_code, week_start);
CREATE INDEX IF NOT EXISTS idx_mv_cat_weekly_date
ON mv_category_weekly_trends(week_start DESC);
-- ============================================================
-- VIEW 6: Product Intelligence (Hoodie-style per-product metrics)
-- Includes stock diff, days since OOS, days until stockout
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_product_intelligence AS
WITH
-- Calculate stock diff over 120 days
stock_diff AS (
SELECT
dispensary_id,
product_id,
-- Get oldest and newest quantity in last 120 days
FIRST_VALUE(quantity_available) OVER (
PARTITION BY dispensary_id, product_id
ORDER BY captured_at ASC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS qty_120d_ago,
LAST_VALUE(quantity_available) OVER (
PARTITION BY dispensary_id, product_id
ORDER BY captured_at ASC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS qty_current
FROM inventory_snapshots
WHERE captured_at >= NOW() - INTERVAL '120 days'
),
stock_diff_calc AS (
SELECT DISTINCT
dispensary_id,
product_id,
qty_current - COALESCE(qty_120d_ago, qty_current) AS stock_diff_120
FROM stock_diff
),
-- Get days since last OOS event
last_oos AS (
SELECT
dispensary_id,
product_id,
MAX(detected_at) AS last_oos_date
FROM product_visibility_events
WHERE event_type = 'oos'
GROUP BY dispensary_id, product_id
),
-- Calculate avg daily units sold (from velocity view)
velocity AS (
SELECT
dispensary_id,
product_id,
avg_daily_units
FROM mv_sku_velocity
)
SELECT
sp.dispensary_id,
d.name AS dispensary_name,
d.state AS state_code,
d.city,
sp.provider_product_id AS sku,
sp.name_raw AS product_name,
sp.brand_name_raw AS brand,
sp.category_raw AS category,
sp.is_in_stock,
sp.stock_status,
sp.stock_quantity,
sp.price_rec AS price,
sp.first_seen_at AS first_seen,
sp.last_seen_at AS last_seen,
-- Calculated fields
COALESCE(sd.stock_diff_120, 0) AS stock_diff_120,
CASE
WHEN lo.last_oos_date IS NOT NULL
THEN EXTRACT(DAY FROM NOW() - lo.last_oos_date)::INT
ELSE NULL
END AS days_since_oos,
-- Days until stockout = current stock / daily burn rate
CASE
WHEN v.avg_daily_units > 0 AND sp.stock_quantity > 0
THEN ROUND(sp.stock_quantity::NUMERIC / v.avg_daily_units)::INT
ELSE NULL
END AS days_until_stock_out,
v.avg_daily_units,
NOW() AS calculated_at
FROM store_products sp
JOIN dispensaries d ON d.id = sp.dispensary_id
LEFT JOIN stock_diff_calc sd ON sd.dispensary_id = sp.dispensary_id
AND sd.product_id = sp.provider_product_id
LEFT JOIN last_oos lo ON lo.dispensary_id = sp.dispensary_id
AND lo.product_id = sp.provider_product_id
LEFT JOIN velocity v ON v.dispensary_id = sp.dispensary_id
AND v.product_id = sp.provider_product_id
WHERE d.crawl_enabled = TRUE;
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_prod_intel_pk
ON mv_product_intelligence(dispensary_id, sku);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_brand
ON mv_product_intelligence(brand);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_state
ON mv_product_intelligence(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_stock_out
ON mv_product_intelligence(days_until_stock_out ASC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_oos
ON mv_product_intelligence(days_since_oos DESC NULLS LAST);
-- ============================================================
-- REFRESH FUNCTION
-- ============================================================
CREATE OR REPLACE FUNCTION refresh_sales_analytics_views()
RETURNS TABLE(view_name TEXT, rows_affected BIGINT) AS $$
DECLARE
row_count BIGINT;
BEGIN
-- Must refresh in dependency order:
-- 1. daily_sales (base view)
-- 2. sku_velocity (depends on daily_sales)
-- 3. product_intelligence (depends on sku_velocity)
-- 4. others (independent)
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_daily_sales_estimates;
SELECT COUNT(*) INTO row_count FROM mv_daily_sales_estimates;
view_name := 'mv_daily_sales_estimates';
rows_affected := row_count;
RETURN NEXT;
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_brand_market_share;
SELECT COUNT(*) INTO row_count FROM mv_brand_market_share;
view_name := 'mv_brand_market_share';
rows_affected := row_count;
RETURN NEXT;
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_sku_velocity;
SELECT COUNT(*) INTO row_count FROM mv_sku_velocity;
view_name := 'mv_sku_velocity';
rows_affected := row_count;
RETURN NEXT;
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_store_performance;
SELECT COUNT(*) INTO row_count FROM mv_store_performance;
view_name := 'mv_store_performance';
rows_affected := row_count;
RETURN NEXT;
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_category_weekly_trends;
SELECT COUNT(*) INTO row_count FROM mv_category_weekly_trends;
view_name := 'mv_category_weekly_trends';
rows_affected := row_count;
RETURN NEXT;
-- Product intelligence depends on sku_velocity, so refresh last
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_product_intelligence;
SELECT COUNT(*) INTO row_count FROM mv_product_intelligence;
view_name := 'mv_product_intelligence';
rows_affected := row_count;
RETURN NEXT;
END;
$$ LANGUAGE plpgsql;
COMMENT ON FUNCTION refresh_sales_analytics_views IS
'Refresh all sales analytics materialized views. Call hourly via scheduler.';
-- ============================================================
-- INITIAL REFRESH (populate views)
-- ============================================================
-- Note: Initial refresh must be non-concurrent (no unique index yet populated)
-- Run these manually after migration:
-- REFRESH MATERIALIZED VIEW mv_daily_sales_estimates;
-- REFRESH MATERIALIZED VIEW mv_brand_market_share;
-- REFRESH MATERIALIZED VIEW mv_sku_velocity;
-- REFRESH MATERIALIZED VIEW mv_store_performance;
-- REFRESH MATERIALIZED VIEW mv_category_weekly_trends;