-- ============================================================================ -- Migration 070: Product Variants Tables -- ============================================================================ -- -- Purpose: Store variant-level pricing and inventory as first-class entities -- to enable time-series analytics, price comparisons, and sale tracking. -- -- Enables queries like: -- - Price history for a specific variant (1g Blue Dream over time) -- - Sale frequency analysis (how often is this on special?) -- - Cross-store price comparison (who has cheapest 1g flower?) -- - Current specials across all stores -- -- RULES: -- - STRICTLY ADDITIVE (no DROP, DELETE, TRUNCATE) -- - All new tables use IF NOT EXISTS -- - All indexes use IF NOT EXISTS -- -- ============================================================================ -- ============================================================================ -- SECTION 1: PRODUCT_VARIANTS TABLE (Current State) -- ============================================================================ -- One row per product+option combination. Tracks current pricing/inventory. CREATE TABLE IF NOT EXISTS product_variants ( id SERIAL PRIMARY KEY, store_product_id INTEGER NOT NULL REFERENCES store_products(id) ON DELETE CASCADE, dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE, -- Variant identity (from Dutchie POSMetaData.children) option VARCHAR(100) NOT NULL, -- "1g", "3.5g", "1/8oz", "100mg" canonical_sku VARCHAR(100), -- Dutchie canonicalSKU canonical_id VARCHAR(100), -- Dutchie canonicalID canonical_name VARCHAR(500), -- Dutchie canonicalName -- Current pricing (in dollars, not cents) price_rec NUMERIC(10,2), price_med NUMERIC(10,2), price_rec_special NUMERIC(10,2), price_med_special NUMERIC(10,2), -- Current inventory quantity INTEGER, quantity_available INTEGER, in_stock BOOLEAN DEFAULT TRUE, -- Special/sale status is_on_special BOOLEAN DEFAULT FALSE, -- Weight/size parsing (for analytics) weight_value NUMERIC(10,2), -- 1, 3.5, 28, etc. weight_unit VARCHAR(20), -- g, oz, mg, ml, etc. -- Timestamps first_seen_at TIMESTAMPTZ DEFAULT NOW(), last_seen_at TIMESTAMPTZ DEFAULT NOW(), last_price_change_at TIMESTAMPTZ, last_stock_change_at TIMESTAMPTZ, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW(), UNIQUE(store_product_id, option) ); -- Indexes for common queries CREATE INDEX IF NOT EXISTS idx_variants_store_product ON product_variants(store_product_id); CREATE INDEX IF NOT EXISTS idx_variants_dispensary ON product_variants(dispensary_id); CREATE INDEX IF NOT EXISTS idx_variants_option ON product_variants(option); CREATE INDEX IF NOT EXISTS idx_variants_in_stock ON product_variants(dispensary_id, in_stock) WHERE in_stock = TRUE; CREATE INDEX IF NOT EXISTS idx_variants_on_special ON product_variants(dispensary_id, is_on_special) WHERE is_on_special = TRUE; CREATE INDEX IF NOT EXISTS idx_variants_canonical_sku ON product_variants(canonical_sku) WHERE canonical_sku IS NOT NULL; CREATE INDEX IF NOT EXISTS idx_variants_price_rec ON product_variants(price_rec) WHERE price_rec IS NOT NULL; COMMENT ON TABLE product_variants IS 'Current state of each product variant (weight/size option). One row per product+option.'; COMMENT ON COLUMN product_variants.option IS 'Weight/size option string from Dutchie (e.g., "1g", "3.5g", "1/8oz")'; COMMENT ON COLUMN product_variants.canonical_sku IS 'Dutchie POS SKU for cross-store matching'; -- ============================================================================ -- SECTION 2: PRODUCT_VARIANT_SNAPSHOTS TABLE (Historical Data) -- ============================================================================ -- Time-series data for variant pricing. One row per variant per crawl. -- CRITICAL: NEVER DELETE from this table. CREATE TABLE IF NOT EXISTS product_variant_snapshots ( id SERIAL PRIMARY KEY, product_variant_id INTEGER NOT NULL REFERENCES product_variants(id) ON DELETE CASCADE, store_product_id INTEGER REFERENCES store_products(id) ON DELETE SET NULL, dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE, crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL, -- Variant identity (denormalized for query performance) option VARCHAR(100) NOT NULL, -- Pricing at time of capture price_rec NUMERIC(10,2), price_med NUMERIC(10,2), price_rec_special NUMERIC(10,2), price_med_special NUMERIC(10,2), -- Inventory at time of capture quantity INTEGER, in_stock BOOLEAN DEFAULT TRUE, -- Special status at time of capture is_on_special BOOLEAN DEFAULT FALSE, -- Feed presence (FALSE = variant missing from crawl) is_present_in_feed BOOLEAN DEFAULT TRUE, -- Capture timestamp captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), created_at TIMESTAMPTZ DEFAULT NOW() ); -- Indexes for time-series queries CREATE INDEX IF NOT EXISTS idx_variant_snapshots_variant ON product_variant_snapshots(product_variant_id, captured_at DESC); CREATE INDEX IF NOT EXISTS idx_variant_snapshots_dispensary ON product_variant_snapshots(dispensary_id, captured_at DESC); CREATE INDEX IF NOT EXISTS idx_variant_snapshots_crawl ON product_variant_snapshots(crawl_run_id) WHERE crawl_run_id IS NOT NULL; CREATE INDEX IF NOT EXISTS idx_variant_snapshots_captured ON product_variant_snapshots(captured_at DESC); CREATE INDEX IF NOT EXISTS idx_variant_snapshots_special ON product_variant_snapshots(is_on_special, captured_at DESC) WHERE is_on_special = TRUE; CREATE INDEX IF NOT EXISTS idx_variant_snapshots_option ON product_variant_snapshots(option, captured_at DESC); COMMENT ON TABLE product_variant_snapshots IS 'Historical variant pricing/inventory. One row per variant per crawl. NEVER DELETE.'; -- ============================================================================ -- SECTION 3: USEFUL VIEWS -- ============================================================================ -- View: Current specials across all stores CREATE OR REPLACE VIEW v_current_specials AS SELECT pv.id as variant_id, sp.id as product_id, sp.name_raw as product_name, sp.brand_name_raw as brand_name, sp.category_raw as category, d.id as dispensary_id, d.name as dispensary_name, d.city, d.state, pv.option, pv.price_rec, pv.price_rec_special, ROUND(((pv.price_rec - pv.price_rec_special) / NULLIF(pv.price_rec, 0)) * 100, 1) as discount_percent, pv.quantity, pv.in_stock, pv.last_seen_at FROM product_variants pv JOIN store_products sp ON sp.id = pv.store_product_id JOIN dispensaries d ON d.id = pv.dispensary_id WHERE pv.is_on_special = TRUE AND pv.in_stock = TRUE AND pv.price_rec_special IS NOT NULL AND pv.price_rec_special < pv.price_rec; COMMENT ON VIEW v_current_specials IS 'All products currently on special across all stores'; -- View: Price comparison for a product across stores CREATE OR REPLACE VIEW v_price_comparison AS SELECT sp.name_raw as product_name, sp.brand_name_raw as brand_name, sp.category_raw as category, pv.option, d.id as dispensary_id, d.name as dispensary_name, d.city, pv.price_rec, pv.price_rec_special, pv.is_on_special, pv.in_stock, pv.quantity, RANK() OVER (PARTITION BY sp.name_raw, pv.option ORDER BY COALESCE(pv.price_rec_special, pv.price_rec) ASC) as price_rank FROM product_variants pv JOIN store_products sp ON sp.id = pv.store_product_id JOIN dispensaries d ON d.id = pv.dispensary_id WHERE pv.in_stock = TRUE AND (pv.price_rec IS NOT NULL OR pv.price_rec_special IS NOT NULL); COMMENT ON VIEW v_price_comparison IS 'Compare prices for same product across stores, ranked by price'; -- View: Latest snapshot per variant CREATE OR REPLACE VIEW v_latest_variant_snapshots AS SELECT DISTINCT ON (product_variant_id) pvs.* FROM product_variant_snapshots pvs ORDER BY product_variant_id, captured_at DESC; -- ============================================================================ -- SECTION 4: HELPER FUNCTION FOR SALE FREQUENCY -- ============================================================================ -- Function to calculate sale frequency for a variant CREATE OR REPLACE FUNCTION get_variant_sale_stats(p_variant_id INTEGER, p_days INTEGER DEFAULT 30) RETURNS TABLE ( total_snapshots BIGINT, times_on_special BIGINT, special_frequency_pct NUMERIC, avg_discount_pct NUMERIC, min_price NUMERIC, max_price NUMERIC, avg_price NUMERIC ) AS $$ BEGIN RETURN QUERY SELECT COUNT(*)::BIGINT as total_snapshots, COUNT(*) FILTER (WHERE is_on_special)::BIGINT as times_on_special, ROUND((COUNT(*) FILTER (WHERE is_on_special)::NUMERIC / NULLIF(COUNT(*), 0)) * 100, 1) as special_frequency_pct, ROUND(AVG( CASE WHEN is_on_special AND price_rec_special IS NOT NULL AND price_rec IS NOT NULL THEN ((price_rec - price_rec_special) / NULLIF(price_rec, 0)) * 100 END ), 1) as avg_discount_pct, MIN(COALESCE(price_rec_special, price_rec)) as min_price, MAX(price_rec) as max_price, ROUND(AVG(COALESCE(price_rec_special, price_rec)), 2) as avg_price FROM product_variant_snapshots WHERE product_variant_id = p_variant_id AND captured_at >= NOW() - (p_days || ' days')::INTERVAL; END; $$ LANGUAGE plpgsql; COMMENT ON FUNCTION get_variant_sale_stats IS 'Get sale frequency and price stats for a variant over N days'; -- ============================================================================ -- DONE -- ============================================================================ SELECT 'Migration 070 completed. Product variants tables ready for time-series analytics.' AS status;