-- Migration 123: Extract unmapped fields from provider_data -- These fields exist in our crawl payloads but weren't being stored in columns -- ============================================================ -- ADD NEW COLUMNS TO store_products -- ============================================================ -- Cannabis classification (SATIVA, INDICA, HYBRID, CBD) ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cannabis_type VARCHAR(50); -- Canonical IDs from POS systems ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_strain_id VARCHAR(100); ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_vendor_id VARCHAR(100); ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_brand_id VARCHAR(100); ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_category_id VARCHAR(100); -- Lab results ALTER TABLE store_products ADD COLUMN IF NOT EXISTS lab_result_url TEXT; -- Flavors (extracted from JSONB to text array for easier querying) ALTER TABLE store_products ADD COLUMN IF NOT EXISTS flavors_list TEXT[]; -- ============================================================ -- BACKFILL FROM provider_data -- ============================================================ -- Backfill cannabis_type from classification UPDATE store_products SET cannabis_type = CASE WHEN provider_data->>'classification' IN ('HYBRID', 'H') THEN 'HYBRID' WHEN provider_data->>'classification' IN ('INDICA', 'I') THEN 'INDICA' WHEN provider_data->>'classification' IN ('SATIVA', 'S') THEN 'SATIVA' WHEN provider_data->>'classification' = 'I/S' THEN 'INDICA_DOMINANT' WHEN provider_data->>'classification' = 'S/I' THEN 'SATIVA_DOMINANT' WHEN provider_data->>'classification' = 'CBD' THEN 'CBD' ELSE provider_data->>'classification' END WHERE provider_data->>'classification' IS NOT NULL AND cannabis_type IS NULL; -- Also backfill from strain_type if cannabis_type still null UPDATE store_products SET cannabis_type = CASE WHEN strain_type ILIKE '%indica%hybrid%' OR strain_type ILIKE '%hybrid%indica%' THEN 'INDICA_DOMINANT' WHEN strain_type ILIKE '%sativa%hybrid%' OR strain_type ILIKE '%hybrid%sativa%' THEN 'SATIVA_DOMINANT' WHEN strain_type ILIKE '%indica%' THEN 'INDICA' WHEN strain_type ILIKE '%sativa%' THEN 'SATIVA' WHEN strain_type ILIKE '%hybrid%' THEN 'HYBRID' WHEN strain_type ILIKE '%cbd%' THEN 'CBD' ELSE NULL END WHERE strain_type IS NOT NULL AND cannabis_type IS NULL; -- Backfill canonical IDs from POSMetaData UPDATE store_products SET canonical_strain_id = provider_data->'POSMetaData'->>'canonicalStrainId', canonical_vendor_id = provider_data->'POSMetaData'->>'canonicalVendorId', canonical_brand_id = provider_data->'POSMetaData'->>'canonicalBrandId', canonical_category_id = provider_data->'POSMetaData'->>'canonicalCategoryId' WHERE provider_data->'POSMetaData' IS NOT NULL AND canonical_strain_id IS NULL; -- Backfill lab result URLs UPDATE store_products SET lab_result_url = provider_data->'POSMetaData'->>'canonicalLabResultUrl' WHERE provider_data->'POSMetaData'->>'canonicalLabResultUrl' IS NOT NULL AND lab_result_url IS NULL; -- ============================================================ -- INDEXES -- ============================================================ CREATE INDEX IF NOT EXISTS idx_store_products_cannabis_type ON store_products(cannabis_type); CREATE INDEX IF NOT EXISTS idx_store_products_vendor_id ON store_products(canonical_vendor_id); CREATE INDEX IF NOT EXISTS idx_store_products_strain_id ON store_products(canonical_strain_id); -- ============================================================ -- ADD MSO FLAG TO DISPENSARIES -- ============================================================ -- Multi-State Operator flag (calculated from chain presence in multiple states) ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_mso BOOLEAN DEFAULT FALSE; -- Update MSO flag based on chain presence in multiple states WITH mso_chains AS ( SELECT chain_id FROM dispensaries WHERE chain_id IS NOT NULL GROUP BY chain_id HAVING COUNT(DISTINCT state) > 1 ) UPDATE dispensaries d SET is_mso = TRUE WHERE d.chain_id IN (SELECT chain_id FROM mso_chains); -- Index for MSO queries CREATE INDEX IF NOT EXISTS idx_dispensaries_is_mso ON dispensaries(is_mso) WHERE is_mso = TRUE; -- ============================================================ -- PRODUCT DISTRIBUTION VIEW -- ============================================================ -- View: How many stores carry each product (by brand + canonical name) CREATE OR REPLACE VIEW v_product_distribution AS SELECT sp.brand_name_raw as brand, sp.c_name as product_canonical_name, COUNT(DISTINCT sp.dispensary_id) as store_count, COUNT(DISTINCT d.state) as state_count, ARRAY_AGG(DISTINCT d.state) as states, AVG(sp.price_rec) as avg_price, MIN(sp.price_rec) as min_price, MAX(sp.price_rec) as max_price FROM store_products sp JOIN dispensaries d ON d.id = sp.dispensary_id WHERE sp.c_name IS NOT NULL AND sp.brand_name_raw IS NOT NULL AND sp.is_in_stock = TRUE GROUP BY sp.brand_name_raw, sp.c_name HAVING COUNT(DISTINCT sp.dispensary_id) > 1 ORDER BY store_count DESC; -- ============================================================ -- MSO SUMMARY VIEW -- ============================================================ CREATE OR REPLACE VIEW v_mso_summary AS SELECT c.name as chain_name, COUNT(DISTINCT d.id) as store_count, COUNT(DISTINCT d.state) as state_count, ARRAY_AGG(DISTINCT d.state ORDER BY d.state) as states, SUM(d.product_count) as total_products, TRUE as is_mso FROM dispensaries d JOIN chains c ON c.id = d.chain_id WHERE d.chain_id IN ( SELECT chain_id FROM dispensaries WHERE chain_id IS NOT NULL GROUP BY chain_id HAVING COUNT(DISTINCT state) > 1 ) GROUP BY c.id, c.name ORDER BY state_count DESC, store_count DESC; -- ============================================================ -- COMMENTS -- ============================================================ COMMENT ON COLUMN store_products.cannabis_type IS 'Normalized cannabis classification: SATIVA, INDICA, HYBRID, INDICA_DOMINANT, SATIVA_DOMINANT, CBD'; COMMENT ON COLUMN store_products.canonical_strain_id IS 'POS system strain identifier for cross-store matching'; COMMENT ON COLUMN store_products.canonical_vendor_id IS 'POS system vendor/supplier identifier'; COMMENT ON COLUMN store_products.lab_result_url IS 'Link to Certificate of Analysis / lab test results'; COMMENT ON COLUMN dispensaries.is_mso IS 'Multi-State Operator: chain operates in 2+ states'; COMMENT ON VIEW v_product_distribution IS 'Shows how many stores carry each product for distribution analysis'; COMMENT ON VIEW v_mso_summary IS 'Summary of multi-state operator chains';