Files
cannaiq/backend/migrations/123_extract_provider_fields.sql
Kelly 9f3bc8a843
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
fix: Worker task concurrency limit and inventory tracking
- Fix claim_task to enforce max 5 tasks per worker (was unlimited)
- Add session_task_count check before ANY claiming path
- Add triggers to auto-decrement count on task complete/release
- Update MAX_CONCURRENT_TASKS default from 3 to 5
- Update frontend fallback to show 5 task slots

- Add Wasabi S3 storage for payload archival
- Add inventory snapshots service (delta-only tracking)
- Add sales analytics views and routes
- Add high-frequency manager UI components
- Reset hardcoded AZ 5-minute intervals (use UI to configure)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-17 01:34:38 -07:00

160 lines
6.6 KiB
SQL

-- Migration 123: Extract unmapped fields from provider_data
-- These fields exist in our crawl payloads but weren't being stored in columns
-- ============================================================
-- ADD NEW COLUMNS TO store_products
-- ============================================================
-- Cannabis classification (SATIVA, INDICA, HYBRID, CBD)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cannabis_type VARCHAR(50);
-- Canonical IDs from POS systems
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_strain_id VARCHAR(100);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_vendor_id VARCHAR(100);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_brand_id VARCHAR(100);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_category_id VARCHAR(100);
-- Lab results
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS lab_result_url TEXT;
-- Flavors (extracted from JSONB to text array for easier querying)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS flavors_list TEXT[];
-- ============================================================
-- BACKFILL FROM provider_data
-- ============================================================
-- Backfill cannabis_type from classification
UPDATE store_products
SET cannabis_type = CASE
WHEN provider_data->>'classification' IN ('HYBRID', 'H') THEN 'HYBRID'
WHEN provider_data->>'classification' IN ('INDICA', 'I') THEN 'INDICA'
WHEN provider_data->>'classification' IN ('SATIVA', 'S') THEN 'SATIVA'
WHEN provider_data->>'classification' = 'I/S' THEN 'INDICA_DOMINANT'
WHEN provider_data->>'classification' = 'S/I' THEN 'SATIVA_DOMINANT'
WHEN provider_data->>'classification' = 'CBD' THEN 'CBD'
ELSE provider_data->>'classification'
END
WHERE provider_data->>'classification' IS NOT NULL
AND cannabis_type IS NULL;
-- Also backfill from strain_type if cannabis_type still null
UPDATE store_products
SET cannabis_type = CASE
WHEN strain_type ILIKE '%indica%hybrid%' OR strain_type ILIKE '%hybrid%indica%' THEN 'INDICA_DOMINANT'
WHEN strain_type ILIKE '%sativa%hybrid%' OR strain_type ILIKE '%hybrid%sativa%' THEN 'SATIVA_DOMINANT'
WHEN strain_type ILIKE '%indica%' THEN 'INDICA'
WHEN strain_type ILIKE '%sativa%' THEN 'SATIVA'
WHEN strain_type ILIKE '%hybrid%' THEN 'HYBRID'
WHEN strain_type ILIKE '%cbd%' THEN 'CBD'
ELSE NULL
END
WHERE strain_type IS NOT NULL
AND cannabis_type IS NULL;
-- Backfill canonical IDs from POSMetaData
UPDATE store_products
SET
canonical_strain_id = provider_data->'POSMetaData'->>'canonicalStrainId',
canonical_vendor_id = provider_data->'POSMetaData'->>'canonicalVendorId',
canonical_brand_id = provider_data->'POSMetaData'->>'canonicalBrandId',
canonical_category_id = provider_data->'POSMetaData'->>'canonicalCategoryId'
WHERE provider_data->'POSMetaData' IS NOT NULL
AND canonical_strain_id IS NULL;
-- Backfill lab result URLs
UPDATE store_products
SET lab_result_url = provider_data->'POSMetaData'->>'canonicalLabResultUrl'
WHERE provider_data->'POSMetaData'->>'canonicalLabResultUrl' IS NOT NULL
AND lab_result_url IS NULL;
-- ============================================================
-- INDEXES
-- ============================================================
CREATE INDEX IF NOT EXISTS idx_store_products_cannabis_type ON store_products(cannabis_type);
CREATE INDEX IF NOT EXISTS idx_store_products_vendor_id ON store_products(canonical_vendor_id);
CREATE INDEX IF NOT EXISTS idx_store_products_strain_id ON store_products(canonical_strain_id);
-- ============================================================
-- ADD MSO FLAG TO DISPENSARIES
-- ============================================================
-- Multi-State Operator flag (calculated from chain presence in multiple states)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_mso BOOLEAN DEFAULT FALSE;
-- Update MSO flag based on chain presence in multiple states
WITH mso_chains AS (
SELECT chain_id
FROM dispensaries
WHERE chain_id IS NOT NULL
GROUP BY chain_id
HAVING COUNT(DISTINCT state) > 1
)
UPDATE dispensaries d
SET is_mso = TRUE
WHERE d.chain_id IN (SELECT chain_id FROM mso_chains);
-- Index for MSO queries
CREATE INDEX IF NOT EXISTS idx_dispensaries_is_mso ON dispensaries(is_mso) WHERE is_mso = TRUE;
-- ============================================================
-- PRODUCT DISTRIBUTION VIEW
-- ============================================================
-- View: How many stores carry each product (by brand + canonical name)
CREATE OR REPLACE VIEW v_product_distribution AS
SELECT
sp.brand_name_raw as brand,
sp.c_name as product_canonical_name,
COUNT(DISTINCT sp.dispensary_id) as store_count,
COUNT(DISTINCT d.state) as state_count,
ARRAY_AGG(DISTINCT d.state) as states,
AVG(sp.price_rec) as avg_price,
MIN(sp.price_rec) as min_price,
MAX(sp.price_rec) as max_price
FROM store_products sp
JOIN dispensaries d ON d.id = sp.dispensary_id
WHERE sp.c_name IS NOT NULL
AND sp.brand_name_raw IS NOT NULL
AND sp.is_in_stock = TRUE
GROUP BY sp.brand_name_raw, sp.c_name
HAVING COUNT(DISTINCT sp.dispensary_id) > 1
ORDER BY store_count DESC;
-- ============================================================
-- MSO SUMMARY VIEW
-- ============================================================
CREATE OR REPLACE VIEW v_mso_summary AS
SELECT
c.name as chain_name,
COUNT(DISTINCT d.id) as store_count,
COUNT(DISTINCT d.state) as state_count,
ARRAY_AGG(DISTINCT d.state ORDER BY d.state) as states,
SUM(d.product_count) as total_products,
TRUE as is_mso
FROM dispensaries d
JOIN chains c ON c.id = d.chain_id
WHERE d.chain_id IN (
SELECT chain_id
FROM dispensaries
WHERE chain_id IS NOT NULL
GROUP BY chain_id
HAVING COUNT(DISTINCT state) > 1
)
GROUP BY c.id, c.name
ORDER BY state_count DESC, store_count DESC;
-- ============================================================
-- COMMENTS
-- ============================================================
COMMENT ON COLUMN store_products.cannabis_type IS 'Normalized cannabis classification: SATIVA, INDICA, HYBRID, INDICA_DOMINANT, SATIVA_DOMINANT, CBD';
COMMENT ON COLUMN store_products.canonical_strain_id IS 'POS system strain identifier for cross-store matching';
COMMENT ON COLUMN store_products.canonical_vendor_id IS 'POS system vendor/supplier identifier';
COMMENT ON COLUMN store_products.lab_result_url IS 'Link to Certificate of Analysis / lab test results';
COMMENT ON COLUMN dispensaries.is_mso IS 'Multi-State Operator: chain operates in 2+ states';
COMMENT ON VIEW v_product_distribution IS 'Shows how many stores carry each product for distribution analysis';
COMMENT ON VIEW v_mso_summary IS 'Summary of multi-state operator chains';