feat: Add v2 architecture with multi-state support and orchestrator services
Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
376
backend/migrations/041_cannaiq_canonical_schema.sql
Normal file
376
backend/migrations/041_cannaiq_canonical_schema.sql
Normal file
@@ -0,0 +1,376 @@
|
||||
-- Migration 041: CannaiQ Canonical Schema
|
||||
--
|
||||
-- This migration adds the canonical CannaiQ schema tables and columns.
|
||||
-- ALL CHANGES ARE ADDITIVE - NO DROPS, NO DELETES, NO TRUNCATES.
|
||||
--
|
||||
-- Run with: psql $CANNAIQ_DB_URL -f migrations/041_cannaiq_canonical_schema.sql
|
||||
--
|
||||
-- Tables created:
|
||||
-- - states (new)
|
||||
-- - chains (new)
|
||||
-- - brands (new)
|
||||
-- - store_products (new - normalized view of current menu)
|
||||
-- - store_product_snapshots (new - historical crawl data)
|
||||
-- - crawl_runs (new - replaces/supplements dispensary_crawl_jobs)
|
||||
--
|
||||
-- Tables modified:
|
||||
-- - dispensaries (add state_id, chain_id FKs)
|
||||
-- - dispensary_crawler_profiles (add status, allow_autopromote, validated_at)
|
||||
-- - crawl_orchestration_traces (add run_id FK)
|
||||
--
|
||||
|
||||
-- =====================================================
|
||||
-- 1) STATES TABLE
|
||||
-- =====================================================
|
||||
CREATE TABLE IF NOT EXISTS states (
|
||||
id SERIAL PRIMARY KEY,
|
||||
code VARCHAR(2) NOT NULL UNIQUE,
|
||||
name VARCHAR(100) NOT NULL,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Insert known states
|
||||
INSERT INTO states (code, name) VALUES
|
||||
('AZ', 'Arizona'),
|
||||
('CA', 'California'),
|
||||
('CO', 'Colorado'),
|
||||
('FL', 'Florida'),
|
||||
('IL', 'Illinois'),
|
||||
('MA', 'Massachusetts'),
|
||||
('MD', 'Maryland'),
|
||||
('MI', 'Michigan'),
|
||||
('MO', 'Missouri'),
|
||||
('NV', 'Nevada'),
|
||||
('NJ', 'New Jersey'),
|
||||
('NY', 'New York'),
|
||||
('OH', 'Ohio'),
|
||||
('OK', 'Oklahoma'),
|
||||
('OR', 'Oregon'),
|
||||
('PA', 'Pennsylvania'),
|
||||
('WA', 'Washington')
|
||||
ON CONFLICT (code) DO NOTHING;
|
||||
|
||||
COMMENT ON TABLE states IS 'US states where CannaiQ operates. Single source of truth for state codes.';
|
||||
|
||||
-- =====================================================
|
||||
-- 2) CHAINS TABLE (retail groups)
|
||||
-- =====================================================
|
||||
CREATE TABLE IF NOT EXISTS chains (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name VARCHAR(255) NOT NULL,
|
||||
slug VARCHAR(255) NOT NULL UNIQUE,
|
||||
website_url TEXT,
|
||||
logo_url TEXT,
|
||||
description TEXT,
|
||||
is_active BOOLEAN DEFAULT TRUE,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_chains_slug ON chains(slug);
|
||||
CREATE INDEX IF NOT EXISTS idx_chains_active ON chains(is_active) WHERE is_active = TRUE;
|
||||
|
||||
COMMENT ON TABLE chains IS 'Retail chains/groups that own multiple dispensary locations (e.g., Curaleaf, Trulieve).';
|
||||
|
||||
-- =====================================================
|
||||
-- 3) BRANDS TABLE (canonical brand catalog)
|
||||
-- =====================================================
|
||||
CREATE TABLE IF NOT EXISTS brands (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name VARCHAR(255) NOT NULL,
|
||||
slug VARCHAR(255) NOT NULL UNIQUE,
|
||||
external_id VARCHAR(100), -- Provider-specific brand ID
|
||||
website_url TEXT,
|
||||
instagram_handle VARCHAR(100),
|
||||
logo_url TEXT,
|
||||
description TEXT,
|
||||
is_portfolio_brand BOOLEAN DEFAULT FALSE, -- TRUE if brand we represent
|
||||
is_active BOOLEAN DEFAULT TRUE,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_brands_slug ON brands(slug);
|
||||
CREATE INDEX IF NOT EXISTS idx_brands_external_id ON brands(external_id) WHERE external_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_brands_portfolio ON brands(is_portfolio_brand) WHERE is_portfolio_brand = TRUE;
|
||||
|
||||
COMMENT ON TABLE brands IS 'Canonical brand catalog. Brands may appear across multiple dispensaries.';
|
||||
COMMENT ON COLUMN brands.is_portfolio_brand IS 'TRUE if this is a brand we represent/manage (vs third-party brand)';
|
||||
|
||||
-- =====================================================
|
||||
-- 4) ADD state_id AND chain_id TO dispensaries
|
||||
-- =====================================================
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS state_id INTEGER REFERENCES states(id);
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS chain_id INTEGER REFERENCES chains(id);
|
||||
|
||||
-- NOTE: state_id backfill is done by ETL script (042_legacy_import.ts), not this migration.
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_state_id ON dispensaries(state_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_chain_id ON dispensaries(chain_id) WHERE chain_id IS NOT NULL;
|
||||
|
||||
COMMENT ON COLUMN dispensaries.state_id IS 'FK to states table. Canonical state reference.';
|
||||
COMMENT ON COLUMN dispensaries.chain_id IS 'FK to chains table. NULL if independent dispensary.';
|
||||
|
||||
-- =====================================================
|
||||
-- 5) STORE_PRODUCTS TABLE (current menu state)
|
||||
-- =====================================================
|
||||
-- This is the normalized "what is currently on the menu" table.
|
||||
-- It supplements dutchie_products with a provider-agnostic structure.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS store_products (
|
||||
id SERIAL PRIMARY KEY,
|
||||
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||
product_id INTEGER REFERENCES products(id) ON DELETE SET NULL, -- Link to canonical product
|
||||
brand_id INTEGER REFERENCES brands(id) ON DELETE SET NULL, -- Link to canonical brand
|
||||
|
||||
-- Provider-specific identifiers
|
||||
provider VARCHAR(50) NOT NULL DEFAULT 'dutchie', -- dutchie, treez, jane, etc.
|
||||
provider_product_id VARCHAR(100), -- Platform-specific product ID
|
||||
provider_brand_id VARCHAR(100), -- Platform-specific brand ID
|
||||
|
||||
-- Raw data from platform (not normalized)
|
||||
name_raw VARCHAR(500) NOT NULL,
|
||||
brand_name_raw VARCHAR(255),
|
||||
category_raw VARCHAR(100),
|
||||
subcategory_raw VARCHAR(100),
|
||||
|
||||
-- Pricing
|
||||
price_rec NUMERIC(10,2),
|
||||
price_med NUMERIC(10,2),
|
||||
price_rec_special NUMERIC(10,2),
|
||||
price_med_special NUMERIC(10,2),
|
||||
is_on_special BOOLEAN DEFAULT FALSE,
|
||||
special_name TEXT,
|
||||
discount_percent NUMERIC(5,2),
|
||||
|
||||
-- Inventory
|
||||
is_in_stock BOOLEAN DEFAULT TRUE,
|
||||
stock_quantity INTEGER,
|
||||
stock_status VARCHAR(50) DEFAULT 'in_stock',
|
||||
|
||||
-- Potency
|
||||
thc_percent NUMERIC(5,2),
|
||||
cbd_percent NUMERIC(5,2),
|
||||
|
||||
-- Images
|
||||
image_url TEXT,
|
||||
local_image_path TEXT,
|
||||
|
||||
-- Timestamps
|
||||
first_seen_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
last_seen_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
|
||||
UNIQUE(dispensary_id, provider, provider_product_id)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_dispensary ON store_products(dispensary_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_product ON store_products(product_id) WHERE product_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_brand ON store_products(brand_id) WHERE brand_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_provider ON store_products(provider);
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_in_stock ON store_products(dispensary_id, is_in_stock);
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_special ON store_products(dispensary_id, is_on_special) WHERE is_on_special = TRUE;
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_last_seen ON store_products(last_seen_at DESC);
|
||||
|
||||
COMMENT ON TABLE store_products IS 'Current state of products on each dispensary menu. Provider-agnostic.';
|
||||
COMMENT ON COLUMN store_products.product_id IS 'FK to canonical products table. NULL if not yet mapped.';
|
||||
COMMENT ON COLUMN store_products.brand_id IS 'FK to canonical brands table. NULL if not yet mapped.';
|
||||
|
||||
-- =====================================================
|
||||
-- 6) STORE_PRODUCT_SNAPSHOTS TABLE (historical data)
|
||||
-- =====================================================
|
||||
-- This is the critical time-series table for analytics.
|
||||
-- One row per product per crawl.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS store_product_snapshots (
|
||||
id SERIAL PRIMARY KEY,
|
||||
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||
store_product_id INTEGER REFERENCES store_products(id) ON DELETE SET NULL,
|
||||
product_id INTEGER REFERENCES products(id) ON DELETE SET NULL,
|
||||
|
||||
-- Provider info
|
||||
provider VARCHAR(50) NOT NULL DEFAULT 'dutchie',
|
||||
provider_product_id VARCHAR(100),
|
||||
|
||||
-- Link to crawl run
|
||||
crawl_run_id INTEGER, -- FK added after crawl_runs table created
|
||||
|
||||
-- Capture timestamp
|
||||
captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
|
||||
-- Raw data from platform
|
||||
name_raw VARCHAR(500),
|
||||
brand_name_raw VARCHAR(255),
|
||||
category_raw VARCHAR(100),
|
||||
subcategory_raw VARCHAR(100),
|
||||
|
||||
-- Pricing at time of capture
|
||||
price_rec NUMERIC(10,2),
|
||||
price_med NUMERIC(10,2),
|
||||
price_rec_special NUMERIC(10,2),
|
||||
price_med_special NUMERIC(10,2),
|
||||
is_on_special BOOLEAN DEFAULT FALSE,
|
||||
discount_percent NUMERIC(5,2),
|
||||
|
||||
-- Inventory at time of capture
|
||||
is_in_stock BOOLEAN DEFAULT TRUE,
|
||||
stock_quantity INTEGER,
|
||||
stock_status VARCHAR(50) DEFAULT 'in_stock',
|
||||
|
||||
-- Potency at time of capture
|
||||
thc_percent NUMERIC(5,2),
|
||||
cbd_percent NUMERIC(5,2),
|
||||
|
||||
-- Image URL at time of capture
|
||||
image_url TEXT,
|
||||
|
||||
-- Full raw response for debugging
|
||||
raw_data JSONB,
|
||||
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_dispensary_captured ON store_product_snapshots(dispensary_id, captured_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_product_captured ON store_product_snapshots(product_id, captured_at DESC) WHERE product_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_store_product ON store_product_snapshots(store_product_id) WHERE store_product_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_crawl_run ON store_product_snapshots(crawl_run_id) WHERE crawl_run_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_captured_at ON store_product_snapshots(captured_at DESC);
|
||||
|
||||
COMMENT ON TABLE store_product_snapshots IS 'Historical crawl data. One row per product per crawl. NEVER DELETE.';
|
||||
COMMENT ON COLUMN store_product_snapshots.captured_at IS 'When this snapshot was captured (crawl time).';
|
||||
|
||||
-- =====================================================
|
||||
-- 7) CRAWL_RUNS TABLE (job execution records)
|
||||
-- =====================================================
|
||||
CREATE TABLE IF NOT EXISTS crawl_runs (
|
||||
id SERIAL PRIMARY KEY,
|
||||
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||
|
||||
-- Provider
|
||||
provider VARCHAR(50) NOT NULL DEFAULT 'dutchie',
|
||||
|
||||
-- Execution times
|
||||
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
finished_at TIMESTAMPTZ,
|
||||
duration_ms INTEGER,
|
||||
|
||||
-- Status
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'running', -- running, success, failed, partial
|
||||
error_message TEXT,
|
||||
|
||||
-- Results
|
||||
products_found INTEGER DEFAULT 0,
|
||||
products_new INTEGER DEFAULT 0,
|
||||
products_updated INTEGER DEFAULT 0,
|
||||
snapshots_written INTEGER DEFAULT 0,
|
||||
|
||||
-- Metadata
|
||||
worker_id VARCHAR(100),
|
||||
trigger_type VARCHAR(50) DEFAULT 'scheduled', -- scheduled, manual, api
|
||||
metadata JSONB DEFAULT '{}',
|
||||
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_crawl_runs_dispensary ON crawl_runs(dispensary_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_crawl_runs_status ON crawl_runs(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_crawl_runs_started ON crawl_runs(started_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_crawl_runs_dispensary_started ON crawl_runs(dispensary_id, started_at DESC);
|
||||
|
||||
COMMENT ON TABLE crawl_runs IS 'Each crawl execution. Links to snapshots and traces.';
|
||||
|
||||
-- Add FK from store_product_snapshots to crawl_runs
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM information_schema.table_constraints
|
||||
WHERE constraint_name = 'store_product_snapshots_crawl_run_id_fkey'
|
||||
) THEN
|
||||
ALTER TABLE store_product_snapshots
|
||||
ADD CONSTRAINT store_product_snapshots_crawl_run_id_fkey
|
||||
FOREIGN KEY (crawl_run_id) REFERENCES crawl_runs(id) ON DELETE SET NULL;
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
-- =====================================================
|
||||
-- 8) UPDATE crawl_orchestration_traces
|
||||
-- =====================================================
|
||||
-- Add run_id FK if not exists
|
||||
ALTER TABLE crawl_orchestration_traces
|
||||
ADD COLUMN IF NOT EXISTS crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_traces_crawl_run
|
||||
ON crawl_orchestration_traces(crawl_run_id)
|
||||
WHERE crawl_run_id IS NOT NULL;
|
||||
|
||||
-- =====================================================
|
||||
-- 9) UPDATE dispensary_crawler_profiles
|
||||
-- =====================================================
|
||||
-- Add missing columns from canonical schema
|
||||
ALTER TABLE dispensary_crawler_profiles
|
||||
ADD COLUMN IF NOT EXISTS status VARCHAR(50) DEFAULT 'sandbox';
|
||||
|
||||
ALTER TABLE dispensary_crawler_profiles
|
||||
ADD COLUMN IF NOT EXISTS allow_autopromote BOOLEAN DEFAULT FALSE;
|
||||
|
||||
ALTER TABLE dispensary_crawler_profiles
|
||||
ADD COLUMN IF NOT EXISTS validated_at TIMESTAMPTZ;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_profiles_status
|
||||
ON dispensary_crawler_profiles(status);
|
||||
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.status IS 'Profile status: sandbox, production, needs_manual, disabled';
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.allow_autopromote IS 'Whether this profile can be auto-promoted from sandbox to production';
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.validated_at IS 'When this profile was last validated as working';
|
||||
|
||||
-- =====================================================
|
||||
-- 10) VIEWS FOR BACKWARD COMPATIBILITY
|
||||
-- =====================================================
|
||||
|
||||
-- View to get latest snapshot per store product
|
||||
CREATE OR REPLACE VIEW v_latest_store_snapshots AS
|
||||
SELECT DISTINCT ON (dispensary_id, provider_product_id)
|
||||
sps.*
|
||||
FROM store_product_snapshots sps
|
||||
ORDER BY dispensary_id, provider_product_id, captured_at DESC;
|
||||
|
||||
-- View to get crawl run summary per dispensary
|
||||
CREATE OR REPLACE VIEW v_dispensary_crawl_summary AS
|
||||
SELECT
|
||||
d.id AS dispensary_id,
|
||||
d.name AS dispensary_name,
|
||||
d.city,
|
||||
d.state,
|
||||
COUNT(DISTINCT sp.id) AS current_product_count,
|
||||
COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock) AS in_stock_count,
|
||||
COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_on_special) AS on_special_count,
|
||||
MAX(cr.finished_at) AS last_crawl_at,
|
||||
(SELECT status FROM crawl_runs WHERE dispensary_id = d.id ORDER BY started_at DESC LIMIT 1) AS last_crawl_status
|
||||
FROM dispensaries d
|
||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
|
||||
LEFT JOIN crawl_runs cr ON cr.dispensary_id = d.id
|
||||
GROUP BY d.id, d.name, d.city, d.state;
|
||||
|
||||
-- =====================================================
|
||||
-- 11) COMMENTS
|
||||
-- =====================================================
|
||||
COMMENT ON TABLE states IS 'Canonical list of US states. Use state_id FK in dispensaries.';
|
||||
COMMENT ON TABLE chains IS 'Retail chains (multi-location operators).';
|
||||
COMMENT ON TABLE brands IS 'Canonical brand catalog across all providers.';
|
||||
COMMENT ON TABLE store_products IS 'Current menu state per dispensary. Provider-agnostic.';
|
||||
COMMENT ON TABLE store_product_snapshots IS 'Historical price/stock data. One row per product per crawl.';
|
||||
COMMENT ON TABLE crawl_runs IS 'Crawl execution records. Links snapshots to runs.';
|
||||
|
||||
-- =====================================================
|
||||
-- MIGRATION COMPLETE
|
||||
-- =====================================================
|
||||
--
|
||||
-- Next steps (manual - not in this migration):
|
||||
-- 1. Populate chains table from known retail groups
|
||||
-- 2. Populate brands table from existing dutchie_products.brand_name
|
||||
-- 3. Migrate data from dutchie_products → store_products
|
||||
-- 4. Migrate data from dutchie_product_snapshots → store_product_snapshots
|
||||
-- 5. Link dispensaries.chain_id to chains where applicable
|
||||
--
|
||||
Reference in New Issue
Block a user