Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
643 lines
22 KiB
SQL
643 lines
22 KiB
SQL
-- ============================================================================
|
|
-- Migration 051: CannaiQ Canonical Schema - Safe Bootstrap
|
|
-- ============================================================================
|
|
--
|
|
-- Purpose: Create the canonical CannaiQ schema tables from scratch.
|
|
-- This migration is FULLY IDEMPOTENT and safe to run multiple times.
|
|
--
|
|
-- SAFETY RULES FOLLOWED:
|
|
-- 1. ALL tables use CREATE TABLE IF NOT EXISTS
|
|
-- 2. ALL columns use ALTER TABLE ADD COLUMN IF NOT EXISTS
|
|
-- 3. ALL indexes use CREATE INDEX IF NOT EXISTS
|
|
-- 4. NO DROP, DELETE, TRUNCATE, or destructive operations
|
|
-- 5. NO assumptions about existing data or column existence
|
|
-- 6. NO dependencies on migrations 041, 043, or 050
|
|
-- 7. Compatible with dutchie_menus database as it exists today
|
|
-- 8. Safe handling of pre-existing states table with missing columns
|
|
--
|
|
-- Tables Created:
|
|
-- - states (US state reference table)
|
|
-- - chains (retail chain/group table)
|
|
-- - crawl_runs (crawl execution records)
|
|
-- - store_products (current menu state)
|
|
-- - store_product_snapshots (historical price/stock data)
|
|
--
|
|
-- Columns Added:
|
|
-- - dispensaries.state_id (FK to states)
|
|
-- - dispensaries.chain_id (FK to chains)
|
|
--
|
|
-- Run with:
|
|
-- psql "postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
|
|
-- -f migrations/051_cannaiq_canonical_safe_bootstrap.sql
|
|
--
|
|
-- ============================================================================
|
|
|
|
|
|
-- ============================================================================
|
|
-- SECTION 1: STATES TABLE
|
|
-- ============================================================================
|
|
-- Reference table for US states where CannaiQ operates.
|
|
-- This section handles the case where the table exists but is missing columns.
|
|
|
|
-- First, create the table if it doesn't exist (minimal definition)
|
|
CREATE TABLE IF NOT EXISTS states (
|
|
id SERIAL PRIMARY KEY,
|
|
code VARCHAR(2) NOT NULL,
|
|
name VARCHAR(100) NOT NULL,
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
-- Now safely add any missing columns (each is independent, won't fail if exists)
|
|
ALTER TABLE states ADD COLUMN IF NOT EXISTS timezone TEXT;
|
|
ALTER TABLE states ADD COLUMN IF NOT EXISTS is_active BOOLEAN DEFAULT TRUE;
|
|
ALTER TABLE states ADD COLUMN IF NOT EXISTS crawl_enabled BOOLEAN DEFAULT TRUE;
|
|
|
|
-- Add unique constraint on code if not exists
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'states_code_key' AND conrelid = 'states'::regclass
|
|
) THEN
|
|
-- Check if there's already a unique constraint with a different name
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_indexes
|
|
WHERE tablename = 'states' AND indexdef LIKE '%UNIQUE%code%'
|
|
) THEN
|
|
ALTER TABLE states ADD CONSTRAINT states_code_key UNIQUE (code);
|
|
END IF;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL; -- Constraint already exists
|
|
WHEN OTHERS THEN
|
|
NULL; -- Handle any other errors gracefully
|
|
END $$;
|
|
|
|
-- Set default timezone values for existing rows that have NULL
|
|
UPDATE states SET timezone = 'America/Phoenix' WHERE timezone IS NULL AND code = 'AZ';
|
|
UPDATE states SET timezone = 'America/Los_Angeles' WHERE timezone IS NULL AND code IN ('CA', 'NV', 'OR', 'WA');
|
|
UPDATE states SET timezone = 'America/Denver' WHERE timezone IS NULL AND code = 'CO';
|
|
UPDATE states SET timezone = 'America/New_York' WHERE timezone IS NULL AND code IN ('FL', 'MA', 'MD', 'NJ', 'NY', 'OH', 'PA');
|
|
UPDATE states SET timezone = 'America/Chicago' WHERE timezone IS NULL AND code IN ('IL', 'MO', 'OK');
|
|
UPDATE states SET timezone = 'America/Detroit' WHERE timezone IS NULL AND code = 'MI';
|
|
|
|
-- Set default is_active for existing rows
|
|
UPDATE states SET is_active = TRUE WHERE is_active IS NULL;
|
|
UPDATE states SET crawl_enabled = TRUE WHERE crawl_enabled IS NULL;
|
|
|
|
-- Insert known states (idempotent - ON CONFLICT DO UPDATE to fill missing values)
|
|
INSERT INTO states (code, name, timezone, is_active, crawl_enabled) VALUES
|
|
('AZ', 'Arizona', 'America/Phoenix', TRUE, TRUE),
|
|
('CA', 'California', 'America/Los_Angeles', TRUE, TRUE),
|
|
('CO', 'Colorado', 'America/Denver', TRUE, TRUE),
|
|
('FL', 'Florida', 'America/New_York', TRUE, TRUE),
|
|
('IL', 'Illinois', 'America/Chicago', TRUE, TRUE),
|
|
('MA', 'Massachusetts', 'America/New_York', TRUE, TRUE),
|
|
('MD', 'Maryland', 'America/New_York', TRUE, TRUE),
|
|
('MI', 'Michigan', 'America/Detroit', TRUE, TRUE),
|
|
('MO', 'Missouri', 'America/Chicago', TRUE, TRUE),
|
|
('NV', 'Nevada', 'America/Los_Angeles', TRUE, TRUE),
|
|
('NJ', 'New Jersey', 'America/New_York', TRUE, TRUE),
|
|
('NY', 'New York', 'America/New_York', TRUE, TRUE),
|
|
('OH', 'Ohio', 'America/New_York', TRUE, TRUE),
|
|
('OK', 'Oklahoma', 'America/Chicago', TRUE, TRUE),
|
|
('OR', 'Oregon', 'America/Los_Angeles', TRUE, TRUE),
|
|
('PA', 'Pennsylvania', 'America/New_York', TRUE, TRUE),
|
|
('WA', 'Washington', 'America/Los_Angeles', TRUE, TRUE)
|
|
ON CONFLICT (code) DO UPDATE SET
|
|
timezone = COALESCE(states.timezone, EXCLUDED.timezone),
|
|
is_active = COALESCE(states.is_active, EXCLUDED.is_active),
|
|
crawl_enabled = COALESCE(states.crawl_enabled, EXCLUDED.crawl_enabled),
|
|
updated_at = NOW();
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_states_code ON states(code);
|
|
CREATE INDEX IF NOT EXISTS idx_states_active ON states(is_active) WHERE is_active = TRUE;
|
|
|
|
COMMENT ON TABLE states IS 'US states where CannaiQ operates. Single source of truth for state configuration.';
|
|
|
|
|
|
-- ============================================================================
|
|
-- SECTION 2: CHAINS TABLE
|
|
-- ============================================================================
|
|
-- Retail chains/groups that own multiple dispensary locations.
|
|
-- Examples: Curaleaf, Trulieve, Harvest, Columbia Care
|
|
|
|
CREATE TABLE IF NOT EXISTS chains (
|
|
id SERIAL PRIMARY KEY,
|
|
name VARCHAR(255) NOT NULL,
|
|
slug VARCHAR(255) NOT NULL,
|
|
website_url TEXT,
|
|
logo_url TEXT,
|
|
description TEXT,
|
|
headquarters_city VARCHAR(100),
|
|
headquarters_state_id INTEGER,
|
|
founded_year INTEGER,
|
|
is_active BOOLEAN DEFAULT TRUE,
|
|
is_public BOOLEAN DEFAULT FALSE,
|
|
stock_ticker VARCHAR(10),
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
-- Add unique constraint on slug if not exists
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'chains_slug_key' AND conrelid = 'chains'::regclass
|
|
) THEN
|
|
ALTER TABLE chains ADD CONSTRAINT chains_slug_key UNIQUE (slug);
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL;
|
|
WHEN OTHERS THEN
|
|
NULL;
|
|
END $$;
|
|
|
|
-- Add FK to states if not exists
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'chains_headquarters_state_id_fkey'
|
|
) THEN
|
|
ALTER TABLE chains
|
|
ADD CONSTRAINT chains_headquarters_state_id_fkey
|
|
FOREIGN KEY (headquarters_state_id) REFERENCES states(id) ON DELETE SET NULL;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL;
|
|
WHEN OTHERS THEN
|
|
NULL;
|
|
END $$;
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_chains_slug ON chains(slug);
|
|
CREATE INDEX IF NOT EXISTS idx_chains_active ON chains(is_active) WHERE is_active = TRUE;
|
|
|
|
COMMENT ON TABLE chains IS 'Retail chains/groups that own multiple dispensary locations.';
|
|
|
|
|
|
-- ============================================================================
|
|
-- SECTION 3: ADD state_id AND chain_id TO DISPENSARIES
|
|
-- ============================================================================
|
|
-- Link existing dispensaries table to states and chains.
|
|
|
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS state_id INTEGER;
|
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS chain_id INTEGER;
|
|
|
|
-- Add FK constraints if not exist
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'dispensaries_state_id_fkey'
|
|
) THEN
|
|
ALTER TABLE dispensaries
|
|
ADD CONSTRAINT dispensaries_state_id_fkey
|
|
FOREIGN KEY (state_id) REFERENCES states(id) ON DELETE SET NULL;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL;
|
|
WHEN OTHERS THEN
|
|
NULL;
|
|
END $$;
|
|
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'dispensaries_chain_id_fkey'
|
|
) THEN
|
|
ALTER TABLE dispensaries
|
|
ADD CONSTRAINT dispensaries_chain_id_fkey
|
|
FOREIGN KEY (chain_id) REFERENCES chains(id) ON DELETE SET NULL;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL;
|
|
WHEN OTHERS THEN
|
|
NULL;
|
|
END $$;
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_state_id ON dispensaries(state_id);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_chain_id ON dispensaries(chain_id) WHERE chain_id IS NOT NULL;
|
|
|
|
-- Backfill state_id from existing state column (safe - only updates NULL values)
|
|
UPDATE dispensaries d
|
|
SET state_id = s.id
|
|
FROM states s
|
|
WHERE d.state = s.code
|
|
AND d.state_id IS NULL;
|
|
|
|
COMMENT ON COLUMN dispensaries.state_id IS 'FK to states table. Canonical state reference.';
|
|
COMMENT ON COLUMN dispensaries.chain_id IS 'FK to chains table. NULL if independent dispensary.';
|
|
|
|
|
|
-- ============================================================================
|
|
-- SECTION 4: CRAWL_RUNS TABLE
|
|
-- ============================================================================
|
|
-- One record per crawl execution. Links to snapshots.
|
|
|
|
CREATE TABLE IF NOT EXISTS crawl_runs (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id INTEGER NOT NULL,
|
|
state_id INTEGER,
|
|
|
|
-- Provider info
|
|
provider VARCHAR(50) NOT NULL DEFAULT 'dutchie',
|
|
|
|
-- Timing
|
|
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
finished_at TIMESTAMPTZ,
|
|
duration_ms INTEGER,
|
|
|
|
-- Status
|
|
status VARCHAR(20) NOT NULL DEFAULT 'running',
|
|
error_code VARCHAR(50),
|
|
error_message TEXT,
|
|
http_status INTEGER,
|
|
|
|
-- Results
|
|
products_found INTEGER DEFAULT 0,
|
|
products_new INTEGER DEFAULT 0,
|
|
products_updated INTEGER DEFAULT 0,
|
|
products_missing INTEGER DEFAULT 0,
|
|
snapshots_written INTEGER DEFAULT 0,
|
|
|
|
-- Infrastructure
|
|
worker_id VARCHAR(100),
|
|
worker_hostname VARCHAR(100),
|
|
proxy_used TEXT,
|
|
trigger_type VARCHAR(50) DEFAULT 'scheduled',
|
|
|
|
-- Metadata
|
|
metadata JSONB DEFAULT '{}',
|
|
|
|
created_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
-- Add FK constraints if not exist
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'crawl_runs_dispensary_id_fkey'
|
|
) THEN
|
|
ALTER TABLE crawl_runs
|
|
ADD CONSTRAINT crawl_runs_dispensary_id_fkey
|
|
FOREIGN KEY (dispensary_id) REFERENCES dispensaries(id) ON DELETE CASCADE;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL;
|
|
WHEN OTHERS THEN
|
|
NULL;
|
|
END $$;
|
|
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'crawl_runs_state_id_fkey'
|
|
) THEN
|
|
ALTER TABLE crawl_runs
|
|
ADD CONSTRAINT crawl_runs_state_id_fkey
|
|
FOREIGN KEY (state_id) REFERENCES states(id) ON DELETE SET NULL;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL;
|
|
WHEN OTHERS THEN
|
|
NULL;
|
|
END $$;
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_runs_dispensary ON crawl_runs(dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_runs_state ON crawl_runs(state_id) WHERE state_id IS NOT NULL;
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_runs_status ON crawl_runs(status);
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_runs_started ON crawl_runs(started_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_runs_dispensary_started ON crawl_runs(dispensary_id, started_at DESC);
|
|
|
|
COMMENT ON TABLE crawl_runs IS 'Each crawl execution. Links to snapshots and traces.';
|
|
|
|
|
|
-- ============================================================================
|
|
-- SECTION 5: STORE_PRODUCTS TABLE
|
|
-- ============================================================================
|
|
-- Current state of products on each dispensary menu.
|
|
-- Provider-agnostic structure for analytics.
|
|
|
|
CREATE TABLE IF NOT EXISTS store_products (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id INTEGER NOT NULL,
|
|
state_id INTEGER,
|
|
|
|
-- Provider-specific identifiers
|
|
provider VARCHAR(50) NOT NULL DEFAULT 'dutchie',
|
|
provider_product_id VARCHAR(100) NOT NULL,
|
|
provider_brand_id VARCHAR(100),
|
|
enterprise_product_id VARCHAR(100),
|
|
|
|
-- Raw data from platform (not normalized)
|
|
name VARCHAR(500) NOT NULL,
|
|
brand_name VARCHAR(255),
|
|
category VARCHAR(100),
|
|
subcategory VARCHAR(100),
|
|
strain_type VARCHAR(50),
|
|
description TEXT,
|
|
|
|
-- Pricing (current)
|
|
price_rec NUMERIC(10,2),
|
|
price_med NUMERIC(10,2),
|
|
price_rec_special NUMERIC(10,2),
|
|
price_med_special NUMERIC(10,2),
|
|
is_on_special BOOLEAN DEFAULT FALSE,
|
|
special_name TEXT,
|
|
discount_percent NUMERIC(5,2),
|
|
price_unit VARCHAR(20) DEFAULT 'each',
|
|
|
|
-- Inventory
|
|
is_in_stock BOOLEAN DEFAULT TRUE,
|
|
stock_quantity INTEGER,
|
|
stock_status VARCHAR(50) DEFAULT 'in_stock',
|
|
|
|
-- Potency
|
|
thc_percent NUMERIC(5,2),
|
|
cbd_percent NUMERIC(5,2),
|
|
thc_mg NUMERIC(10,2),
|
|
cbd_mg NUMERIC(10,2),
|
|
|
|
-- Weight/Size
|
|
weight_value NUMERIC(10,2),
|
|
weight_unit VARCHAR(20),
|
|
|
|
-- Images
|
|
image_url TEXT,
|
|
local_image_path TEXT,
|
|
thumbnail_url TEXT,
|
|
|
|
-- Flags
|
|
is_featured BOOLEAN DEFAULT FALSE,
|
|
medical_only BOOLEAN DEFAULT FALSE,
|
|
rec_only BOOLEAN DEFAULT FALSE,
|
|
|
|
-- Menu position (for tracking prominence)
|
|
menu_position INTEGER,
|
|
|
|
-- Timestamps
|
|
first_seen_at TIMESTAMPTZ DEFAULT NOW(),
|
|
last_seen_at TIMESTAMPTZ DEFAULT NOW(),
|
|
last_price_change_at TIMESTAMPTZ,
|
|
last_stock_change_at TIMESTAMPTZ,
|
|
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
-- Add unique constraint if not exists
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'store_products_dispensary_provider_product_key'
|
|
) THEN
|
|
ALTER TABLE store_products
|
|
ADD CONSTRAINT store_products_dispensary_provider_product_key
|
|
UNIQUE (dispensary_id, provider, provider_product_id);
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL;
|
|
WHEN OTHERS THEN
|
|
NULL;
|
|
END $$;
|
|
|
|
-- Add FK constraints if not exist
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'store_products_dispensary_id_fkey'
|
|
) THEN
|
|
ALTER TABLE store_products
|
|
ADD CONSTRAINT store_products_dispensary_id_fkey
|
|
FOREIGN KEY (dispensary_id) REFERENCES dispensaries(id) ON DELETE CASCADE;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL;
|
|
WHEN OTHERS THEN
|
|
NULL;
|
|
END $$;
|
|
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'store_products_state_id_fkey'
|
|
) THEN
|
|
ALTER TABLE store_products
|
|
ADD CONSTRAINT store_products_state_id_fkey
|
|
FOREIGN KEY (state_id) REFERENCES states(id) ON DELETE SET NULL;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL;
|
|
WHEN OTHERS THEN
|
|
NULL;
|
|
END $$;
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_store_products_dispensary ON store_products(dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_store_products_state ON store_products(state_id) WHERE state_id IS NOT NULL;
|
|
CREATE INDEX IF NOT EXISTS idx_store_products_category ON store_products(category) WHERE category IS NOT NULL;
|
|
CREATE INDEX IF NOT EXISTS idx_store_products_brand_name ON store_products(brand_name) WHERE brand_name IS NOT NULL;
|
|
CREATE INDEX IF NOT EXISTS idx_store_products_in_stock ON store_products(dispensary_id, is_in_stock);
|
|
CREATE INDEX IF NOT EXISTS idx_store_products_special ON store_products(dispensary_id, is_on_special) WHERE is_on_special = TRUE;
|
|
CREATE INDEX IF NOT EXISTS idx_store_products_last_seen ON store_products(last_seen_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_store_products_provider ON store_products(provider);
|
|
CREATE INDEX IF NOT EXISTS idx_store_products_enterprise ON store_products(enterprise_product_id) WHERE enterprise_product_id IS NOT NULL;
|
|
|
|
COMMENT ON TABLE store_products IS 'Current state of products on each dispensary menu. Provider-agnostic.';
|
|
|
|
|
|
-- ============================================================================
|
|
-- SECTION 6: STORE_PRODUCT_SNAPSHOTS TABLE
|
|
-- ============================================================================
|
|
-- Historical price/stock data. One row per product per crawl.
|
|
-- CRITICAL: NEVER DELETE from this table.
|
|
|
|
CREATE TABLE IF NOT EXISTS store_product_snapshots (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id INTEGER NOT NULL,
|
|
store_product_id INTEGER,
|
|
state_id INTEGER,
|
|
|
|
-- Provider info
|
|
provider VARCHAR(50) NOT NULL DEFAULT 'dutchie',
|
|
provider_product_id VARCHAR(100),
|
|
|
|
-- Link to crawl run
|
|
crawl_run_id INTEGER,
|
|
|
|
-- Capture timestamp
|
|
captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
|
|
-- Raw data from platform
|
|
name VARCHAR(500),
|
|
brand_name VARCHAR(255),
|
|
category VARCHAR(100),
|
|
subcategory VARCHAR(100),
|
|
|
|
-- Pricing at time of capture
|
|
price_rec NUMERIC(10,2),
|
|
price_med NUMERIC(10,2),
|
|
price_rec_special NUMERIC(10,2),
|
|
price_med_special NUMERIC(10,2),
|
|
is_on_special BOOLEAN DEFAULT FALSE,
|
|
discount_percent NUMERIC(5,2),
|
|
|
|
-- Inventory at time of capture
|
|
is_in_stock BOOLEAN DEFAULT TRUE,
|
|
stock_quantity INTEGER,
|
|
stock_status VARCHAR(50) DEFAULT 'in_stock',
|
|
is_present_in_feed BOOLEAN DEFAULT TRUE,
|
|
|
|
-- Potency at time of capture
|
|
thc_percent NUMERIC(5,2),
|
|
cbd_percent NUMERIC(5,2),
|
|
|
|
-- Menu position (for tracking prominence changes)
|
|
menu_position INTEGER,
|
|
|
|
-- Image URL at time of capture
|
|
image_url TEXT,
|
|
|
|
-- Full raw response for debugging
|
|
raw_data JSONB,
|
|
|
|
created_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
-- Add FK constraints if not exist
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'store_product_snapshots_dispensary_id_fkey'
|
|
) THEN
|
|
ALTER TABLE store_product_snapshots
|
|
ADD CONSTRAINT store_product_snapshots_dispensary_id_fkey
|
|
FOREIGN KEY (dispensary_id) REFERENCES dispensaries(id) ON DELETE CASCADE;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL;
|
|
WHEN OTHERS THEN
|
|
NULL;
|
|
END $$;
|
|
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'store_product_snapshots_store_product_id_fkey'
|
|
) THEN
|
|
ALTER TABLE store_product_snapshots
|
|
ADD CONSTRAINT store_product_snapshots_store_product_id_fkey
|
|
FOREIGN KEY (store_product_id) REFERENCES store_products(id) ON DELETE SET NULL;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL;
|
|
WHEN OTHERS THEN
|
|
NULL;
|
|
END $$;
|
|
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'store_product_snapshots_state_id_fkey'
|
|
) THEN
|
|
ALTER TABLE store_product_snapshots
|
|
ADD CONSTRAINT store_product_snapshots_state_id_fkey
|
|
FOREIGN KEY (state_id) REFERENCES states(id) ON DELETE SET NULL;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL;
|
|
WHEN OTHERS THEN
|
|
NULL;
|
|
END $$;
|
|
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'store_product_snapshots_crawl_run_id_fkey'
|
|
) THEN
|
|
ALTER TABLE store_product_snapshots
|
|
ADD CONSTRAINT store_product_snapshots_crawl_run_id_fkey
|
|
FOREIGN KEY (crawl_run_id) REFERENCES crawl_runs(id) ON DELETE SET NULL;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN
|
|
NULL;
|
|
WHEN OTHERS THEN
|
|
NULL;
|
|
END $$;
|
|
|
|
-- Indexes optimized for analytics queries
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_dispensary_captured ON store_product_snapshots(dispensary_id, captured_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_state_captured ON store_product_snapshots(state_id, captured_at DESC) WHERE state_id IS NOT NULL;
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_product_captured ON store_product_snapshots(store_product_id, captured_at DESC) WHERE store_product_id IS NOT NULL;
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_crawl_run ON store_product_snapshots(crawl_run_id) WHERE crawl_run_id IS NOT NULL;
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_captured_at ON store_product_snapshots(captured_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_brand ON store_product_snapshots(brand_name) WHERE brand_name IS NOT NULL;
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_provider_product ON store_product_snapshots(provider_product_id) WHERE provider_product_id IS NOT NULL;
|
|
|
|
COMMENT ON TABLE store_product_snapshots IS 'Historical crawl data. One row per product per crawl. NEVER DELETE.';
|
|
|
|
|
|
-- ============================================================================
|
|
-- SECTION 7: VIEWS FOR BACKWARD COMPATIBILITY
|
|
-- ============================================================================
|
|
|
|
-- View: Latest snapshot per store product
|
|
CREATE OR REPLACE VIEW v_latest_store_snapshots AS
|
|
SELECT DISTINCT ON (dispensary_id, provider_product_id)
|
|
sps.*
|
|
FROM store_product_snapshots sps
|
|
ORDER BY dispensary_id, provider_product_id, captured_at DESC;
|
|
|
|
-- View: Crawl run summary per dispensary
|
|
CREATE OR REPLACE VIEW v_dispensary_crawl_summary AS
|
|
SELECT
|
|
d.id AS dispensary_id,
|
|
COALESCE(d.dba_name, d.name) AS dispensary_name,
|
|
d.city,
|
|
d.state,
|
|
d.state_id,
|
|
s.name AS state_name,
|
|
COUNT(DISTINCT sp.id) AS current_product_count,
|
|
COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock) AS in_stock_count,
|
|
COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_on_special) AS on_special_count,
|
|
MAX(cr.finished_at) AS last_crawl_at,
|
|
(SELECT status FROM crawl_runs WHERE dispensary_id = d.id ORDER BY started_at DESC LIMIT 1) AS last_crawl_status
|
|
FROM dispensaries d
|
|
LEFT JOIN states s ON s.id = d.state_id
|
|
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
|
|
LEFT JOIN crawl_runs cr ON cr.dispensary_id = d.id
|
|
GROUP BY d.id, d.dba_name, d.name, d.city, d.state, d.state_id, s.name;
|
|
|
|
|
|
-- ============================================================================
|
|
-- MIGRATION 051 COMPLETE
|
|
-- ============================================================================
|
|
|
|
SELECT 'Migration 051 completed successfully. Canonical schema is ready.' AS status;
|