feat: Add v2 architecture with multi-state support and orchestrator services
Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
90
backend/migrations/037_dispensary_crawler_profiles.sql
Normal file
90
backend/migrations/037_dispensary_crawler_profiles.sql
Normal file
@@ -0,0 +1,90 @@
|
||||
-- Migration 037: Add per-store crawler profiles for Dutchie dispensaries
|
||||
-- This enables per-store crawler configuration without changing shared logic
|
||||
-- Phase 1: Schema only - no automatic behavior changes
|
||||
|
||||
-- Create the crawler profiles table
|
||||
CREATE TABLE IF NOT EXISTS dispensary_crawler_profiles (
    id              serial       PRIMARY KEY,

    -- Owning dispensary; profiles are removed together with their dispensary.
    dispensary_id   integer      NOT NULL REFERENCES dispensaries (id) ON DELETE CASCADE,

    -- Display name of the profile (unique per dispensary via a separate index).
    profile_name    varchar(255) NOT NULL,

    -- Broad crawler family, e.g. 'dutchie', 'treez', 'jane'.
    crawler_type    varchar(50)  NOT NULL,

    -- Optional key used later to map to a per-store crawler module,
    -- e.g. 'curaleaf-dispensary-gilbert'.
    profile_key     varchar(255),

    -- Free-form settings bucket: selectors, URLs, flags, etc.
    config          jsonb        NOT NULL DEFAULT '{}'::jsonb,

    -- Execution hints with safe defaults (config may override them if needed).
    timeout_ms      integer      DEFAULT 30000,
    download_images boolean      DEFAULT TRUE,
    track_stock     boolean      DEFAULT TRUE,

    version         integer      DEFAULT 1,
    enabled         boolean      DEFAULT TRUE,

    created_at      timestamptz  DEFAULT now(),
    updated_at      timestamptz  DEFAULT now()
);
|
||||
|
||||
-- Unique index on dispensary_id + profile_name
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS dispensary_crawler_profiles_unique_name
|
||||
ON dispensary_crawler_profiles (dispensary_id, profile_name);
|
||||
|
||||
-- Index for finding enabled profiles by type
|
||||
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_type_enabled
|
||||
ON dispensary_crawler_profiles (crawler_type, enabled);
|
||||
|
||||
-- Index for dispensary lookup
|
||||
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_dispensary
|
||||
ON dispensary_crawler_profiles (dispensary_id);
|
||||
|
||||
-- Add a nullable FK from dispensaries to its currently active crawler profile.
-- ADD COLUMN IF NOT EXISTS keeps this idempotent and resolves the table through
-- the search_path. A hand-rolled information_schema.columns lookup without a
-- table_schema filter can be fooled by a same-named table in another schema.
-- Deleting a profile only clears the pointer (ON DELETE SET NULL).
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS active_crawler_profile_id INTEGER NULL
        REFERENCES dispensary_crawler_profiles(id) ON DELETE SET NULL;
|
||||
|
||||
-- Create index on the FK for faster joins
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_active_profile
|
||||
ON dispensaries (active_crawler_profile_id)
|
||||
WHERE active_crawler_profile_id IS NOT NULL;
|
||||
|
||||
-- Create or replace trigger function for updated_at
|
||||
-- Row-level trigger function: stamps NEW.updated_at with the current
-- transaction time and passes the modified row on.
CREATE OR REPLACE FUNCTION set_updated_at_timestamp()
RETURNS trigger
LANGUAGE plpgsql
AS $body$
BEGIN
    NEW.updated_at := now();
    RETURN NEW;
END;
$body$;
|
||||
|
||||
-- Add trigger to keep updated_at fresh (drop first if exists to avoid duplicates)
|
||||
-- Recreate the trigger from scratch so repeated runs never stack duplicates.
DROP TRIGGER IF EXISTS dispensary_crawler_profiles_set_timestamp
    ON dispensary_crawler_profiles;

-- Refresh updated_at on every row update of a crawler profile.
CREATE TRIGGER dispensary_crawler_profiles_set_timestamp
    BEFORE UPDATE
    ON dispensary_crawler_profiles
    FOR EACH ROW
    EXECUTE PROCEDURE set_updated_at_timestamp();
|
||||
|
||||
-- Add comments for documentation
|
||||
COMMENT ON TABLE dispensary_crawler_profiles IS 'Per-store crawler configuration profiles. Each dispensary can have multiple profiles but only one active at a time.';
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.profile_name IS 'Human readable name for the profile, e.g. "Curaleaf Gilbert - Dutchie v1"';
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.crawler_type IS 'The crawler implementation type: dutchie, treez, jane, sandbox, custom';
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.profile_key IS 'Optional identifier for per-store crawler module mapping';
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.config IS 'JSONB configuration for the crawler. Schema depends on crawler_type.';
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.timeout_ms IS 'Request timeout in milliseconds (default 30000)';
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.download_images IS 'Whether to download product images locally';
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.track_stock IS 'Whether to track inventory/stock levels';
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.version IS 'Profile version number for A/B testing or upgrades';
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.enabled IS 'Whether this profile can be used (soft delete)';
|
||||
COMMENT ON COLUMN dispensaries.active_crawler_profile_id IS 'FK to the currently active crawler profile for this dispensary';
|
||||
84
backend/migrations/038_profile_status_field.sql
Normal file
84
backend/migrations/038_profile_status_field.sql
Normal file
@@ -0,0 +1,84 @@
|
||||
-- Migration: Add status field to dispensary_crawler_profiles
|
||||
-- This adds a proper status column for crawler state machine
|
||||
-- Status values: 'production', 'sandbox', 'needs_manual', 'disabled'
|
||||
|
||||
-- Add the status column (default 'production') and seed it from the legacy
-- config->>'status' value.
--
-- The seed runs ONLY when the column is first created:
--   * Because the column is added with a DEFAULT, every existing row is
--     immediately 'production', so a later "WHERE status IS NULL OR status = ''"
--     backfill would never match any row.
--   * Tying the seed to column creation also means a re-run of this migration
--     cannot overwrite statuses changed since the first run.
-- Only the documented status values are copied; anything else keeps the default.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_attribute
        WHERE attrelid = 'dispensary_crawler_profiles'::regclass
          AND attname = 'status'
          AND NOT attisdropped
    ) THEN
        ALTER TABLE dispensary_crawler_profiles
            ADD COLUMN status VARCHAR(50) DEFAULT 'production';

        UPDATE dispensary_crawler_profiles
        SET status = config->>'status'
        WHERE config->>'status' IN ('production', 'sandbox', 'needs_manual', 'disabled');
    END IF;
END $$;

-- Sandbox bookkeeping columns:
--   next_retry_at         - when the next sandbox retry is scheduled
--   sandbox_attempt_count - number of sandbox attempts, for quick lookup
--   last_sandbox_at       - time of the most recent sandbox run
ALTER TABLE dispensary_crawler_profiles
    ADD COLUMN IF NOT EXISTS next_retry_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS sandbox_attempt_count INTEGER DEFAULT 0,
    ADD COLUMN IF NOT EXISTS last_sandbox_at TIMESTAMPTZ;

-- Index for finding enabled profiles by status
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_status
    ON dispensary_crawler_profiles(status) WHERE enabled = true;

-- Index for finding enabled sandbox profiles that are due for a retry
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_next_retry
    ON dispensary_crawler_profiles(next_retry_at) WHERE enabled = true AND status = 'sandbox';

-- Document the allowed status values
COMMENT ON COLUMN dispensary_crawler_profiles.status IS
    'Crawler status: production (ready for regular crawls), sandbox (discovery mode), needs_manual (max retries exceeded), disabled (turned off)';

-- Backfill sandbox_attempt_count from config.
-- jsonb_array_length() raises on anything that is not an array (including a
-- JSON null, which is NOT a SQL NULL), so only rows holding a real array qualify.
UPDATE dispensary_crawler_profiles
SET sandbox_attempt_count = jsonb_array_length(config->'sandboxAttempts')
WHERE jsonb_typeof(config->'sandboxAttempts') = 'array';

-- Backfill next_retry_at from config.
-- Blank strings are skipped because they cannot be cast to timestamptz.
UPDATE dispensary_crawler_profiles
SET next_retry_at = (config->>'nextRetryAt')::timestamptz
WHERE NULLIF(btrim(config->>'nextRetryAt'), '') IS NOT NULL;
|
||||
|
||||
-- Create view for crawler profile summary
|
||||
-- One row per enabled crawler profile, joined to its dispensary, with two
-- derived labels:
--   crawler_mode   - 'per-store' when a profile_key is set, otherwise 'legacy'
--   status_display - human-readable form of status (sandbox splits into
--                    'Retry Due' / 'Waiting' depending on next_retry_at)
CREATE OR REPLACE VIEW v_crawler_profile_summary AS
SELECT
    p.id,
    p.dispensary_id,
    disp.name AS dispensary_name,
    disp.city,
    disp.menu_type,
    p.profile_name,
    p.profile_key,
    p.crawler_type,
    p.status,
    p.enabled,
    p.sandbox_attempt_count,
    p.next_retry_at,
    p.last_sandbox_at,
    p.created_at,
    p.updated_at,
    CASE
        WHEN p.profile_key IS NULL THEN 'legacy'
        ELSE 'per-store'
    END AS crawler_mode,
    CASE p.status
        WHEN 'production' THEN 'Ready'
        WHEN 'sandbox' THEN
            -- A NULL next_retry_at falls through to 'Waiting'.
            CASE WHEN p.next_retry_at <= NOW() THEN 'Retry Due' ELSE 'Waiting' END
        WHEN 'needs_manual' THEN 'Needs Manual'
        WHEN 'disabled' THEN 'Disabled'
        ELSE 'Unknown'
    END AS status_display
FROM dispensary_crawler_profiles AS p
INNER JOIN dispensaries AS disp
    ON disp.id = p.dispensary_id
WHERE p.enabled
ORDER BY p.status, p.updated_at DESC;
|
||||
73
backend/migrations/039_crawl_orchestration_traces.sql
Normal file
73
backend/migrations/039_crawl_orchestration_traces.sql
Normal file
@@ -0,0 +1,73 @@
|
||||
-- Migration: Create crawl_orchestration_traces table
|
||||
-- Purpose: Store detailed step-by-step traces for every crawl orchestration run
|
||||
-- This enables full visibility into per-store crawler behavior
|
||||
|
||||
CREATE TABLE IF NOT EXISTS crawl_orchestration_traces (
    id              serial       PRIMARY KEY,
    dispensary_id   integer      NOT NULL REFERENCES dispensaries (id) ON DELETE CASCADE,
    run_id          varchar(255),                 -- UUID or job ID of this crawl run
    profile_id      integer      REFERENCES dispensary_crawler_profiles (id) ON DELETE SET NULL,
    profile_key     varchar(255),                 -- e.g. "trulieve-scottsdale"
    crawler_module  varchar(255),                 -- full path of the .ts file that was loaded
    state_at_start  varchar(50),                  -- sandbox, production, legacy, disabled
    state_at_end    varchar(50),                  -- sandbox, production, needs_manual, etc.

    -- Ordered array of step objects making up the trace.
    trace           jsonb        NOT NULL DEFAULT '[]'::jsonb,

    -- Summary metrics kept alongside the trace for quick querying.
    total_steps     integer      DEFAULT 0,
    duration_ms     integer,
    success         boolean,
    error_message   text,
    products_found  integer,

    -- Timestamps
    started_at      timestamptz  DEFAULT now(),
    completed_at    timestamptz,
    created_at      timestamptz  DEFAULT now()
);
|
||||
|
||||
-- Index for quick lookup by dispensary
|
||||
CREATE INDEX IF NOT EXISTS idx_traces_dispensary_id
|
||||
ON crawl_orchestration_traces(dispensary_id);
|
||||
|
||||
-- Index for finding latest trace per dispensary
|
||||
CREATE INDEX IF NOT EXISTS idx_traces_dispensary_created
|
||||
ON crawl_orchestration_traces(dispensary_id, created_at DESC);
|
||||
|
||||
-- Index for finding traces by run_id
|
||||
CREATE INDEX IF NOT EXISTS idx_traces_run_id
|
||||
ON crawl_orchestration_traces(run_id) WHERE run_id IS NOT NULL;
|
||||
|
||||
-- Index for finding traces by profile
|
||||
CREATE INDEX IF NOT EXISTS idx_traces_profile_id
|
||||
ON crawl_orchestration_traces(profile_id) WHERE profile_id IS NOT NULL;
|
||||
|
||||
-- Comment explaining trace structure
|
||||
COMMENT ON COLUMN crawl_orchestration_traces.trace IS
|
||||
'Ordered array of step objects. Each step has:
|
||||
{
|
||||
"step": 1,
|
||||
"action": "load_profile",
|
||||
"description": "Loading crawler profile for dispensary",
|
||||
"timestamp": 1701234567890,
|
||||
"duration_ms": 45,
|
||||
"input": { ... },
|
||||
"output": { ... },
|
||||
"what": "Description of what happened",
|
||||
"why": "Reason this step was taken",
|
||||
"where": "Code location / module",
|
||||
"how": "Method or approach used",
|
||||
"when": "ISO timestamp"
|
||||
}';
|
||||
|
||||
-- View for easy access to latest traces
|
||||
-- Latest trace per dispensary, with the dispensary name and city attached.
--
-- The trace columns are listed explicitly rather than as cot.*. CREATE OR
-- REPLACE VIEW may only append columns; with cot.* followed by the joined
-- columns, any column later added to crawl_orchestration_traces would land in
-- the middle of the select list and make a re-run of this statement fail.
-- The list matches the table definition in this migration, so the view shape
-- is unchanged.
--
-- cot.id is a tie-breaker so the "latest" row is deterministic when two
-- traces share the same created_at.
CREATE OR REPLACE VIEW v_latest_crawl_traces AS
SELECT DISTINCT ON (cot.dispensary_id)
    cot.id,
    cot.dispensary_id,
    cot.run_id,
    cot.profile_id,
    cot.profile_key,
    cot.crawler_module,
    cot.state_at_start,
    cot.state_at_end,
    cot.trace,
    cot.total_steps,
    cot.duration_ms,
    cot.success,
    cot.error_message,
    cot.products_found,
    cot.started_at,
    cot.completed_at,
    cot.created_at,
    d.name AS dispensary_name,
    d.city AS dispensary_city
FROM crawl_orchestration_traces cot
INNER JOIN dispensaries d ON d.id = cot.dispensary_id
ORDER BY cot.dispensary_id, cot.created_at DESC, cot.id DESC;
|
||||
73
backend/migrations/040_dispensary_dba_name.sql
Normal file
73
backend/migrations/040_dispensary_dba_name.sql
Normal file
@@ -0,0 +1,73 @@
|
||||
-- Migration 040: Add dba_name column to dispensaries table
|
||||
-- DBA (Doing Business As) name - the name the dispensary operates under,
|
||||
-- which may differ from the legal entity name
|
||||
-- This migration is idempotent - safe to run multiple times
|
||||
|
||||
-- Add business-identity and contact columns to dispensaries.
-- ADD COLUMN IF NOT EXISTS keeps the migration idempotent and resolves the
-- table through the search_path. A per-column information_schema.columns
-- check without a table_schema filter could skip a column whenever a
-- same-named table exists in another schema.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS dba_name TEXT DEFAULT NULL,              -- DBA (Doing Business As) name
    ADD COLUMN IF NOT EXISTS company_name TEXT DEFAULT NULL,          -- legal entity name
    ADD COLUMN IF NOT EXISTS azdhs_id INTEGER DEFAULT NULL,           -- Arizona Department of Health Services license number
    ADD COLUMN IF NOT EXISTS phone TEXT DEFAULT NULL,
    ADD COLUMN IF NOT EXISTS email TEXT DEFAULT NULL,
    ADD COLUMN IF NOT EXISTS google_rating NUMERIC(2,1) DEFAULT NULL, -- one decimal place
    ADD COLUMN IF NOT EXISTS google_review_count INTEGER DEFAULT NULL;
|
||||
|
||||
-- Add comments for documentation
|
||||
COMMENT ON COLUMN dispensaries.dba_name IS 'DBA (Doing Business As) name - the public-facing name the dispensary operates under';
|
||||
COMMENT ON COLUMN dispensaries.company_name IS 'Legal entity/company name that owns the dispensary';
|
||||
COMMENT ON COLUMN dispensaries.azdhs_id IS 'Arizona Department of Health Services license number';
|
||||
COMMENT ON COLUMN dispensaries.phone IS 'Contact phone number';
|
||||
COMMENT ON COLUMN dispensaries.email IS 'Contact email address';
|
||||
COMMENT ON COLUMN dispensaries.google_rating IS 'Google Maps rating (1.0 to 5.0)';
|
||||
COMMENT ON COLUMN dispensaries.google_review_count IS 'Number of Google reviews';
|
||||
|
||||
-- Create index for searching by dba_name
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_dba_name ON dispensaries (dba_name);
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_azdhs_id ON dispensaries (azdhs_id);
|
||||
376
backend/migrations/041_cannaiq_canonical_schema.sql
Normal file
376
backend/migrations/041_cannaiq_canonical_schema.sql
Normal file
@@ -0,0 +1,376 @@
|
||||
-- Migration 041: CannaiQ Canonical Schema
|
||||
--
|
||||
-- This migration adds the canonical CannaiQ schema tables and columns.
|
||||
-- ALL CHANGES ARE ADDITIVE - NO DROPS, NO DELETES, NO TRUNCATES.
|
||||
--
|
||||
-- Run with: psql $CANNAIQ_DB_URL -f migrations/041_cannaiq_canonical_schema.sql
|
||||
--
|
||||
-- Tables created:
|
||||
-- - states (new)
|
||||
-- - chains (new)
|
||||
-- - brands (new)
|
||||
-- - store_products (new - normalized view of current menu)
|
||||
-- - store_product_snapshots (new - historical crawl data)
|
||||
-- - crawl_runs (new - replaces/supplements dispensary_crawl_jobs)
|
||||
--
|
||||
-- Tables modified:
|
||||
-- - dispensaries (add state_id, chain_id FKs)
|
||||
-- - dispensary_crawler_profiles (add status, allow_autopromote, validated_at)
|
||||
-- - crawl_orchestration_traces (add crawl_run_id FK to crawl_runs)
|
||||
--
|
||||
|
||||
-- =====================================================
|
||||
-- 1) STATES TABLE
|
||||
-- =====================================================
|
||||
CREATE TABLE IF NOT EXISTS states (
|
||||
id SERIAL PRIMARY KEY,
|
||||
code VARCHAR(2) NOT NULL UNIQUE,
|
||||
name VARCHAR(100) NOT NULL,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Insert known states
|
||||
INSERT INTO states (code, name) VALUES
|
||||
('AZ', 'Arizona'),
|
||||
('CA', 'California'),
|
||||
('CO', 'Colorado'),
|
||||
('FL', 'Florida'),
|
||||
('IL', 'Illinois'),
|
||||
('MA', 'Massachusetts'),
|
||||
('MD', 'Maryland'),
|
||||
('MI', 'Michigan'),
|
||||
('MO', 'Missouri'),
|
||||
('NV', 'Nevada'),
|
||||
('NJ', 'New Jersey'),
|
||||
('NY', 'New York'),
|
||||
('OH', 'Ohio'),
|
||||
('OK', 'Oklahoma'),
|
||||
('OR', 'Oregon'),
|
||||
('PA', 'Pennsylvania'),
|
||||
('WA', 'Washington')
|
||||
ON CONFLICT (code) DO NOTHING;
|
||||
|
||||
COMMENT ON TABLE states IS 'US states where CannaiQ operates. Single source of truth for state codes.';
|
||||
|
||||
-- =====================================================
|
||||
-- 2) CHAINS TABLE (retail groups)
|
||||
-- =====================================================
|
||||
CREATE TABLE IF NOT EXISTS chains (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name VARCHAR(255) NOT NULL,
|
||||
slug VARCHAR(255) NOT NULL UNIQUE,
|
||||
website_url TEXT,
|
||||
logo_url TEXT,
|
||||
description TEXT,
|
||||
is_active BOOLEAN DEFAULT TRUE,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_chains_slug ON chains(slug);
|
||||
CREATE INDEX IF NOT EXISTS idx_chains_active ON chains(is_active) WHERE is_active = TRUE;
|
||||
|
||||
COMMENT ON TABLE chains IS 'Retail chains/groups that own multiple dispensary locations (e.g., Curaleaf, Trulieve).';
|
||||
|
||||
-- =====================================================
|
||||
-- 3) BRANDS TABLE (canonical brand catalog)
|
||||
-- =====================================================
|
||||
CREATE TABLE IF NOT EXISTS brands (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name VARCHAR(255) NOT NULL,
|
||||
slug VARCHAR(255) NOT NULL UNIQUE,
|
||||
external_id VARCHAR(100), -- Provider-specific brand ID
|
||||
website_url TEXT,
|
||||
instagram_handle VARCHAR(100),
|
||||
logo_url TEXT,
|
||||
description TEXT,
|
||||
is_portfolio_brand BOOLEAN DEFAULT FALSE, -- TRUE if brand we represent
|
||||
is_active BOOLEAN DEFAULT TRUE,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_brands_slug ON brands(slug);
|
||||
CREATE INDEX IF NOT EXISTS idx_brands_external_id ON brands(external_id) WHERE external_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_brands_portfolio ON brands(is_portfolio_brand) WHERE is_portfolio_brand = TRUE;
|
||||
|
||||
COMMENT ON TABLE brands IS 'Canonical brand catalog. Brands may appear across multiple dispensaries.';
|
||||
COMMENT ON COLUMN brands.is_portfolio_brand IS 'TRUE if this is a brand we represent/manage (vs third-party brand)';
|
||||
|
||||
-- =====================================================
|
||||
-- 4) ADD state_id AND chain_id TO dispensaries
|
||||
-- =====================================================
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS state_id INTEGER REFERENCES states(id);
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS chain_id INTEGER REFERENCES chains(id);
|
||||
|
||||
-- NOTE: state_id backfill is done by ETL script (042_legacy_import.ts), not this migration.
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_state_id ON dispensaries(state_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_chain_id ON dispensaries(chain_id) WHERE chain_id IS NOT NULL;
|
||||
|
||||
COMMENT ON COLUMN dispensaries.state_id IS 'FK to states table. Canonical state reference.';
|
||||
COMMENT ON COLUMN dispensaries.chain_id IS 'FK to chains table. NULL if independent dispensary.';
|
||||
|
||||
-- =====================================================
|
||||
-- 5) STORE_PRODUCTS TABLE (current menu state)
|
||||
-- =====================================================
|
||||
-- This is the normalized "what is currently on the menu" table.
|
||||
-- It supplements dutchie_products with a provider-agnostic structure.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS store_products (
|
||||
id SERIAL PRIMARY KEY,
|
||||
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||
product_id INTEGER REFERENCES products(id) ON DELETE SET NULL, -- Link to canonical product
|
||||
brand_id INTEGER REFERENCES brands(id) ON DELETE SET NULL, -- Link to canonical brand
|
||||
|
||||
-- Provider-specific identifiers
|
||||
provider VARCHAR(50) NOT NULL DEFAULT 'dutchie', -- dutchie, treez, jane, etc.
|
||||
provider_product_id VARCHAR(100), -- Platform-specific product ID
|
||||
provider_brand_id VARCHAR(100), -- Platform-specific brand ID
|
||||
|
||||
-- Raw data from platform (not normalized)
|
||||
name_raw VARCHAR(500) NOT NULL,
|
||||
brand_name_raw VARCHAR(255),
|
||||
category_raw VARCHAR(100),
|
||||
subcategory_raw VARCHAR(100),
|
||||
|
||||
-- Pricing
|
||||
price_rec NUMERIC(10,2),
|
||||
price_med NUMERIC(10,2),
|
||||
price_rec_special NUMERIC(10,2),
|
||||
price_med_special NUMERIC(10,2),
|
||||
is_on_special BOOLEAN DEFAULT FALSE,
|
||||
special_name TEXT,
|
||||
discount_percent NUMERIC(5,2),
|
||||
|
||||
-- Inventory
|
||||
is_in_stock BOOLEAN DEFAULT TRUE,
|
||||
stock_quantity INTEGER,
|
||||
stock_status VARCHAR(50) DEFAULT 'in_stock',
|
||||
|
||||
-- Potency
|
||||
thc_percent NUMERIC(5,2),
|
||||
cbd_percent NUMERIC(5,2),
|
||||
|
||||
-- Images
|
||||
image_url TEXT,
|
||||
local_image_path TEXT,
|
||||
|
||||
-- Timestamps
|
||||
first_seen_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
last_seen_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
|
||||
UNIQUE(dispensary_id, provider, provider_product_id)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_dispensary ON store_products(dispensary_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_product ON store_products(product_id) WHERE product_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_brand ON store_products(brand_id) WHERE brand_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_provider ON store_products(provider);
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_in_stock ON store_products(dispensary_id, is_in_stock);
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_special ON store_products(dispensary_id, is_on_special) WHERE is_on_special = TRUE;
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_last_seen ON store_products(last_seen_at DESC);
|
||||
|
||||
COMMENT ON TABLE store_products IS 'Current state of products on each dispensary menu. Provider-agnostic.';
|
||||
COMMENT ON COLUMN store_products.product_id IS 'FK to canonical products table. NULL if not yet mapped.';
|
||||
COMMENT ON COLUMN store_products.brand_id IS 'FK to canonical brands table. NULL if not yet mapped.';
|
||||
|
||||
-- =====================================================
|
||||
-- 6) STORE_PRODUCT_SNAPSHOTS TABLE (historical data)
|
||||
-- =====================================================
|
||||
-- This is the critical time-series table for analytics.
|
||||
-- One row per product per crawl.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS store_product_snapshots (
|
||||
id SERIAL PRIMARY KEY,
|
||||
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||
store_product_id INTEGER REFERENCES store_products(id) ON DELETE SET NULL,
|
||||
product_id INTEGER REFERENCES products(id) ON DELETE SET NULL,
|
||||
|
||||
-- Provider info
|
||||
provider VARCHAR(50) NOT NULL DEFAULT 'dutchie',
|
||||
provider_product_id VARCHAR(100),
|
||||
|
||||
-- Link to crawl run
|
||||
crawl_run_id INTEGER, -- FK added after crawl_runs table created
|
||||
|
||||
-- Capture timestamp
|
||||
captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
|
||||
-- Raw data from platform
|
||||
name_raw VARCHAR(500),
|
||||
brand_name_raw VARCHAR(255),
|
||||
category_raw VARCHAR(100),
|
||||
subcategory_raw VARCHAR(100),
|
||||
|
||||
-- Pricing at time of capture
|
||||
price_rec NUMERIC(10,2),
|
||||
price_med NUMERIC(10,2),
|
||||
price_rec_special NUMERIC(10,2),
|
||||
price_med_special NUMERIC(10,2),
|
||||
is_on_special BOOLEAN DEFAULT FALSE,
|
||||
discount_percent NUMERIC(5,2),
|
||||
|
||||
-- Inventory at time of capture
|
||||
is_in_stock BOOLEAN DEFAULT TRUE,
|
||||
stock_quantity INTEGER,
|
||||
stock_status VARCHAR(50) DEFAULT 'in_stock',
|
||||
|
||||
-- Potency at time of capture
|
||||
thc_percent NUMERIC(5,2),
|
||||
cbd_percent NUMERIC(5,2),
|
||||
|
||||
-- Image URL at time of capture
|
||||
image_url TEXT,
|
||||
|
||||
-- Full raw response for debugging
|
||||
raw_data JSONB,
|
||||
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_dispensary_captured ON store_product_snapshots(dispensary_id, captured_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_product_captured ON store_product_snapshots(product_id, captured_at DESC) WHERE product_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_store_product ON store_product_snapshots(store_product_id) WHERE store_product_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_crawl_run ON store_product_snapshots(crawl_run_id) WHERE crawl_run_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_captured_at ON store_product_snapshots(captured_at DESC);
|
||||
|
||||
COMMENT ON TABLE store_product_snapshots IS 'Historical crawl data. One row per product per crawl. NEVER DELETE.';
|
||||
COMMENT ON COLUMN store_product_snapshots.captured_at IS 'When this snapshot was captured (crawl time).';
|
||||
|
||||
-- =====================================================
|
||||
-- 7) CRAWL_RUNS TABLE (job execution records)
|
||||
-- =====================================================
|
||||
CREATE TABLE IF NOT EXISTS crawl_runs (
|
||||
id SERIAL PRIMARY KEY,
|
||||
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||
|
||||
-- Provider
|
||||
provider VARCHAR(50) NOT NULL DEFAULT 'dutchie',
|
||||
|
||||
-- Execution times
|
||||
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
finished_at TIMESTAMPTZ,
|
||||
duration_ms INTEGER,
|
||||
|
||||
-- Status
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'running', -- running, success, failed, partial
|
||||
error_message TEXT,
|
||||
|
||||
-- Results
|
||||
products_found INTEGER DEFAULT 0,
|
||||
products_new INTEGER DEFAULT 0,
|
||||
products_updated INTEGER DEFAULT 0,
|
||||
snapshots_written INTEGER DEFAULT 0,
|
||||
|
||||
-- Metadata
|
||||
worker_id VARCHAR(100),
|
||||
trigger_type VARCHAR(50) DEFAULT 'scheduled', -- scheduled, manual, api
|
||||
metadata JSONB DEFAULT '{}',
|
||||
|
||||
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_crawl_runs_dispensary ON crawl_runs(dispensary_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_crawl_runs_status ON crawl_runs(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_crawl_runs_started ON crawl_runs(started_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_crawl_runs_dispensary_started ON crawl_runs(dispensary_id, started_at DESC);
|
||||
|
||||
COMMENT ON TABLE crawl_runs IS 'Each crawl execution. Links to snapshots and traces.';
|
||||
|
||||
-- Add FK from store_product_snapshots to crawl_runs
|
||||
-- Attach the crawl_run_id -> crawl_runs FK once crawl_runs exists.
-- The existence check reads pg_constraint scoped to this table and to
-- foreign-key constraints. A lookup keyed only on constraint_name can match
-- a same-named constraint on another table or schema, and
-- information_schema.table_constraints is filtered by the caller's privileges.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'store_product_snapshots_crawl_run_id_fkey'
          AND conrelid = 'store_product_snapshots'::regclass
          AND contype = 'f'
    ) THEN
        ALTER TABLE store_product_snapshots
            ADD CONSTRAINT store_product_snapshots_crawl_run_id_fkey
            FOREIGN KEY (crawl_run_id) REFERENCES crawl_runs(id) ON DELETE SET NULL;
    END IF;
END $$;
|
||||
|
||||
-- =====================================================
|
||||
-- 8) UPDATE crawl_orchestration_traces
|
||||
-- =====================================================
|
||||
-- Add crawl_run_id FK column if it does not exist (separate from the VARCHAR run_id column created with the table)
|
||||
ALTER TABLE crawl_orchestration_traces
|
||||
ADD COLUMN IF NOT EXISTS crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_traces_crawl_run
|
||||
ON crawl_orchestration_traces(crawl_run_id)
|
||||
WHERE crawl_run_id IS NOT NULL;
|
||||
|
||||
-- =====================================================
|
||||
-- 9) UPDATE dispensary_crawler_profiles
|
||||
-- =====================================================
|
||||
-- Add missing columns from canonical schema
|
||||
-- Bring dispensary_crawler_profiles up to the canonical schema in one statement.
-- Each column is added only if absent. If status already exists (an earlier
-- migration in this series adds it with DEFAULT 'production'), that clause is
-- a no-op and the existing default stays in force.
ALTER TABLE dispensary_crawler_profiles
    ADD COLUMN IF NOT EXISTS status varchar(50) DEFAULT 'sandbox',
    ADD COLUMN IF NOT EXISTS allow_autopromote boolean DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS validated_at timestamptz;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_profiles_status
|
||||
ON dispensary_crawler_profiles(status);
|
||||
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.status IS 'Profile status: sandbox, production, needs_manual, disabled';
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.allow_autopromote IS 'Whether this profile can be auto-promoted from sandbox to production';
|
||||
COMMENT ON COLUMN dispensary_crawler_profiles.validated_at IS 'When this profile was last validated as working';
|
||||
|
||||
-- =====================================================
|
||||
-- 10) VIEWS FOR BACKWARD COMPATIBILITY
|
||||
-- =====================================================
|
||||
|
||||
-- View to get latest snapshot per store product
|
||||
-- Latest snapshot per store product.
-- A product is identified by (dispensary_id, provider, provider_product_id),
-- the same key store_products enforces as UNIQUE. provider is therefore part
-- of the DISTINCT ON key, so identical product IDs from two providers at one
-- dispensary are not collapsed into a single row.
-- sps.id breaks ties between snapshots sharing the same captured_at, making
-- the chosen row deterministic.
CREATE OR REPLACE VIEW v_latest_store_snapshots AS
SELECT DISTINCT ON (sps.dispensary_id, sps.provider, sps.provider_product_id)
    sps.*
FROM store_product_snapshots sps
ORDER BY
    sps.dispensary_id,
    sps.provider,
    sps.provider_product_id,
    sps.captured_at DESC,
    sps.id DESC;
|
||||
|
||||
-- View to get crawl run summary per dispensary
|
||||
-- Per-dispensary crawl summary: current product counts, time of the last
-- finished crawl, and the status of the most recently started crawl.
--
-- store_products and crawl_runs are aggregated separately before joining.
-- Joining both directly to dispensaries yields (products x runs) rows per
-- dispensary, which then has to be undone with COUNT(DISTINCT ...).
-- Output column names, order, and types are unchanged; dispensaries with no
-- products still report 0 for every count.
CREATE OR REPLACE VIEW v_dispensary_crawl_summary AS
SELECT
    d.id AS dispensary_id,
    d.name AS dispensary_name,
    d.city,
    d.state,
    COALESCE(sp.current_product_count, 0) AS current_product_count,
    COALESCE(sp.in_stock_count, 0) AS in_stock_count,
    COALESCE(sp.on_special_count, 0) AS on_special_count,
    cr.last_crawl_at,
    latest.status AS last_crawl_status
FROM dispensaries d
LEFT JOIN (
    -- One row per dispensary that has at least one store product.
    SELECT
        dispensary_id,
        COUNT(*) AS current_product_count,
        COUNT(*) FILTER (WHERE is_in_stock) AS in_stock_count,
        COUNT(*) FILTER (WHERE is_on_special) AS on_special_count
    FROM store_products
    GROUP BY dispensary_id
) sp ON sp.dispensary_id = d.id
LEFT JOIN (
    -- Latest finish time across all runs of a dispensary.
    SELECT
        dispensary_id,
        MAX(finished_at) AS last_crawl_at
    FROM crawl_runs
    GROUP BY dispensary_id
) cr ON cr.dispensary_id = d.id
LEFT JOIN LATERAL (
    -- Status of the most recently started run; id breaks ties on started_at.
    SELECT r.status
    FROM crawl_runs r
    WHERE r.dispensary_id = d.id
    ORDER BY r.started_at DESC, r.id DESC
    LIMIT 1
) latest ON TRUE;
|
||||
|
||||
-- =====================================================
|
||||
-- 11) COMMENTS
|
||||
-- =====================================================
|
||||
-- Final table comments. COMMENT ON replaces any comment set earlier in this
-- migration, so each text below carries the full description, including the
-- retention rule for snapshots, instead of a shorter one that would silently
-- discard that detail.
COMMENT ON TABLE states IS 'Canonical list of US states where CannaiQ operates. Single source of truth for state codes; use state_id FK in dispensaries.';
COMMENT ON TABLE chains IS 'Retail chains/groups (multi-location operators) that own multiple dispensary locations (e.g., Curaleaf, Trulieve).';
COMMENT ON TABLE brands IS 'Canonical brand catalog across all providers. Brands may appear across multiple dispensaries.';
COMMENT ON TABLE store_products IS 'Current menu state: products on each dispensary menu. Provider-agnostic.';
COMMENT ON TABLE store_product_snapshots IS 'Historical price/stock crawl data. One row per product per crawl. NEVER DELETE.';
COMMENT ON TABLE crawl_runs IS 'Crawl execution records, one row per crawl execution. Links snapshots and traces to runs.';
|
||||
|
||||
-- =====================================================
|
||||
-- MIGRATION COMPLETE
|
||||
-- =====================================================
|
||||
--
|
||||
-- Next steps (manual - not in this migration):
|
||||
-- 1. Populate chains table from known retail groups
|
||||
-- 2. Populate brands table from existing dutchie_products.brand_name
|
||||
-- 3. Migrate data from dutchie_products → store_products
|
||||
-- 4. Migrate data from dutchie_product_snapshots → store_product_snapshots
|
||||
-- 5. Link dispensaries.chain_id to chains where applicable
|
||||
--
|
||||
50
backend/migrations/043_add_states_table.sql
Normal file
50
backend/migrations/043_add_states_table.sql
Normal file
@@ -0,0 +1,50 @@
|
||||
-- Migration 043: Add States Table
|
||||
--
|
||||
-- Creates the states table if it does not exist.
|
||||
-- Safe to run multiple times (idempotent).
|
||||
--
|
||||
-- Run with:
|
||||
-- export CANNAIQ_DB_URL="postgresql://..."; psql "$CANNAIQ_DB_URL" -f migrations/043_add_states_table.sql
|
||||
|
||||
-- =====================================================
|
||||
-- 1) CREATE STATES TABLE
|
||||
-- =====================================================
|
||||
-- Lookup table of states: surrogate integer id plus a unique text code.
-- IF NOT EXISTS makes the statement a no-op when the table is already present.
CREATE TABLE IF NOT EXISTS states (
    id         SERIAL,
    code       TEXT NOT NULL,
    name       TEXT NOT NULL,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW(),
    -- Constraint names match what PostgreSQL generates for inline definitions.
    CONSTRAINT states_pkey PRIMARY KEY (id),
    CONSTRAINT states_code_key UNIQUE (code)
);
|
||||
|
||||
-- =====================================================
|
||||
-- 2) INSERT CORE US STATES
|
||||
-- =====================================================
|
||||
-- Seed the core states. Codes that already exist are left untouched, so the
-- statement can be re-run safely.
INSERT INTO states (code, name)
SELECT
    seed.code,
    seed.name
FROM (
    VALUES
        ('AZ', 'Arizona'),
        ('CA', 'California'),
        ('CO', 'Colorado'),
        ('FL', 'Florida'),
        ('IL', 'Illinois'),
        ('MA', 'Massachusetts'),
        ('MD', 'Maryland'),
        ('MI', 'Michigan'),
        ('MO', 'Missouri'),
        ('NV', 'Nevada'),
        ('NJ', 'New Jersey'),
        ('NY', 'New York'),
        ('OH', 'Ohio'),
        ('OK', 'Oklahoma'),
        ('OR', 'Oregon'),
        ('PA', 'Pennsylvania'),
        ('WA', 'Washington')
) AS seed (code, name)
ON CONFLICT (code) DO NOTHING;
|
||||
|
||||
-- =====================================================
|
||||
-- 3) ADD INDEX
|
||||
-- =====================================================
|
||||
-- Lookup index on states.code.
-- NOTE(review): a UNIQUE constraint on states.code already maintains an index on
-- this column, so this index looks redundant — confirm before relying on it.
CREATE INDEX IF NOT EXISTS idx_states_code
    ON states (code);
|
||||
|
||||
-- =====================================================
|
||||
-- DONE
|
||||
-- =====================================================
|
||||
45
backend/migrations/044_add_provider_detection_data.sql
Normal file
45
backend/migrations/044_add_provider_detection_data.sql
Normal file
@@ -0,0 +1,45 @@
|
||||
-- Migration 044: Add provider_detection_data column to dispensaries
|
||||
--
|
||||
-- This column stores detection metadata for menu provider discovery.
|
||||
-- Used by menu-detection.ts and discovery.ts to track:
|
||||
-- - Detected provider type
|
||||
-- - Resolution attempts
|
||||
-- - Error messages
|
||||
-- - not_crawlable flag
|
||||
--
|
||||
-- Run with: psql $CANNAIQ_DB_URL -f migrations/044_add_provider_detection_data.sql
|
||||
--
|
||||
-- ALL CHANGES ARE ADDITIVE - NO DROPS, NO DELETES, NO TRUNCATES.
|
||||
|
||||
-- Add provider_detection_data to dispensaries table
|
||||
-- Add the nullable JSONB column that stores provider-detection metadata.
--
-- ADD COLUMN IF NOT EXISTS keeps the migration idempotent and resolves the table
-- the same way the ALTER itself does. The previous guard queried
-- information_schema.columns without a schema filter, so a same-named table and
-- column in another schema would cause the ALTER to be skipped and the index
-- creation that follows to fail. PostgreSQL emits its own NOTICE when the column
-- already exists.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS provider_detection_data JSONB DEFAULT NULL;
|
||||
|
||||
-- Add index for querying by not_crawlable flag
|
||||
-- Partial expression index on the 'not_crawlable' key (extracted as text).
-- Rows with no detection data are left out of the index.
CREATE INDEX IF NOT EXISTS idx_dispensaries_provider_detection_not_crawlable
    ON dispensaries ((provider_detection_data ->> 'not_crawlable'))
    WHERE provider_detection_data IS NOT NULL;

-- Partial expression index on the 'detected_provider' key (extracted as text).
CREATE INDEX IF NOT EXISTS idx_dispensaries_provider_detection_provider
    ON dispensaries ((provider_detection_data ->> 'detected_provider'))
    WHERE provider_detection_data IS NOT NULL;

COMMENT ON COLUMN dispensaries.provider_detection_data
    IS 'JSONB metadata from menu provider detection. Keys: detected_provider, resolution_error, not_crawlable, detection_timestamp';
|
||||
|
||||
-- =====================================================
|
||||
-- MIGRATION COMPLETE
|
||||
-- =====================================================
|
||||
27
backend/migrations/045_add_image_columns.sql
Normal file
27
backend/migrations/045_add_image_columns.sql
Normal file
@@ -0,0 +1,27 @@
|
||||
-- Migration 045: Add thumbnail_url columns to canonical tables
|
||||
--
|
||||
-- NOTE: image_url already exists in both tables from migration 041.
|
||||
-- This migration adds thumbnail_url for cached thumbnail images.
|
||||
|
||||
-- Add a nullable thumbnail_url column to both canonical product tables.
--
-- ADD COLUMN IF NOT EXISTS replaces the earlier information_schema.columns guard,
-- which was not schema-qualified and could skip the ALTER when a same-named table
-- and column existed in a different schema (making the COMMENT statements below fail).
ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS thumbnail_url TEXT NULL;

ALTER TABLE store_product_snapshots
    ADD COLUMN IF NOT EXISTS thumbnail_url TEXT NULL;

COMMENT ON COLUMN store_products.thumbnail_url
    IS 'URL to cached thumbnail image';
COMMENT ON COLUMN store_product_snapshots.thumbnail_url
    IS 'URL to cached thumbnail image at time of snapshot';
|
||||
351
backend/migrations/046_crawler_reliability.sql
Normal file
351
backend/migrations/046_crawler_reliability.sql
Normal file
@@ -0,0 +1,351 @@
|
||||
-- Migration 046: Crawler Reliability & Stabilization
|
||||
-- Phase 1: Add fields for error taxonomy, retry management, and self-healing
|
||||
|
||||
-- ============================================================
|
||||
-- PART 1: Error Taxonomy - Standardized error codes
|
||||
-- ============================================================
|
||||
|
||||
-- Create enum for standardized error codes
|
||||
-- Create the crawl_error_code enum unless it already exists.
--
-- CREATE TYPE has no IF NOT EXISTS form, so the duplicate_object error is trapped
-- instead. This replaces a pg_type lookup by typname alone, which ignored the
-- schema and would skip creation whenever any schema held a type of that name.
DO $$
BEGIN
    CREATE TYPE crawl_error_code AS ENUM (
        'SUCCESS',
        'RATE_LIMITED',
        'BLOCKED_PROXY',
        'HTML_CHANGED',
        'TIMEOUT',
        'AUTH_FAILED',
        'NETWORK_ERROR',
        'PARSE_ERROR',
        'NO_PRODUCTS',
        'UNKNOWN_ERROR'
    );
EXCEPTION
    WHEN duplicate_object THEN
        -- Type is already present in the target schema; nothing to do.
        NULL;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- PART 2: Dispensary Crawl Configuration
|
||||
-- ============================================================
|
||||
|
||||
-- Add crawl config columns to dispensaries
|
||||
-- Add the crawl configuration and reliability columns to dispensaries.
--
-- One ALTER with ADD COLUMN IF NOT EXISTS per column stays idempotent and atomic.
-- It replaces per-column information_schema.columns checks that were not
-- schema-qualified and could skip an ALTER when another schema had a same-named
-- table and column.
ALTER TABLE dispensaries
    -- Crawl frequency (minutes between crawls)
    ADD COLUMN IF NOT EXISTS crawl_frequency_minutes INTEGER DEFAULT 240,
    -- Max retries per crawl
    ADD COLUMN IF NOT EXISTS max_retries INTEGER DEFAULT 3,
    -- Current proxy ID
    ADD COLUMN IF NOT EXISTS current_proxy_id INTEGER NULL,
    -- Current user agent
    ADD COLUMN IF NOT EXISTS current_user_agent TEXT NULL,
    -- Next scheduled run
    ADD COLUMN IF NOT EXISTS next_crawl_at TIMESTAMPTZ NULL,
    -- Last successful crawl
    ADD COLUMN IF NOT EXISTS last_success_at TIMESTAMPTZ NULL,
    -- Last error code (using text for flexibility, validated in app)
    ADD COLUMN IF NOT EXISTS last_error_code TEXT NULL,
    -- Crawl status: active, degraded, paused, failed
    ADD COLUMN IF NOT EXISTS crawl_status TEXT DEFAULT 'active',
    -- Backoff multiplier (increases with failures)
    ADD COLUMN IF NOT EXISTS backoff_multiplier NUMERIC(4,2) DEFAULT 1.0,
    -- Total attempt count (lifetime)
    ADD COLUMN IF NOT EXISTS total_attempts INTEGER DEFAULT 0,
    -- Total success count (lifetime)
    ADD COLUMN IF NOT EXISTS total_successes INTEGER DEFAULT 0;
|
||||
|
||||
-- ============================================================
|
||||
-- PART 3: Enhanced Job Tracking
|
||||
-- ============================================================
|
||||
|
||||
-- Add columns to dispensary_crawl_jobs
|
||||
-- Add per-job tracking columns to dispensary_crawl_jobs.
--
-- ADD COLUMN IF NOT EXISTS keeps the migration re-runnable. It replaces
-- information_schema.columns checks that were not schema-qualified and could
-- wrongly skip a column when another schema had a same-named table and column.
ALTER TABLE dispensary_crawl_jobs
    -- Error code
    ADD COLUMN IF NOT EXISTS error_code TEXT NULL,
    -- Proxy used for this job
    ADD COLUMN IF NOT EXISTS proxy_used TEXT NULL,
    -- User agent used for this job
    ADD COLUMN IF NOT EXISTS user_agent_used TEXT NULL,
    -- Attempt number for this job
    ADD COLUMN IF NOT EXISTS attempt_number INTEGER DEFAULT 1,
    -- Backoff delay applied (ms)
    ADD COLUMN IF NOT EXISTS backoff_delay_ms INTEGER DEFAULT 0,
    -- HTTP status code received
    ADD COLUMN IF NOT EXISTS http_status INTEGER NULL,
    -- Response time (ms)
    ADD COLUMN IF NOT EXISTS response_time_ms INTEGER NULL;
|
||||
|
||||
-- ============================================================
|
||||
-- PART 4: Crawl History Table (for detailed tracking)
|
||||
-- ============================================================
|
||||
|
||||
-- One row per crawl attempt: timing, outcome, request context and result counts.
CREATE TABLE IF NOT EXISTS crawl_attempts (
    id                SERIAL,
    dispensary_id     INTEGER NOT NULL,
    job_id            INTEGER,

    -- Timing
    started_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    finished_at       TIMESTAMPTZ,
    duration_ms       INTEGER,

    -- Result
    error_code        TEXT NOT NULL DEFAULT 'UNKNOWN_ERROR',
    error_message     TEXT,
    http_status       INTEGER,

    -- Context
    attempt_number    INTEGER NOT NULL DEFAULT 1,
    proxy_used        TEXT,
    user_agent_used   TEXT,

    -- Metrics
    products_found    INTEGER DEFAULT 0,
    products_upserted INTEGER DEFAULT 0,
    snapshots_created INTEGER DEFAULT 0,

    -- Metadata
    metadata          JSONB,
    created_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Constraint names match what PostgreSQL generates for inline definitions.
    CONSTRAINT crawl_attempts_pkey PRIMARY KEY (id),
    CONSTRAINT crawl_attempts_dispensary_id_fkey
        FOREIGN KEY (dispensary_id) REFERENCES dispensaries (id),
    CONSTRAINT crawl_attempts_job_id_fkey
        FOREIGN KEY (job_id) REFERENCES dispensary_crawl_jobs (id)
);

-- Lookup indexes: by store, by error code, and by most recent start time.
CREATE INDEX IF NOT EXISTS idx_crawl_attempts_dispensary_id
    ON crawl_attempts (dispensary_id);
CREATE INDEX IF NOT EXISTS idx_crawl_attempts_error_code
    ON crawl_attempts (error_code);
CREATE INDEX IF NOT EXISTS idx_crawl_attempts_started_at
    ON crawl_attempts (started_at DESC);
|
||||
|
||||
-- ============================================================
|
||||
-- PART 5: Views for Monitoring
|
||||
-- ============================================================
|
||||
|
||||
-- Drop existing view if exists
|
||||
-- Recreate the per-dispensary crawler status view (dropped first so the column
-- list can change freely).
DROP VIEW IF EXISTS v_crawler_status;

-- One row per dispensary with its reliability fields, a derived success_rate
-- (percentage, one decimal) and a derived schedule_status label.
-- NOTE(review): rows are limited to state = 'AZ' — confirm this filter is still
-- intended now that other states are being added.
CREATE VIEW v_crawler_status AS
SELECT
    d.id,
    d.name,
    d.slug,
    d.menu_type,
    d.platform_dispensary_id,
    d.crawl_status,
    d.consecutive_failures,
    d.last_crawl_at,
    d.last_success_at,
    d.last_failure_at,
    d.last_error_code,
    d.next_crawl_at,
    d.crawl_frequency_minutes,
    d.max_retries,
    d.current_proxy_id,
    d.current_user_agent,
    d.backoff_multiplier,
    d.total_attempts,
    d.total_successes,
    d.product_count,
    -- Share of attempts that succeeded; 0 when there are no recorded attempts.
    CASE
        WHEN d.total_attempts > 0
            THEN ROUND(CAST(d.total_successes AS NUMERIC) / d.total_attempts * 100, 1)
        ELSE 0
    END AS success_rate,
    -- First matching rule wins: explicit crawl_status values take priority over
    -- missing setup data, which takes priority over the schedule itself.
    CASE
        WHEN d.crawl_status = 'failed'                    THEN 'FAILED'
        WHEN d.crawl_status = 'paused'                    THEN 'PAUSED'
        WHEN d.crawl_status = 'degraded'                  THEN 'DEGRADED'
        WHEN COALESCE(d.menu_type, 'unknown') = 'unknown' THEN 'NEEDS_DETECTION'
        WHEN d.platform_dispensary_id IS NULL             THEN 'NEEDS_PLATFORM_ID'
        WHEN d.next_crawl_at IS NULL                      THEN 'NOT_SCHEDULED'
        WHEN d.next_crawl_at <= NOW()                     THEN 'DUE'
        ELSE 'SCHEDULED'
    END AS schedule_status,
    d.failed_at,
    d.failure_notes
FROM dispensaries AS d
WHERE d.state = 'AZ';
|
||||
|
||||
-- Drop existing view if exists
|
||||
-- Recreate the error summary view.
DROP VIEW IF EXISTS v_crawl_error_summary;

-- Per error code over the trailing 7 days: attempt count, number of distinct
-- stores, latest occurrence and average duration (cast to integer milliseconds).
CREATE VIEW v_crawl_error_summary AS
SELECT
    ca.error_code,
    COUNT(*)                              AS total_occurrences,
    COUNT(DISTINCT ca.dispensary_id)      AS affected_stores,
    MAX(ca.started_at)                    AS last_occurrence,
    CAST(AVG(ca.duration_ms) AS INTEGER)  AS avg_duration_ms
FROM crawl_attempts AS ca
WHERE ca.started_at > NOW() - INTERVAL '7 days'
GROUP BY ca.error_code
ORDER BY total_occurrences DESC;
|
||||
|
||||
-- Drop existing view if exists
|
||||
-- Recreate the overall crawl health view.
DROP VIEW IF EXISTS v_crawl_health;

-- Single-row rollup over dispensaries with state = 'AZ' and menu_type = 'dutchie'.
-- COUNT(CASE ... END) counts only rows where the condition holds, because the
-- CASE yields NULL otherwise.
CREATE VIEW v_crawl_health AS
SELECT
    COUNT(CASE WHEN d.crawl_status = 'active'   THEN 1 END) AS active_crawlers,
    COUNT(CASE WHEN d.crawl_status = 'degraded' THEN 1 END) AS degraded_crawlers,
    COUNT(CASE WHEN d.crawl_status = 'paused'   THEN 1 END) AS paused_crawlers,
    COUNT(CASE WHEN d.crawl_status = 'failed'   THEN 1 END) AS failed_crawlers,
    COUNT(CASE WHEN d.next_crawl_at <= NOW()    THEN 1 END) AS due_now,
    COUNT(CASE WHEN d.consecutive_failures > 0  THEN 1 END) AS stores_with_failures,
    CAST(AVG(d.consecutive_failures) AS NUMERIC(4,2))       AS avg_consecutive_failures,
    COUNT(
        CASE WHEN d.last_success_at > NOW() - INTERVAL '24 hours' THEN 1 END
    )                                                       AS successful_last_24h
FROM dispensaries AS d
WHERE d.state = 'AZ'
  AND d.menu_type = 'dutchie';
|
||||
|
||||
-- ============================================================
|
||||
-- PART 6: Constraint for minimum crawl gap
|
||||
-- ============================================================
|
||||
|
||||
-- Function to check minimum crawl gap (2 minutes)
|
||||
-- Trigger function: rejects a new pending crawl job when the same dispensary
-- already has another pending or running job created within the last 2 minutes.
-- Returns NEW unchanged when the row is allowed.
CREATE OR REPLACE FUNCTION check_minimum_crawl_gap()
RETURNS TRIGGER AS $$
BEGIN
    -- Rows that are not pending, or have no dispensary, are not checked.
    IF NEW.status IS DISTINCT FROM 'pending' OR NEW.dispensary_id IS NULL THEN
        RETURN NEW;
    END IF;

    -- Probe for a competing recent job; FOUND reports whether one exists.
    PERFORM 1
       FROM dispensary_crawl_jobs AS recent
      WHERE recent.dispensary_id = NEW.dispensary_id
        AND recent.id <> NEW.id
        AND recent.status IN ('pending', 'running')
        AND recent.created_at > NOW() - INTERVAL '2 minutes';

    IF FOUND THEN
        RAISE EXCEPTION 'Minimum 2-minute gap required between crawls for same dispensary';
    END IF;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Create trigger (drop first if exists)
|
||||
-- Re-create the trigger so the migration can be re-run: remove any previous
-- definition, then attach the gap check to every inserted row.
DROP TRIGGER IF EXISTS enforce_minimum_crawl_gap
    ON dispensary_crawl_jobs;

CREATE TRIGGER enforce_minimum_crawl_gap
    BEFORE INSERT
    ON dispensary_crawl_jobs
    FOR EACH ROW
    EXECUTE FUNCTION check_minimum_crawl_gap();
|
||||
|
||||
-- ============================================================
|
||||
-- PART 7: Comments
|
||||
-- ============================================================
|
||||
|
||||
-- Catalog descriptions for the crawl history table and the monitoring views.
COMMENT ON TABLE crawl_attempts
    IS 'Detailed history of every crawl attempt for analytics and debugging';
COMMENT ON VIEW v_crawler_status
    IS 'Current status of all crawlers with reliability metrics';
COMMENT ON VIEW v_crawl_error_summary
    IS 'Summary of errors by type over last 7 days';
COMMENT ON VIEW v_crawl_health
    IS 'Overall health metrics for the crawling system';
|
||||
130
backend/migrations/046_raw_payloads_table.sql
Normal file
130
backend/migrations/046_raw_payloads_table.sql
Normal file
@@ -0,0 +1,130 @@
|
||||
-- Migration 046: Raw Payloads Table
|
||||
--
|
||||
-- Immutable event stream for raw crawler responses.
|
||||
-- NEVER delete or overwrite historical payloads.
|
||||
--
|
||||
-- Run with:
|
||||
-- export DATABASE_URL="postgresql://..."; psql "$DATABASE_URL" -f migrations/046_raw_payloads_table.sql
|
||||
|
||||
-- =====================================================
|
||||
-- 1) RAW_PAYLOADS TABLE
|
||||
-- =====================================================
|
||||
-- Raw crawler responses together with their hydration bookkeeping.
-- NOTE(review): gen_random_uuid() is built in from PostgreSQL 13; earlier
-- versions need the pgcrypto extension — confirm the target server version.
CREATE TABLE IF NOT EXISTS raw_payloads (
    id                 UUID DEFAULT gen_random_uuid(),

    -- Store reference
    dispensary_id      INTEGER NOT NULL,

    -- Crawl run reference (nullable for backfilled data)
    crawl_run_id       INTEGER,

    -- Platform identification
    platform           VARCHAR(50) NOT NULL DEFAULT 'dutchie',

    -- Versioning for schema evolution
    payload_version    INTEGER NOT NULL DEFAULT 1,

    -- The raw JSON response from the crawler (immutable)
    raw_json           JSONB NOT NULL,

    -- Metadata
    product_count      INTEGER,      -- Number of products in payload
    pricing_type       VARCHAR(20),  -- 'rec', 'med', or 'both'
    crawl_mode         VARCHAR(20),  -- 'mode_a', 'mode_b', 'dual'

    -- Timestamps
    fetched_at         TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Hydration status
    processed          BOOLEAN NOT NULL DEFAULT FALSE,
    normalized_at      TIMESTAMPTZ,
    hydration_error    TEXT,
    hydration_attempts INTEGER DEFAULT 0,

    -- Audit
    created_at         TIMESTAMPTZ DEFAULT NOW(),

    -- Constraint names match what PostgreSQL generates for inline definitions.
    CONSTRAINT raw_payloads_pkey PRIMARY KEY (id),
    CONSTRAINT raw_payloads_dispensary_id_fkey
        FOREIGN KEY (dispensary_id) REFERENCES dispensaries (id) ON DELETE CASCADE,
    CONSTRAINT raw_payloads_crawl_run_id_fkey
        FOREIGN KEY (crawl_run_id) REFERENCES crawl_runs (id) ON DELETE SET NULL
);
|
||||
|
||||
-- =====================================================
|
||||
-- 2) INDEXES FOR EFFICIENT QUERYING
|
||||
-- =====================================================
|
||||
|
||||
-- Primary lookup: unprocessed payloads in FIFO order
|
||||
-- Unprocessed payloads ordered by fetch time (partial index: processed = FALSE only).
CREATE INDEX IF NOT EXISTS idx_raw_payloads_unprocessed
    ON raw_payloads (fetched_at ASC)
    WHERE processed = FALSE;

-- Per-store lookups, newest payload first.
CREATE INDEX IF NOT EXISTS idx_raw_payloads_dispensary
    ON raw_payloads (dispensary_id, fetched_at DESC);

-- Filtering by platform.
CREATE INDEX IF NOT EXISTS idx_raw_payloads_platform
    ON raw_payloads (platform);

-- Payloads linked to a crawl run (rows without a run are not indexed).
CREATE INDEX IF NOT EXISTS idx_raw_payloads_crawl_run
    ON raw_payloads (crawl_run_id)
    WHERE crawl_run_id IS NOT NULL;

-- Payloads that recorded a hydration error, by attempt count and processed flag.
CREATE INDEX IF NOT EXISTS idx_raw_payloads_errors
    ON raw_payloads (hydration_attempts, processed)
    WHERE hydration_error IS NOT NULL;
|
||||
|
||||
-- =====================================================
|
||||
-- 3) HYDRATION LOCKS TABLE (distributed locking)
|
||||
-- =====================================================
|
||||
-- Named locks held by hydration workers; lock_name is unique, so only one row
-- (one holder) can exist per lock.
CREATE TABLE IF NOT EXISTS hydration_locks (
    id           SERIAL,
    lock_name    VARCHAR(100) NOT NULL,
    worker_id    VARCHAR(100) NOT NULL,
    acquired_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    expires_at   TIMESTAMPTZ NOT NULL,
    heartbeat_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    -- Constraint names match what PostgreSQL generates for inline definitions.
    CONSTRAINT hydration_locks_pkey PRIMARY KEY (id),
    CONSTRAINT hydration_locks_lock_name_key UNIQUE (lock_name)
);

-- Supports lookups and range scans on the lock expiry time.
CREATE INDEX IF NOT EXISTS idx_hydration_locks_expires
    ON hydration_locks (expires_at);
|
||||
|
||||
-- =====================================================
|
||||
-- 4) HYDRATION_RUNS TABLE (audit trail)
|
||||
-- =====================================================
|
||||
-- One row per hydration job execution, with counters and an optional error message.
CREATE TABLE IF NOT EXISTS hydration_runs (
    id                 SERIAL,
    worker_id          VARCHAR(100) NOT NULL,
    started_at         TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    finished_at        TIMESTAMPTZ,
    status             VARCHAR(20) NOT NULL DEFAULT 'running',  -- running, completed, failed

    -- Metrics
    payloads_processed INTEGER DEFAULT 0,
    products_upserted  INTEGER DEFAULT 0,
    snapshots_created  INTEGER DEFAULT 0,
    brands_created     INTEGER DEFAULT 0,
    errors_count       INTEGER DEFAULT 0,

    -- Error details
    error_message      TEXT,

    created_at         TIMESTAMPTZ DEFAULT NOW(),

    -- Constraint name matches what PostgreSQL generates for an inline PRIMARY KEY.
    CONSTRAINT hydration_runs_pkey PRIMARY KEY (id)
);

-- Runs by status, most recently started first.
CREATE INDEX IF NOT EXISTS idx_hydration_runs_status
    ON hydration_runs (status, started_at DESC);
|
||||
|
||||
-- =====================================================
|
||||
-- 5) COMMENTS
|
||||
-- =====================================================
|
||||
-- Catalog descriptions for the raw payload table and its key columns.
COMMENT ON TABLE raw_payloads
    IS 'Immutable event stream of raw crawler responses. NEVER DELETE.';
COMMENT ON COLUMN raw_payloads.raw_json
    IS 'Complete raw JSON from GraphQL/API response. Immutable.';
COMMENT ON COLUMN raw_payloads.payload_version
    IS 'Schema version for normalization compatibility.';
COMMENT ON COLUMN raw_payloads.processed
    IS 'TRUE when payload has been hydrated to canonical tables.';
COMMENT ON COLUMN raw_payloads.normalized_at
    IS 'When the payload was successfully hydrated.';

-- Catalog descriptions for the hydration support tables.
COMMENT ON TABLE hydration_locks
    IS 'Distributed locks for hydration workers to prevent double-processing.';
COMMENT ON TABLE hydration_runs
    IS 'Audit trail of hydration job executions.';
|
||||
|
||||
-- =====================================================
|
||||
-- MIGRATION COMPLETE
|
||||
-- =====================================================
|
||||
473
backend/migrations/047_analytics_infrastructure.sql
Normal file
473
backend/migrations/047_analytics_infrastructure.sql
Normal file
@@ -0,0 +1,473 @@
|
||||
-- Migration 047: Analytics Infrastructure
|
||||
-- Phase 3: Analytics Dashboards for CannaiQ
|
||||
-- Creates views, functions, and tables for price trends, brand penetration, category growth, etc.
|
||||
|
||||
-- ============================================================
|
||||
-- ANALYTICS CACHE TABLE (for expensive query results)
|
||||
-- ============================================================
|
||||
-- Stored results of expensive analytics queries, addressed by a unique cache_key
-- and carrying an explicit expiry time.
CREATE TABLE IF NOT EXISTS analytics_cache (
    id            SERIAL,
    cache_key     VARCHAR(255) NOT NULL,
    cache_data    JSONB NOT NULL,
    computed_at   TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    expires_at    TIMESTAMPTZ NOT NULL,
    query_time_ms INTEGER,
    created_at    TIMESTAMPTZ DEFAULT NOW(),
    -- Constraint names match what PostgreSQL generates for inline definitions.
    CONSTRAINT analytics_cache_pkey PRIMARY KEY (id),
    CONSTRAINT analytics_cache_cache_key_key UNIQUE (cache_key)
);

-- NOTE(review): the UNIQUE constraint on cache_key already maintains an index on
-- that column, so idx_analytics_cache_key looks redundant — confirm before relying on it.
CREATE INDEX IF NOT EXISTS idx_analytics_cache_key
    ON analytics_cache (cache_key);

-- Supports lookups and range scans on the expiry time.
CREATE INDEX IF NOT EXISTS idx_analytics_cache_expires
    ON analytics_cache (expires_at);
|
||||
|
||||
-- ============================================================
|
||||
-- PRICE EXTRACTION HELPER FUNCTION
|
||||
-- Extracts pricing from JSONB latest_raw_payload
|
||||
-- ============================================================
|
||||
-- Returns the lowest numeric price found in a product payload.
--
-- Parameters:
--   payload  JSONB product payload. The 'recPrices' array is consulted first,
--            then 'Prices'.
-- Returns:
--   The minimum of the numeric-looking elements of the first array that yields
--   one, or NULL when neither key holds a usable price.
--
-- jsonb_typeof() guards the array handling: a key holding JSON null or a scalar is
-- non-NULL in SQL, and jsonb_array_length() raises on anything that is not an
-- array. The element pattern admits only strings that cast cleanly to NUMERIC
-- ('12', '12.5', '12.', '.5'); a bare character class such as [0-9.]+ would also
-- admit '.' and '1.2.3', which abort the cast.
CREATE OR REPLACE FUNCTION extract_min_price(payload JSONB)
RETURNS NUMERIC AS $$
DECLARE
    price_key TEXT;
    prices    JSONB;
    min_val   NUMERIC;
BEGIN
    -- Keys are tried in priority order; the first one with a usable value wins.
    FOREACH price_key IN ARRAY ARRAY['recPrices', 'Prices'] LOOP
        prices := payload -> price_key;
        IF jsonb_typeof(prices) = 'array' THEN
            SELECT MIN(elem::NUMERIC)
              INTO min_val
              FROM jsonb_array_elements_text(prices) AS elem
             WHERE elem ~ '^([0-9]+[.]?[0-9]*|[.][0-9]+)$';
            IF min_val IS NOT NULL THEN
                RETURN min_val;
            END IF;
        END IF;
    END LOOP;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- Returns the highest numeric price found in a product payload.
--
-- Parameters:
--   payload  JSONB product payload. The 'recPrices' array is consulted first,
--            then 'Prices'.
-- Returns:
--   The maximum of the numeric-looking elements of the first array that yields
--   one, or NULL when neither key holds a usable price.
--
-- jsonb_typeof() guards the array handling: a key holding JSON null or a scalar is
-- non-NULL in SQL, and jsonb_array_length() raises on anything that is not an
-- array. The element pattern admits only strings that cast cleanly to NUMERIC
-- ('12', '12.5', '12.', '.5'); a bare character class such as [0-9.]+ would also
-- admit '.' and '1.2.3', which abort the cast.
CREATE OR REPLACE FUNCTION extract_max_price(payload JSONB)
RETURNS NUMERIC AS $$
DECLARE
    price_key TEXT;
    prices    JSONB;
    max_val   NUMERIC;
BEGIN
    -- Keys are tried in priority order; the first one with a usable value wins.
    FOREACH price_key IN ARRAY ARRAY['recPrices', 'Prices'] LOOP
        prices := payload -> price_key;
        IF jsonb_typeof(prices) = 'array' THEN
            SELECT MAX(elem::NUMERIC)
              INTO max_val
              FROM jsonb_array_elements_text(prices) AS elem
             WHERE elem ~ '^([0-9]+[.]?[0-9]*|[.][0-9]+)$';
            IF max_val IS NOT NULL THEN
                RETURN max_val;
            END IF;
        END IF;
    END LOOP;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- Returns the lowest numeric wholesale price found in a product payload.
--
-- Parameters:
--   payload  JSONB product payload; only the 'wholesalePrices' array is consulted.
-- Returns:
--   The minimum of the numeric-looking elements of that array, or NULL when the
--   key is absent, is not an array, or holds no usable price.
--
-- jsonb_typeof() guards the array handling: a key holding JSON null or a scalar is
-- non-NULL in SQL, and jsonb_array_length() raises on anything that is not an
-- array. The element pattern admits only strings that cast cleanly to NUMERIC
-- ('12', '12.5', '12.', '.5'); a bare character class such as [0-9.]+ would also
-- admit '.' and '1.2.3', which abort the cast.
CREATE OR REPLACE FUNCTION extract_wholesale_price(payload JSONB)
RETURNS NUMERIC AS $$
DECLARE
    prices  JSONB;
    min_val NUMERIC;
BEGIN
    prices := payload -> 'wholesalePrices';

    -- Covers a missing key (SQL NULL) as well as JSON null, scalars and objects.
    IF jsonb_typeof(prices) IS DISTINCT FROM 'array' THEN
        RETURN NULL;
    END IF;

    SELECT MIN(elem::NUMERIC)
      INTO min_val
      FROM jsonb_array_elements_text(prices) AS elem
     WHERE elem ~ '^([0-9]+[.]?[0-9]*|[.][0-9]+)$';

    RETURN min_val;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: v_product_pricing
|
||||
-- Flattened view of products with extracted pricing
|
||||
-- ============================================================
|
||||
-- One row per product joined to its store, with min/max/wholesale prices pulled
-- out of latest_raw_payload by the extract_* helper functions.
CREATE OR REPLACE VIEW v_product_pricing AS
SELECT
    dp.id,
    dp.dispensary_id,
    dp.name,
    dp.brand_name,
    dp.brand_id,
    dp.type                                        AS category,
    dp.subcategory,
    dp.strain_type,
    dp.stock_status,
    dp.status,
    d.name                                         AS store_name,
    d.city,
    d.state,
    extract_min_price(dp.latest_raw_payload)       AS min_price,
    extract_max_price(dp.latest_raw_payload)       AS max_price,
    extract_wholesale_price(dp.latest_raw_payload) AS wholesale_price,
    dp.thc,
    dp.cbd,
    dp.updated_at,
    dp.created_at
FROM dutchie_products AS dp
INNER JOIN dispensaries AS d
    ON d.id = dp.dispensary_id;
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: v_brand_store_presence
|
||||
-- Which brands are in which stores
|
||||
-- ============================================================
|
||||
-- One row per brand, store and category: SKU count, average minimum price,
-- in-stock SKU count and latest update time. Products without a brand name are
-- excluded.
CREATE OR REPLACE VIEW v_brand_store_presence AS
SELECT
    dp.brand_name,
    dp.brand_id,
    dp.dispensary_id,
    d.name                                                  AS store_name,
    d.city,
    d.state,
    dp.type                                                 AS category,
    COUNT(*)                                                AS sku_count,
    AVG(extract_min_price(dp.latest_raw_payload))           AS avg_price,
    COUNT(*) FILTER (WHERE dp.stock_status = 'in_stock')    AS in_stock_count,
    MAX(dp.updated_at)                                      AS last_updated
FROM dutchie_products AS dp
INNER JOIN dispensaries AS d
    ON d.id = dp.dispensary_id
WHERE dp.brand_name IS NOT NULL
GROUP BY
    dp.brand_name,
    dp.brand_id,
    dp.dispensary_id,
    d.name,
    d.city,
    d.state,
    dp.type;
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: v_category_store_summary
|
||||
-- Category breakdown per store
|
||||
-- ============================================================
|
||||
-- One row per store and category: SKU and brand counts, price statistics and
-- in-stock SKU count. Products without a type are excluded.
CREATE OR REPLACE VIEW v_category_store_summary AS
SELECT
    dp.dispensary_id,
    d.name                                                  AS store_name,
    d.city,
    d.state,
    dp.type                                                 AS category,
    COUNT(*)                                                AS sku_count,
    COUNT(DISTINCT dp.brand_name)                           AS brand_count,
    AVG(extract_min_price(dp.latest_raw_payload))           AS avg_price,
    MIN(extract_min_price(dp.latest_raw_payload))           AS min_price,
    MAX(extract_max_price(dp.latest_raw_payload))           AS max_price,
    COUNT(*) FILTER (WHERE dp.stock_status = 'in_stock')    AS in_stock_count
FROM dutchie_products AS dp
INNER JOIN dispensaries AS d
    ON d.id = dp.dispensary_id
WHERE dp.type IS NOT NULL
GROUP BY
    dp.dispensary_id,
    d.name,
    d.city,
    d.state,
    dp.type;
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: v_brand_summary
|
||||
-- Global brand statistics
|
||||
-- ============================================================
|
||||
-- One row per brand across all stores: SKU, store and category counts, price
-- statistics, in-stock SKUs, the list of distinct categories and the latest
-- update time. Ordered by SKU count, largest first.
CREATE OR REPLACE VIEW v_brand_summary AS
SELECT
    dp.brand_name,
    dp.brand_id,
    COUNT(*)                                                        AS total_skus,
    COUNT(DISTINCT dp.dispensary_id)                                AS store_count,
    COUNT(DISTINCT dp.type)                                         AS category_count,
    AVG(extract_min_price(dp.latest_raw_payload))                   AS avg_price,
    MIN(extract_min_price(dp.latest_raw_payload))                   AS min_price,
    MAX(extract_max_price(dp.latest_raw_payload))                   AS max_price,
    COUNT(*) FILTER (WHERE dp.stock_status = 'in_stock')            AS in_stock_skus,
    ARRAY_AGG(DISTINCT dp.type) FILTER (WHERE dp.type IS NOT NULL)  AS categories,
    MAX(dp.updated_at)                                              AS last_updated
FROM dutchie_products AS dp
WHERE dp.brand_name IS NOT NULL
GROUP BY
    dp.brand_name,
    dp.brand_id
ORDER BY total_skus DESC;
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: v_category_summary
|
||||
-- Global category statistics
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_category_summary AS
-- One row per product category across all stores, largest category first.
SELECT
    p.type AS category,
    COUNT(*) AS total_skus,
    COUNT(DISTINCT p.brand_name) AS brand_count,
    COUNT(DISTINCT p.dispensary_id) AS store_count,
    AVG(extract_min_price(p.latest_raw_payload)) AS avg_price,
    MIN(extract_min_price(p.latest_raw_payload)) AS min_price,
    MAX(extract_max_price(p.latest_raw_payload)) AS max_price,
    COUNT(*) FILTER (WHERE p.stock_status = 'in_stock') AS in_stock_skus
FROM dutchie_products AS p
WHERE p.type IS NOT NULL
GROUP BY p.type
ORDER BY total_skus DESC;
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: v_store_summary
|
||||
-- Store-level statistics
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_store_summary AS
-- One row per dispensary. The LEFT JOIN keeps stores that have no products;
-- for those the counts are 0 and avg_price is NULL.
SELECT
    store.id AS store_id,
    store.name AS store_name,
    store.city,
    store.state,
    store.chain_id,
    COUNT(prod.id) AS total_skus,
    COUNT(DISTINCT prod.brand_name) AS brand_count,
    COUNT(DISTINCT prod.type) AS category_count,
    AVG(extract_min_price(prod.latest_raw_payload)) AS avg_price,
    COUNT(*) FILTER (WHERE prod.stock_status = 'in_stock') AS in_stock_skus,
    store.last_crawl_at,
    store.product_count
FROM dispensaries AS store
LEFT JOIN dutchie_products AS prod
    ON prod.dispensary_id = store.id
GROUP BY
    store.id,
    store.name,
    store.city,
    store.state,
    store.chain_id,
    store.last_crawl_at,
    store.product_count;
|
||||
|
||||
-- ============================================================
|
||||
-- TABLE: brand_snapshots (for historical brand tracking)
|
||||
-- ============================================================
|
||||
-- Daily per-brand metrics; at most one row per brand name per calendar day.
CREATE TABLE IF NOT EXISTS brand_snapshots (
    id              SERIAL PRIMARY KEY,

    -- Identity of the snapshot
    brand_name      VARCHAR(255) NOT NULL,
    brand_id        VARCHAR(255),
    snapshot_date   DATE NOT NULL,

    -- Metrics captured for that day
    store_count     INTEGER NOT NULL DEFAULT 0,
    total_skus      INTEGER NOT NULL DEFAULT 0,
    avg_price       NUMERIC(10,2),
    in_stock_skus   INTEGER NOT NULL DEFAULT 0,
    categories      TEXT[],

    created_at      TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE (brand_name, snapshot_date)
);

CREATE INDEX IF NOT EXISTS idx_brand_snapshots_brand
    ON brand_snapshots (brand_name);
CREATE INDEX IF NOT EXISTS idx_brand_snapshots_date
    ON brand_snapshots (snapshot_date);
|
||||
|
||||
-- ============================================================
|
||||
-- TABLE: category_snapshots (for historical category tracking)
|
||||
-- ============================================================
|
||||
-- Daily per-category metrics; at most one row per category per calendar day.
CREATE TABLE IF NOT EXISTS category_snapshots (
    id              SERIAL PRIMARY KEY,

    -- Identity of the snapshot
    category        VARCHAR(255) NOT NULL,
    snapshot_date   DATE NOT NULL,

    -- Metrics captured for that day
    store_count     INTEGER NOT NULL DEFAULT 0,
    brand_count     INTEGER NOT NULL DEFAULT 0,
    total_skus      INTEGER NOT NULL DEFAULT 0,
    avg_price       NUMERIC(10,2),
    in_stock_skus   INTEGER NOT NULL DEFAULT 0,

    created_at      TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE (category, snapshot_date)
);

CREATE INDEX IF NOT EXISTS idx_category_snapshots_cat
    ON category_snapshots (category);
CREATE INDEX IF NOT EXISTS idx_category_snapshots_date
    ON category_snapshots (snapshot_date);
|
||||
|
||||
-- ============================================================
|
||||
-- TABLE: store_change_events (for tracking store changes)
|
||||
-- ============================================================
|
||||
-- Append-only log of changes observed at a store.
CREATE TABLE IF NOT EXISTS store_change_events (
    id              SERIAL PRIMARY KEY,
    store_id        INTEGER NOT NULL REFERENCES dispensaries (id),

    -- event_type values: brand_added, brand_removed, product_added,
    -- product_removed, price_change, stock_change
    event_type      VARCHAR(50) NOT NULL,
    event_date      DATE NOT NULL,

    -- What the event refers to (all optional)
    brand_name      VARCHAR(255),
    product_id      INTEGER,
    product_name    VARCHAR(500),
    category        VARCHAR(255),

    -- Before/after values and free-form extras
    old_value       TEXT,
    new_value       TEXT,
    metadata        JSONB,

    created_at      TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_store_events_store
    ON store_change_events (store_id);
CREATE INDEX IF NOT EXISTS idx_store_events_type
    ON store_change_events (event_type);
CREATE INDEX IF NOT EXISTS idx_store_events_date
    ON store_change_events (event_date);
CREATE INDEX IF NOT EXISTS idx_store_events_brand
    ON store_change_events (brand_name);
|
||||
|
||||
-- ============================================================
|
||||
-- TABLE: analytics_alerts
|
||||
-- ============================================================
|
||||
-- Alerts produced by the analytics layer.
CREATE TABLE IF NOT EXISTS analytics_alerts (
    id              SERIAL PRIMARY KEY,

    -- alert_type values: price_warning, brand_dropped,
    -- competitive_intrusion, restock_event
    alert_type      VARCHAR(50) NOT NULL,
    -- severity values: info, warning, critical
    severity        VARCHAR(20) NOT NULL DEFAULT 'info',
    title           VARCHAR(255) NOT NULL,
    description     TEXT,

    -- Optional subject of the alert
    store_id        INTEGER REFERENCES dispensaries (id),
    brand_name      VARCHAR(255),
    product_id      INTEGER,
    category        VARCHAR(255),
    metadata        JSONB,

    is_read         BOOLEAN DEFAULT FALSE,
    created_at      TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_analytics_alerts_type
    ON analytics_alerts (alert_type);
CREATE INDEX IF NOT EXISTS idx_analytics_alerts_read
    ON analytics_alerts (is_read);
CREATE INDEX IF NOT EXISTS idx_analytics_alerts_created
    ON analytics_alerts (created_at DESC);
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: Capture daily brand snapshots
|
||||
-- ============================================================
|
||||
-- Upserts today's (CURRENT_DATE) brand metrics into brand_snapshots.
--
-- Returns: number of rows inserted or updated by the upsert.
--
-- The aggregation is keyed on brand_name alone because the conflict target is
-- UNIQUE(brand_name, snapshot_date). Grouping by (brand_name, brand_id) can
-- emit two rows for the same brand name (e.g. NULL and non-NULL brand_id),
-- which makes the whole INSERT fail with "ON CONFLICT DO UPDATE command
-- cannot affect row a second time".
CREATE OR REPLACE FUNCTION capture_brand_snapshots()
RETURNS INTEGER AS $$
DECLARE
    inserted_count INTEGER;
BEGIN
    INSERT INTO brand_snapshots (
        brand_name, brand_id, snapshot_date,
        store_count, total_skus, avg_price, in_stock_skus, categories
    )
    SELECT
        dp.brand_name,
        -- One representative id per brand name; MAX ignores NULLs.
        -- NOTE(review): assumes dutchie_products.brand_id is a text-like
        -- column (brand_snapshots.brand_id is VARCHAR) - confirm.
        MAX(dp.brand_id),
        CURRENT_DATE,
        COUNT(DISTINCT dp.dispensary_id),
        COUNT(*),
        AVG(extract_min_price(dp.latest_raw_payload)),
        SUM(CASE WHEN dp.stock_status = 'in_stock' THEN 1 ELSE 0 END),
        ARRAY_AGG(DISTINCT dp.type) FILTER (WHERE dp.type IS NOT NULL)
    FROM dutchie_products dp
    WHERE dp.brand_name IS NOT NULL
    GROUP BY dp.brand_name
    ON CONFLICT (brand_name, snapshot_date)
    DO UPDATE SET
        store_count = EXCLUDED.store_count,
        total_skus = EXCLUDED.total_skus,
        avg_price = EXCLUDED.avg_price,
        in_stock_skus = EXCLUDED.in_stock_skus,
        categories = EXCLUDED.categories;

    GET DIAGNOSTICS inserted_count = ROW_COUNT;
    RETURN inserted_count;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: Capture daily category snapshots
|
||||
-- ============================================================
|
||||
-- Upserts today's (CURRENT_DATE) per-category metrics into category_snapshots.
-- Returns the number of rows inserted or updated.
CREATE OR REPLACE FUNCTION capture_category_snapshots()
RETURNS INTEGER AS $$
DECLARE
    v_rows INTEGER;
BEGIN
    INSERT INTO category_snapshots (
        category,
        snapshot_date,
        store_count,
        brand_count,
        total_skus,
        avg_price,
        in_stock_skus
    )
    SELECT
        p.type,
        CURRENT_DATE,
        COUNT(DISTINCT p.dispensary_id),
        COUNT(DISTINCT p.brand_name),
        COUNT(*),
        AVG(extract_min_price(p.latest_raw_payload)),
        COUNT(*) FILTER (WHERE p.stock_status = 'in_stock')
    FROM dutchie_products AS p
    WHERE p.type IS NOT NULL
    GROUP BY p.type
    -- Re-running on the same day refreshes that day's row.
    ON CONFLICT (category, snapshot_date) DO UPDATE
        SET store_count   = EXCLUDED.store_count,
            brand_count   = EXCLUDED.brand_count,
            total_skus    = EXCLUDED.total_skus,
            avg_price     = EXCLUDED.avg_price,
            in_stock_skus = EXCLUDED.in_stock_skus;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: Calculate price volatility for a product
|
||||
-- ============================================================
|
||||
-- Price volatility of one product over the last p_days days, expressed as the
-- coefficient of variation in percent: stddev / mean * 100, rounded to 2 dp.
-- Reads rec_min_price_cents from dutchie_product_snapshots (cents -> dollars).
-- Returns NULL when there are no priced snapshots in the window or the mean
-- price is 0.
CREATE OR REPLACE FUNCTION calculate_price_volatility(
    p_product_id INTEGER,
    p_days INTEGER DEFAULT 30
)
RETURNS NUMERIC AS $$
DECLARE
    v_spread NUMERIC;
    v_mean   NUMERIC;
BEGIN
    SELECT
        STDDEV(s.rec_min_price_cents / 100.0),
        AVG(s.rec_min_price_cents / 100.0)
    INTO v_spread, v_mean
    FROM dutchie_product_snapshots AS s
    WHERE s.dutchie_product_id = p_product_id
      AND s.rec_min_price_cents IS NOT NULL
      AND s.crawled_at >= NOW() - (p_days || ' days')::INTERVAL;

    -- NULLIF turns a zero mean into NULL, so both the "no data" and the
    -- "mean is zero" cases yield NULL instead of dividing by zero.
    RETURN ROUND((v_spread / NULLIF(v_mean, 0)) * 100, 2);
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: Get brand penetration stats
|
||||
-- ============================================================
|
||||
-- Penetration statistics for one brand, optionally restricted to one state.
--
-- Parameters:
--   p_brand_name  brand to measure (exact match on dutchie_products.brand_name)
--   p_state       value matched against dispensaries.state; NULL = all states
--
-- Returns a single row:
--   total_stores        dispensaries in scope
--   stores_carrying     dispensaries in scope with at least one SKU of the brand
--   penetration_pct     stores_carrying / total_stores * 100
--   total_skus          SKUs of the brand in scope
--   avg_skus_per_store  total_skus / stores carrying the brand
--   shelf_share_pct     total_skus / all SKUs in scope * 100
-- Ratios are NULL when their denominator is 0.
--
-- Every CTE applies the same state filter so numerators and denominators
-- describe the same set of stores.
CREATE OR REPLACE FUNCTION get_brand_penetration(
    p_brand_name VARCHAR,
    p_state VARCHAR DEFAULT NULL
)
RETURNS TABLE (
    total_stores BIGINT,
    stores_carrying BIGINT,
    penetration_pct NUMERIC,
    total_skus BIGINT,
    avg_skus_per_store NUMERIC,
    shelf_share_pct NUMERIC
) AS $$
BEGIN
    RETURN QUERY
    WITH store_counts AS (
        -- LEFT JOIN so stores without any products still count as in scope.
        SELECT
            COUNT(DISTINCT d.id) AS total,
            COUNT(DISTINCT CASE WHEN dp.brand_name = p_brand_name THEN dp.dispensary_id END) AS carrying
        FROM dispensaries d
        LEFT JOIN dutchie_products dp ON d.id = dp.dispensary_id
        WHERE (p_state IS NULL OR d.state = p_state)
    ),
    sku_counts AS (
        -- Brand SKUs restricted to the same stores as the other CTEs.
        SELECT
            COUNT(*) AS brand_skus,
            COUNT(DISTINCT dp.dispensary_id) AS stores_with_brand
        FROM dutchie_products dp
        INNER JOIN dispensaries d ON dp.dispensary_id = d.id
        WHERE dp.brand_name = p_brand_name
          AND (p_state IS NULL OR d.state = p_state)
    ),
    all_skus AS (
        SELECT COUNT(*) AS total
        FROM dutchie_products dp
        INNER JOIN dispensaries d ON dp.dispensary_id = d.id
        WHERE (p_state IS NULL OR d.state = p_state)
    )
    SELECT
        sc.total,
        sc.carrying,
        ROUND((sc.carrying::NUMERIC / NULLIF(sc.total, 0)) * 100, 2),
        skc.brand_skus,
        ROUND(skc.brand_skus::NUMERIC / NULLIF(skc.stores_with_brand, 0), 2),
        ROUND((skc.brand_skus::NUMERIC / NULLIF(ask.total, 0)) * 100, 2)
    -- Each CTE yields exactly one row, so the cross joins yield one row.
    FROM store_counts sc
    CROSS JOIN sku_counts skc
    CROSS JOIN all_skus ask;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- Initial snapshot capture (run manually if needed)
|
||||
-- ============================================================
|
||||
-- Note: Run these after migration to capture initial snapshots:
|
||||
-- SELECT capture_brand_snapshots();
|
||||
-- SELECT capture_category_snapshots();
|
||||
|
||||
-- ============================================================
|
||||
-- Grant permissions
|
||||
-- ============================================================
|
||||
-- No GRANT statements are issued here: access to these views follows the owner's privileges and any default privileges already configured.
|
||||
|
||||
-- Catalog descriptions for the analytics views and tables.
COMMENT ON VIEW v_product_pricing IS 'Flattened product view with extracted pricing from JSONB';
COMMENT ON VIEW v_brand_store_presence IS 'Brand presence across stores with SKU counts';
COMMENT ON VIEW v_category_store_summary IS 'Per-store category statistics';
COMMENT ON VIEW v_brand_summary IS 'Global brand statistics';
COMMENT ON VIEW v_category_summary IS 'Global category statistics';
COMMENT ON VIEW v_store_summary IS 'Store-level statistics';

COMMENT ON TABLE analytics_cache IS 'Cache for expensive analytics queries';
COMMENT ON TABLE brand_snapshots IS 'Historical daily snapshots of brand metrics';
COMMENT ON TABLE category_snapshots IS 'Historical daily snapshots of category metrics';
COMMENT ON TABLE store_change_events IS 'Log of brand/product changes at stores';
COMMENT ON TABLE analytics_alerts IS 'Analytics-generated alerts and notifications';
|
||||
598
backend/migrations/048_production_sync_monitoring.sql
Normal file
598
backend/migrations/048_production_sync_monitoring.sql
Normal file
@@ -0,0 +1,598 @@
|
||||
-- Migration 048: Production Sync + Monitoring Infrastructure
|
||||
-- Phase 5: Full Production Sync + Monitoring
|
||||
--
|
||||
-- Creates:
|
||||
-- 1. Sync orchestrator tables
|
||||
-- 2. Dead-letter queue (DLQ)
|
||||
-- 3. System metrics tracking
|
||||
-- 4. Integrity check results
|
||||
-- 5. Auto-fix audit log
|
||||
|
||||
-- ============================================================
|
||||
-- SYNC ORCHESTRATOR TABLES
|
||||
-- ============================================================
|
||||
|
||||
-- Orchestrator state and control
|
||||
-- Single-row table holding the orchestrator's status and configuration.
CREATE TABLE IF NOT EXISTS sync_orchestrator_state (
    -- The CHECK pins the only allowed key to 1, making this a singleton.
    id                          INTEGER PRIMARY KEY DEFAULT 1 CHECK (id = 1),

    -- status values: RUNNING, SLEEPING, LOCKED, PAUSED
    status                      VARCHAR(20) NOT NULL DEFAULT 'SLEEPING',
    current_worker_id           VARCHAR(100),
    last_heartbeat_at           TIMESTAMPTZ,

    -- Summary of the most recent run
    last_run_started_at         TIMESTAMPTZ,
    last_run_completed_at       TIMESTAMPTZ,
    last_run_duration_ms        INTEGER,
    last_run_payloads_processed INTEGER DEFAULT 0,
    last_run_errors             INTEGER DEFAULT 0,
    consecutive_failures        INTEGER DEFAULT 0,

    -- Manual pause switch
    is_paused                   BOOLEAN DEFAULT FALSE,
    pause_reason                TEXT,

    -- Tunables read by the orchestrator
    config                      JSONB DEFAULT '{
"batchSize": 50,
"pollIntervalMs": 5000,
"maxRetries": 3,
"lockTimeoutMs": 300000,
"enableAnalyticsPrecompute": true,
"enableIntegrityChecks": true
}'::jsonb,

    created_at                  TIMESTAMPTZ DEFAULT NOW(),
    updated_at                  TIMESTAMPTZ DEFAULT NOW()
);

-- Seed the singleton row; a no-op when it is already present.
INSERT INTO sync_orchestrator_state (id)
VALUES (1)
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
-- Sync run history
|
||||
-- One row per orchestrator sync run, with its counters and outcome.
CREATE TABLE IF NOT EXISTS sync_runs (
    id                      SERIAL PRIMARY KEY,
    run_id                  UUID DEFAULT gen_random_uuid() UNIQUE NOT NULL,
    worker_id               VARCHAR(100) NOT NULL,
    -- status values: running, completed, failed, cancelled
    status                  VARCHAR(20) NOT NULL DEFAULT 'running',
    started_at              TIMESTAMPTZ DEFAULT NOW(),
    finished_at             TIMESTAMPTZ,
    duration_ms             INTEGER,

    -- Payload counters
    payloads_queued         INTEGER DEFAULT 0,
    payloads_processed      INTEGER DEFAULT 0,
    payloads_skipped        INTEGER DEFAULT 0,
    payloads_failed         INTEGER DEFAULT 0,
    payloads_dlq            INTEGER DEFAULT 0,

    -- Product counters
    products_upserted       INTEGER DEFAULT 0,
    products_inserted       INTEGER DEFAULT 0,
    products_updated        INTEGER DEFAULT 0,
    products_discontinued   INTEGER DEFAULT 0,

    snapshots_created       INTEGER DEFAULT 0,

    -- Error tracking
    errors                  JSONB DEFAULT '[]'::jsonb,
    error_summary           TEXT,

    -- Before/after diff statistics
    diff_stats              JSONB DEFAULT '{}'::jsonb,

    -- Whether analytics precompute ran for this run, and how long it took
    analytics_updated       BOOLEAN DEFAULT FALSE,
    analytics_duration_ms   INTEGER,

    created_at              TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_sync_runs_status
    ON sync_runs (status);
CREATE INDEX IF NOT EXISTS idx_sync_runs_started_at
    ON sync_runs (started_at DESC);
-- NOTE(review): run_id is already indexed by its UNIQUE constraint, so this
-- index looks redundant - confirm before dropping.
CREATE INDEX IF NOT EXISTS idx_sync_runs_run_id
    ON sync_runs (run_id);
|
||||
|
||||
-- ============================================================
|
||||
-- DEAD-LETTER QUEUE (DLQ)
|
||||
-- ============================================================
|
||||
|
||||
-- DLQ for failed payloads
|
||||
-- Dead-letter queue: a preserved copy of each payload that could not be
-- processed, plus its error history, retry state and resolution.
CREATE TABLE IF NOT EXISTS raw_payloads_dlq (
    id                  UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    original_payload_id UUID NOT NULL,
    dispensary_id       INTEGER REFERENCES dispensaries (id),
    state_code          VARCHAR(2),
    platform            VARCHAR(50) DEFAULT 'dutchie',

    -- Copy of the original payload
    raw_json            JSONB NOT NULL,
    product_count       INTEGER,
    pricing_type        VARCHAR(10),
    crawl_mode          VARCHAR(20),

    -- DLQ bookkeeping
    moved_to_dlq_at     TIMESTAMPTZ DEFAULT NOW(),
    failure_count       INTEGER DEFAULT 0,

    -- Error history (array of error objects) and the latest error
    error_history       JSONB DEFAULT '[]'::jsonb,
    last_error_type     VARCHAR(50),
    last_error_message  TEXT,
    last_error_at       TIMESTAMPTZ,

    -- Retry tracking
    retry_count         INTEGER DEFAULT 0,
    last_retry_at       TIMESTAMPTZ,
    next_retry_at       TIMESTAMPTZ,

    -- Resolution; status values: pending, retrying, resolved, abandoned
    status              VARCHAR(20) DEFAULT 'pending',
    resolved_at         TIMESTAMPTZ,
    resolved_by         VARCHAR(100),
    resolution_notes    TEXT,

    created_at          TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_dlq_status
    ON raw_payloads_dlq (status);
CREATE INDEX IF NOT EXISTS idx_dlq_dispensary
    ON raw_payloads_dlq (dispensary_id);
CREATE INDEX IF NOT EXISTS idx_dlq_error_type
    ON raw_payloads_dlq (last_error_type);
CREATE INDEX IF NOT EXISTS idx_dlq_moved_at
    ON raw_payloads_dlq (moved_to_dlq_at DESC);
|
||||
|
||||
-- ============================================================
|
||||
-- SYSTEM METRICS
|
||||
-- ============================================================
|
||||
|
||||
-- System metrics time series
|
||||
-- Append-only metric samples (one row per recorded value).
CREATE TABLE IF NOT EXISTS system_metrics (
    id              SERIAL PRIMARY KEY,
    metric_name     VARCHAR(100) NOT NULL,
    metric_value    NUMERIC NOT NULL,
    labels          JSONB DEFAULT '{}',
    recorded_at     TIMESTAMPTZ DEFAULT NOW()
);

-- Newest-first lookups, per metric and overall.
CREATE INDEX IF NOT EXISTS idx_metrics_name_time
    ON system_metrics (metric_name, recorded_at DESC);
CREATE INDEX IF NOT EXISTS idx_metrics_recorded_at
    ON system_metrics (recorded_at DESC);
|
||||
|
||||
-- Metrics snapshot (current state, updated continuously)
|
||||
-- Latest value per metric; metric_name is the key, so there is one row each.
CREATE TABLE IF NOT EXISTS system_metrics_current (
    metric_name     VARCHAR(100) PRIMARY KEY,
    metric_value    NUMERIC NOT NULL,
    labels          JSONB DEFAULT '{}',
    updated_at      TIMESTAMPTZ DEFAULT NOW()
);
|
||||
|
||||
-- Error buckets for classification
|
||||
-- Individual error occurrences, classified by error_type.
CREATE TABLE IF NOT EXISTS error_buckets (
    id              SERIAL PRIMARY KEY,
    error_type      VARCHAR(50) NOT NULL,
    error_message   TEXT,

    -- Where the error came from (all optional)
    source_table    VARCHAR(50),
    source_id       TEXT,
    dispensary_id   INTEGER,
    state_code      VARCHAR(2),
    context         JSONB DEFAULT '{}',

    occurred_at     TIMESTAMPTZ DEFAULT NOW(),

    -- Acknowledgement state
    acknowledged    BOOLEAN DEFAULT FALSE,
    acknowledged_at TIMESTAMPTZ,
    acknowledged_by VARCHAR(100)
);

CREATE INDEX IF NOT EXISTS idx_error_buckets_type
    ON error_buckets (error_type);
CREATE INDEX IF NOT EXISTS idx_error_buckets_occurred
    ON error_buckets (occurred_at DESC);
-- Partial index covering only unacknowledged rows.
CREATE INDEX IF NOT EXISTS idx_error_buckets_unacked
    ON error_buckets (acknowledged)
    WHERE acknowledged = FALSE;
|
||||
|
||||
-- ============================================================
|
||||
-- INTEGRITY CHECK RESULTS
|
||||
-- ============================================================
|
||||
|
||||
-- One row per integrity-check execution, with summary counts.
CREATE TABLE IF NOT EXISTS integrity_check_runs (
    id              SERIAL PRIMARY KEY,
    run_id          UUID DEFAULT gen_random_uuid() UNIQUE NOT NULL,
    -- check_type values: daily, on_demand, scheduled
    check_type      VARCHAR(50) NOT NULL,
    triggered_by    VARCHAR(100),
    started_at      TIMESTAMPTZ DEFAULT NOW(),
    finished_at     TIMESTAMPTZ,
    -- status values: running, completed, failed
    status          VARCHAR(20) DEFAULT 'running',

    -- Results summary
    total_checks    INTEGER DEFAULT 0,
    passed_checks   INTEGER DEFAULT 0,
    failed_checks   INTEGER DEFAULT 0,
    warning_checks  INTEGER DEFAULT 0,

    -- Detailed results
    results         JSONB DEFAULT '[]'::jsonb,

    created_at      TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_integrity_runs_status
    ON integrity_check_runs (status);
CREATE INDEX IF NOT EXISTS idx_integrity_runs_started
    ON integrity_check_runs (started_at DESC);
|
||||
|
||||
-- Individual integrity check results
|
||||
-- Outcome of each individual check within an integrity-check run.
CREATE TABLE IF NOT EXISTS integrity_check_results (
    id              SERIAL PRIMARY KEY,
    -- Results are removed together with their parent run.
    run_id          UUID REFERENCES integrity_check_runs (run_id) ON DELETE CASCADE,
    check_name      VARCHAR(100) NOT NULL,
    check_category  VARCHAR(50) NOT NULL,
    -- status values: passed, failed, warning, skipped
    status          VARCHAR(20) NOT NULL,

    -- Check details
    expected_value  TEXT,
    actual_value    TEXT,
    difference      TEXT,
    affected_count  INTEGER DEFAULT 0,

    -- Context
    details         JSONB DEFAULT '{}',
    affected_ids    JSONB DEFAULT '[]'::jsonb,

    -- Remediation
    can_auto_fix    BOOLEAN DEFAULT FALSE,
    fix_routine     VARCHAR(100),

    checked_at      TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_integrity_results_run
    ON integrity_check_results (run_id);
CREATE INDEX IF NOT EXISTS idx_integrity_results_status
    ON integrity_check_results (status);
|
||||
|
||||
-- ============================================================
|
||||
-- AUTO-FIX AUDIT LOG
|
||||
-- ============================================================
|
||||
|
||||
-- Audit trail of auto-fix routine executions, including dry runs.
CREATE TABLE IF NOT EXISTS auto_fix_runs (
    id              SERIAL PRIMARY KEY,
    run_id          UUID DEFAULT gen_random_uuid() UNIQUE NOT NULL,
    routine_name    VARCHAR(100) NOT NULL,
    triggered_by    VARCHAR(100) NOT NULL,
    -- trigger_type values: manual, auto, scheduled
    trigger_type    VARCHAR(20) NOT NULL,

    started_at      TIMESTAMPTZ DEFAULT NOW(),
    finished_at     TIMESTAMPTZ,
    -- status values: running, completed, failed, rolled_back
    status          VARCHAR(20) DEFAULT 'running',

    -- What was changed
    rows_affected   INTEGER DEFAULT 0,
    changes         JSONB DEFAULT '[]'::jsonb,

    -- Dry run support
    is_dry_run      BOOLEAN DEFAULT FALSE,
    dry_run_preview JSONB,

    -- Error handling
    error_message   TEXT,

    created_at      TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_fix_runs_routine
    ON auto_fix_runs (routine_name);
CREATE INDEX IF NOT EXISTS idx_fix_runs_started
    ON auto_fix_runs (started_at DESC);
|
||||
|
||||
-- ============================================================
|
||||
-- ALERTS TABLE
|
||||
-- ============================================================
|
||||
|
||||
-- System alerts; repeated occurrences are folded together via fingerprint.
CREATE TABLE IF NOT EXISTS system_alerts (
    id                  SERIAL PRIMARY KEY,
    alert_type          VARCHAR(50) NOT NULL,
    -- severity values: info, warning, error, critical
    severity            VARCHAR(20) NOT NULL,
    title               VARCHAR(255) NOT NULL,
    message             TEXT,
    source              VARCHAR(100),

    -- Context
    context             JSONB DEFAULT '{}',

    -- State; status values: active, acknowledged, resolved, muted
    status              VARCHAR(20) DEFAULT 'active',
    acknowledged_at     TIMESTAMPTZ,
    acknowledged_by     VARCHAR(100),
    resolved_at         TIMESTAMPTZ,
    resolved_by         VARCHAR(100),

    -- Deduplication: fingerprint is a hash identifying "the same" alert
    fingerprint         VARCHAR(64),
    occurrence_count    INTEGER DEFAULT 1,
    first_occurred_at   TIMESTAMPTZ DEFAULT NOW(),
    last_occurred_at    TIMESTAMPTZ DEFAULT NOW(),

    created_at          TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_alerts_status
    ON system_alerts (status);
CREATE INDEX IF NOT EXISTS idx_alerts_severity
    ON system_alerts (severity);
CREATE INDEX IF NOT EXISTS idx_alerts_type
    ON system_alerts (alert_type);
CREATE INDEX IF NOT EXISTS idx_alerts_fingerprint
    ON system_alerts (fingerprint);
-- Partial index covering only active alerts, newest first.
CREATE INDEX IF NOT EXISTS idx_alerts_active
    ON system_alerts (status, created_at DESC)
    WHERE status = 'active';
|
||||
|
||||
-- ============================================================
|
||||
-- HELPER VIEWS
|
||||
-- ============================================================
|
||||
|
||||
-- Current sync status view
|
||||
CREATE OR REPLACE VIEW v_sync_status AS
-- Single-row status: the orchestrator singleton plus live backlog counts and
-- a JSON summary of the sync runs started in the last 24 hours.
SELECT
    orch.status AS orchestrator_status,
    orch.current_worker_id,
    orch.last_heartbeat_at,
    orch.is_paused,
    orch.pause_reason,
    orch.consecutive_failures,
    orch.last_run_started_at,
    orch.last_run_completed_at,
    orch.last_run_duration_ms,
    orch.last_run_payloads_processed,
    orch.last_run_errors,
    orch.config,
    (
        SELECT COUNT(*)
        FROM raw_payloads
        WHERE processed = FALSE
    ) AS unprocessed_payloads,
    (
        SELECT COUNT(*)
        FROM raw_payloads_dlq
        WHERE status = 'pending'
    ) AS dlq_pending,
    (
        SELECT COUNT(*)
        FROM system_alerts
        WHERE status = 'active'
    ) AS active_alerts,
    (
        SELECT json_build_object(
            'total', COUNT(*),
            'completed', COUNT(*) FILTER (WHERE status = 'completed'),
            'failed', COUNT(*) FILTER (WHERE status = 'failed')
        )
        FROM sync_runs
        WHERE started_at >= NOW() - INTERVAL '24 hours'
    ) AS runs_24h
FROM sync_orchestrator_state AS orch
WHERE orch.id = 1;
|
||||
|
||||
-- DLQ summary view
|
||||
CREATE OR REPLACE VIEW v_dlq_summary AS
-- DLQ entries bucketed by (status, last error type), biggest bucket first,
-- with the age range of each bucket.
SELECT
    q.status,
    q.last_error_type,
    COUNT(*) AS count,
    MIN(q.moved_to_dlq_at) AS oldest,
    MAX(q.moved_to_dlq_at) AS newest
FROM raw_payloads_dlq AS q
GROUP BY
    q.status,
    q.last_error_type
ORDER BY count DESC;
|
||||
|
||||
-- Error bucket summary (last 24h)
|
||||
CREATE OR REPLACE VIEW v_error_summary AS
-- Errors from the last 24 hours grouped by type, most frequent first.
SELECT
    eb.error_type,
    COUNT(*) AS count,
    COUNT(*) FILTER (WHERE NOT eb.acknowledged) AS unacknowledged,
    MIN(eb.occurred_at) AS first_occurred,
    MAX(eb.occurred_at) AS last_occurred
FROM error_buckets AS eb
WHERE eb.occurred_at >= NOW() - INTERVAL '24 hours'
GROUP BY eb.error_type
ORDER BY count DESC;
|
||||
|
||||
-- Metrics summary view
|
||||
CREATE OR REPLACE VIEW v_metrics_summary AS
-- Current value of every metric, with how long ago it was last updated.
SELECT
    cur.metric_name,
    cur.metric_value,
    cur.labels,
    cur.updated_at,
    NOW() - cur.updated_at AS age
FROM system_metrics_current AS cur
ORDER BY cur.metric_name;
|
||||
|
||||
-- ============================================================
|
||||
-- HELPER FUNCTIONS
|
||||
-- ============================================================
|
||||
|
||||
-- Record a metric
|
||||
-- Records one metric sample.
--   p_name    metric name
--   p_value   sample value
--   p_labels  optional labels stored with the sample (default '{}')
-- Appends the sample to system_metrics and overwrites the metric's row in
-- system_metrics_current (creating it on first use).
CREATE OR REPLACE FUNCTION record_metric(
    p_name VARCHAR(100),
    p_value NUMERIC,
    p_labels JSONB DEFAULT '{}'
) RETURNS VOID AS $$
BEGIN
    -- History: always a new row.
    INSERT INTO system_metrics (metric_name, metric_value, labels)
    VALUES (p_name, p_value, p_labels);

    -- Current value: insert, or replace the existing row for this metric.
    INSERT INTO system_metrics_current AS cur (metric_name, metric_value, labels, updated_at)
    VALUES (p_name, p_value, p_labels, NOW())
    ON CONFLICT (metric_name) DO UPDATE
        SET metric_value = EXCLUDED.metric_value,
            labels       = EXCLUDED.labels,
            updated_at   = EXCLUDED.updated_at;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Record an error
|
||||
-- Logs one error occurrence into error_buckets and bumps the per-type counter
-- metric 'error_count_<p_type>' through record_metric().
-- Returns the id of the new error_buckets row.
-- NOTE(review): the counter is read and then written back, so concurrent
-- callers may lose increments - confirm whether that matters here.
CREATE OR REPLACE FUNCTION record_error(
    p_type VARCHAR(50),
    p_message TEXT,
    p_source_table VARCHAR(50) DEFAULT NULL,
    p_source_id TEXT DEFAULT NULL,
    p_dispensary_id INTEGER DEFAULT NULL,
    p_context JSONB DEFAULT '{}'
) RETURNS INTEGER AS $$
DECLARE
    v_error_id    INTEGER;
    v_counter_key TEXT := 'error_count_' || p_type;
    v_previous    NUMERIC;
BEGIN
    INSERT INTO error_buckets (
        error_type,
        error_message,
        source_table,
        source_id,
        dispensary_id,
        context
    )
    VALUES (
        p_type,
        p_message,
        p_source_table,
        p_source_id,
        p_dispensary_id,
        p_context
    )
    RETURNING id INTO v_error_id;

    -- Current counter value; stays NULL when the metric has no row yet.
    SELECT cur.metric_value
    INTO v_previous
    FROM system_metrics_current AS cur
    WHERE cur.metric_name = v_counter_key;

    PERFORM record_metric(v_counter_key, COALESCE(v_previous, 0) + 1);

    RETURN v_error_id;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Create or update alert (with deduplication)
|
||||
-- Raises an alert, folding repeats into the existing active alert.
-- The dedup fingerprint is md5(type || title || source). If an active alert
-- with that fingerprint exists, its occurrence_count is incremented and its
-- last_occurred_at and context are refreshed; otherwise a new row is inserted.
-- Returns the id of the alert that was updated or created.
CREATE OR REPLACE FUNCTION upsert_alert(
    p_type VARCHAR(50),
    p_severity VARCHAR(20),
    p_title VARCHAR(255),
    p_message TEXT DEFAULT NULL,
    p_source VARCHAR(100) DEFAULT NULL,
    p_context JSONB DEFAULT '{}'
) RETURNS INTEGER AS $$
DECLARE
    v_fp       VARCHAR(64) := md5(p_type || p_title || COALESCE(p_source, ''));
    v_alert_id INTEGER;
BEGIN
    -- Look for an active alert with the same fingerprint.
    SELECT a.id
    INTO v_alert_id
    FROM system_alerts AS a
    WHERE a.fingerprint = v_fp
      AND a.status = 'active';

    -- Nothing active yet: create the alert and finish.
    IF v_alert_id IS NULL THEN
        INSERT INTO system_alerts (
            alert_type, severity, title, message, source, context, fingerprint
        )
        VALUES (
            p_type, p_severity, p_title, p_message, p_source, p_context, v_fp
        )
        RETURNING id INTO v_alert_id;

        RETURN v_alert_id;
    END IF;

    -- Repeat occurrence: bump the counter and refresh the context.
    UPDATE system_alerts
    SET occurrence_count = occurrence_count + 1,
        last_occurred_at = NOW(),
        context = p_context
    WHERE id = v_alert_id;

    RETURN v_alert_id;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Move payload to DLQ
|
||||
-- Moves a failed payload into the dead-letter queue.
--
-- Parameters:
--   p_payload_id     id of the raw_payloads row to move
--   p_error_type     classification stored as last_error_type
--   p_error_message  message stored as last_error_message
--
-- Effects: copies the payload into raw_payloads_dlq, marks the original as
-- processed with a 'Moved to DLQ: ...' hydration_error, increments the
-- 'payloads_dlq_total' metric and raises a DLQ_ARRIVAL alert.
--
-- Returns: id of the new raw_payloads_dlq row.
-- Raises:  an exception when no raw_payloads row has the given id.
CREATE OR REPLACE FUNCTION move_to_dlq(
    p_payload_id UUID,
    p_error_type VARCHAR(50),
    p_error_message TEXT
) RETURNS UUID AS $$
DECLARE
    v_dlq_id UUID;
    v_payload RECORD;
    v_history JSONB;
BEGIN
    -- Get the original payload
    SELECT * INTO v_payload
    FROM raw_payloads
    WHERE id = p_payload_id;

    IF NOT FOUND THEN
        RAISE EXCEPTION 'Payload not found: %', p_payload_id;
    END IF;

    -- Seed the error history from the payload's previous hydration_error.
    -- That column may hold plain text (this function itself writes
    -- 'Moved to DLQ: ...' into it), which cannot be cast to jsonb, so fall
    -- back to wrapping the raw text instead of aborting the move.
    BEGIN
        v_history := v_payload.hydration_error::jsonb;
        -- Keep error_history an array even if a single JSON value was stored.
        IF jsonb_typeof(v_history) <> 'array' THEN
            v_history := jsonb_build_array(v_history);
        END IF;
    EXCEPTION WHEN invalid_text_representation THEN
        v_history := jsonb_build_array(
            jsonb_build_object('message', v_payload.hydration_error)
        );
    END;

    -- Insert into DLQ
    INSERT INTO raw_payloads_dlq (
        original_payload_id, dispensary_id, state_code, platform,
        raw_json, product_count, pricing_type, crawl_mode,
        failure_count, last_error_type, last_error_message, last_error_at,
        error_history
    )
    VALUES (
        p_payload_id, v_payload.dispensary_id,
        -- NOTE(review): state_code is VARCHAR(2); assumes dispensaries.state
        -- holds a two-letter code - confirm.
        (SELECT state FROM dispensaries WHERE id = v_payload.dispensary_id),
        v_payload.platform,
        v_payload.raw_json, v_payload.product_count, v_payload.pricing_type, v_payload.crawl_mode,
        v_payload.hydration_attempts,
        p_error_type, p_error_message, NOW(),
        -- array || object appends the object as a new element.
        COALESCE(v_history, '[]'::jsonb) || jsonb_build_object(
            'type', p_error_type,
            'message', p_error_message,
            'at', NOW()
        )
    )
    RETURNING id INTO v_dlq_id;

    -- Mark original as processed (moved to DLQ)
    UPDATE raw_payloads
    SET processed = TRUE,
        hydration_error = 'Moved to DLQ: ' || p_error_message
    WHERE id = p_payload_id;

    -- Record metric
    PERFORM record_metric('payloads_dlq_total',
        COALESCE((SELECT metric_value FROM system_metrics_current WHERE metric_name = 'payloads_dlq_total'), 0) + 1
    );

    -- Create alert for DLQ
    PERFORM upsert_alert(
        'DLQ_ARRIVAL',
        'warning',
        'Payload moved to Dead-Letter Queue',
        p_error_message,
        'hydration',
        jsonb_build_object('payload_id', p_payload_id, 'dlq_id', v_dlq_id, 'error_type', p_error_type)
    );

    RETURN v_dlq_id;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Cleanup old metrics (keep 7 days of time series)
|
||||
-- Deletes system_metrics samples recorded more than 7 days ago.
-- Returns the number of rows removed.
CREATE OR REPLACE FUNCTION cleanup_old_metrics() RETURNS INTEGER AS $$
DECLARE
    v_removed INTEGER;
BEGIN
    -- Count the rows through RETURNING instead of a separate diagnostics call.
    WITH purged AS (
        DELETE FROM system_metrics
        WHERE recorded_at < NOW() - INTERVAL '7 days'
        RETURNING 1
    )
    SELECT COUNT(*) INTO v_removed FROM purged;

    RETURN v_removed;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- ENSURE RAW_PAYLOADS HAS REQUIRED COLUMNS
|
||||
-- ============================================================
|
||||
|
||||
-- Add state_code column to raw_payloads if it does not already exist
|
||||
-- Idempotent column add. ADD COLUMN IF NOT EXISTS checks the table that the
-- current search_path resolves to, whereas an information_schema lookup by
-- table_name alone can match a same-named table in another schema and
-- wrongly skip the ALTER.
ALTER TABLE raw_payloads
    ADD COLUMN IF NOT EXISTS state_code VARCHAR(2);
|
||||
|
||||
-- ============================================================
|
||||
-- INITIAL METRICS
|
||||
-- ============================================================
|
||||
|
||||
-- Initialize core metrics
|
||||
-- Seed the core metrics; rows that already exist are left untouched.
INSERT INTO system_metrics_current (metric_name, metric_value, labels)
VALUES
    -- Payload throughput
    ('payloads_unprocessed',          0, '{}'),
    ('payloads_processed_today',      0, '{}'),
    -- Hydration health
    ('hydration_errors',              0, '{}'),
    ('hydration_success_rate',      100, '{}'),
    -- Canonical table churn
    ('canonical_rows_inserted',       0, '{}'),
    ('canonical_rows_updated',        0, '{}'),
    ('canonical_rows_discontinued',   0, '{}'),
    -- Volume, latency and DLQ
    ('snapshot_volume',               0, '{}'),
    ('ingestion_latency_avg_ms',      0, '{}'),
    ('payloads_dlq_total',            0, '{}')
ON CONFLICT (metric_name) DO NOTHING;
|
||||
|
||||
-- ============================================================
|
||||
-- COMMENTS
|
||||
-- ============================================================
|
||||
|
||||
-- Orchestrator
COMMENT ON TABLE sync_orchestrator_state
    IS 'Singleton table tracking orchestrator status and config';
COMMENT ON TABLE sync_runs
    IS 'History of sync runs with metrics';
COMMENT ON TABLE raw_payloads_dlq
    IS 'Dead-letter queue for failed payloads';

-- Metrics and errors
COMMENT ON TABLE system_metrics
    IS 'Time-series metrics storage';
COMMENT ON TABLE system_metrics_current
    IS 'Current metric values (fast lookup)';
COMMENT ON TABLE error_buckets
    IS 'Classified errors for monitoring';

-- Integrity checks, auto-fix and alerts
COMMENT ON TABLE integrity_check_runs
    IS 'Integrity check execution history';
COMMENT ON TABLE integrity_check_results
    IS 'Individual check results';
COMMENT ON TABLE auto_fix_runs
    IS 'Audit log for auto-fix routines';
COMMENT ON TABLE system_alerts
    IS 'System alerts with deduplication';
|
||||
750
backend/migrations/050_cannaiq_canonical_v2.sql
Normal file
750
backend/migrations/050_cannaiq_canonical_v2.sql
Normal file
@@ -0,0 +1,750 @@
|
||||
-- ============================================================================
|
||||
-- Migration 050: CannaiQ Canonical Schema v2
|
||||
-- ============================================================================
|
||||
--
|
||||
-- Purpose: Add canonical tables for multi-state analytics, pricing engine,
|
||||
-- promotions, intelligence, and brand/buyer portals.
|
||||
--
|
||||
-- RULES:
|
||||
-- - STRICTLY ADDITIVE (no DROP, DELETE, TRUNCATE, or ALTER column type)
|
||||
-- - All new tables use IF NOT EXISTS
|
||||
-- - All new columns use ADD COLUMN IF NOT EXISTS
|
||||
-- - All indexes use IF NOT EXISTS
|
||||
-- - Compatible with existing dutchie_products, dispensaries, etc.
|
||||
--
|
||||
-- Run with:
|
||||
-- psql $CANNAIQ_DB_URL -f migrations/050_cannaiq_canonical_v2.sql
|
||||
--
|
||||
-- ============================================================================
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 1: STATES TABLE
|
||||
-- ============================================================================
|
||||
-- Reference table for US states. May already exist from 041/043.
|
||||
-- This is idempotent.
|
||||
|
||||
-- Reference table of US states. Created only when it does not exist yet.
CREATE TABLE IF NOT EXISTS states (
    id            SERIAL PRIMARY KEY,
    code          VARCHAR(2) NOT NULL UNIQUE,
    name          VARCHAR(100) NOT NULL,
    timezone      VARCHAR(50) DEFAULT 'America/Phoenix',
    is_active     BOOLEAN DEFAULT TRUE,
    crawl_enabled BOOLEAN DEFAULT TRUE,
    created_at    TIMESTAMPTZ DEFAULT NOW(),
    updated_at    TIMESTAMPTZ DEFAULT NOW()
);

-- When the table was created by an earlier migration, CREATE TABLE IF NOT EXISTS
-- is a no-op and leaves that older definition untouched. The seed INSERT and the
-- partial index below reference timezone, updated_at and is_active, so make sure
-- every non-key column exists before using it. Each statement is a no-op when
-- the column is already present.
ALTER TABLE states ADD COLUMN IF NOT EXISTS timezone VARCHAR(50) DEFAULT 'America/Phoenix';
ALTER TABLE states ADD COLUMN IF NOT EXISTS is_active BOOLEAN DEFAULT TRUE;
ALTER TABLE states ADD COLUMN IF NOT EXISTS crawl_enabled BOOLEAN DEFAULT TRUE;
ALTER TABLE states ADD COLUMN IF NOT EXISTS created_at TIMESTAMPTZ DEFAULT NOW();
ALTER TABLE states ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ DEFAULT NOW();

-- Seed the supported states. An existing row keeps its name; only its timezone
-- and updated_at are refreshed.
-- NOTE(review): ON CONFLICT (code) needs a unique constraint/index on code; a
-- pre-existing states table is assumed to have one -- confirm against 041/043.
INSERT INTO states (code, name, timezone) VALUES
    ('AZ', 'Arizona',       'America/Phoenix'),
    ('CA', 'California',    'America/Los_Angeles'),
    ('CO', 'Colorado',      'America/Denver'),
    ('FL', 'Florida',       'America/New_York'),
    ('IL', 'Illinois',      'America/Chicago'),
    ('MA', 'Massachusetts', 'America/New_York'),
    ('MD', 'Maryland',      'America/New_York'),
    ('MI', 'Michigan',      'America/Detroit'),
    ('MO', 'Missouri',      'America/Chicago'),
    ('NV', 'Nevada',        'America/Los_Angeles'),
    ('NJ', 'New Jersey',    'America/New_York'),
    ('NY', 'New York',      'America/New_York'),
    ('OH', 'Ohio',          'America/New_York'),
    ('OK', 'Oklahoma',      'America/Chicago'),
    ('OR', 'Oregon',        'America/Los_Angeles'),
    ('PA', 'Pennsylvania',  'America/New_York'),
    ('WA', 'Washington',    'America/Los_Angeles')
ON CONFLICT (code) DO UPDATE SET
    timezone   = EXCLUDED.timezone,
    updated_at = NOW();

-- NOTE(review): the UNIQUE constraint on code already provides an index when the
-- table is created here; idx_states_code is kept so the schema stays unchanged.
CREATE INDEX IF NOT EXISTS idx_states_code ON states(code);
CREATE INDEX IF NOT EXISTS idx_states_active ON states(is_active) WHERE is_active = TRUE;

COMMENT ON TABLE states IS 'US states where CannaiQ operates. Single source of truth for state configuration.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 2: CHAINS TABLE (Retail Groups)
|
||||
-- ============================================================================
|
||||
-- Chains are multi-location operators like Curaleaf, Trulieve, Harvest, etc.
|
||||
|
||||
-- One row per retail chain/group; slug is the unique lookup key.
CREATE TABLE IF NOT EXISTS chains (
    id                    SERIAL PRIMARY KEY,
    name                  VARCHAR(255) NOT NULL,
    slug                  VARCHAR(255) NOT NULL UNIQUE,

    -- Branding
    website_url           TEXT,
    logo_url              TEXT,
    description           TEXT,

    -- Business info
    headquarters_city     VARCHAR(100),
    headquarters_state_id INTEGER REFERENCES states(id),
    founded_year          INTEGER,

    -- Status
    is_active             BOOLEAN DEFAULT TRUE,
    is_public             BOOLEAN DEFAULT FALSE,  -- publicly traded?
    stock_ticker          VARCHAR(10),

    -- Row bookkeeping
    created_at            TIMESTAMPTZ DEFAULT NOW(),
    updated_at            TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_chains_slug
    ON chains(slug);
-- Partial index: only active chains are indexed.
CREATE INDEX IF NOT EXISTS idx_chains_active
    ON chains(is_active)
    WHERE is_active = TRUE;

COMMENT ON TABLE chains IS 'Retail chains/groups that own multiple dispensary locations.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 3: CANONICAL BRANDS TABLE
|
||||
-- ============================================================================
|
||||
-- This is the master brand catalog across all providers and states.
|
||||
-- Distinct from the per-store `brands` table which tracks store-level brand presence.
|
||||
|
||||
-- Master brand catalog; one row per brand, keyed by a unique slug.
CREATE TABLE IF NOT EXISTS canonical_brands (
    id                 SERIAL PRIMARY KEY,
    name               VARCHAR(255) NOT NULL,
    slug               VARCHAR(255) NOT NULL UNIQUE,

    -- Brand identifiers on each external platform
    dutchie_brand_id   VARCHAR(100),
    jane_brand_id      VARCHAR(100),
    treez_brand_id     VARCHAR(100),
    weedmaps_brand_id  VARCHAR(100),

    -- Branding
    logo_url           TEXT,
    local_logo_path    TEXT,                    -- local storage path
    website_url        TEXT,
    instagram_handle   VARCHAR(100),
    description        TEXT,

    -- Classification
    is_portfolio_brand BOOLEAN DEFAULT FALSE,   -- TRUE if brand we represent
    is_house_brand     BOOLEAN DEFAULT FALSE,   -- TRUE if dispensary house brand
    parent_company     VARCHAR(255),            -- parent company name if subsidiary

    -- State presence
    states_available   TEXT[],                  -- state codes where the brand is present

    -- Status
    is_active          BOOLEAN DEFAULT TRUE,
    is_verified        BOOLEAN DEFAULT FALSE,   -- manually verified brand info
    verified_at        TIMESTAMPTZ,

    -- Row bookkeeping
    created_at         TIMESTAMPTZ DEFAULT NOW(),
    updated_at         TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_canonical_brands_slug
    ON canonical_brands(slug);
CREATE INDEX IF NOT EXISTS idx_canonical_brands_dutchie
    ON canonical_brands(dutchie_brand_id)
    WHERE dutchie_brand_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_canonical_brands_portfolio
    ON canonical_brands(is_portfolio_brand)
    WHERE is_portfolio_brand = TRUE;
-- GIN index over the state-code array column.
CREATE INDEX IF NOT EXISTS idx_canonical_brands_states
    ON canonical_brands USING GIN(states_available);

COMMENT ON TABLE canonical_brands IS 'Canonical brand catalog across all providers. Master brand reference.';
COMMENT ON COLUMN canonical_brands.is_portfolio_brand IS 'TRUE if this is a brand CannaiQ represents/manages.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 4: CRAWL_RUNS TABLE
|
||||
-- ============================================================================
|
||||
-- One record per crawl execution. Links to snapshots.
|
||||
|
||||
-- One row per crawl execution. Rows are removed together with their dispensary
-- (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS crawl_runs (
    id                SERIAL PRIMARY KEY,
    dispensary_id     INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
    state_id          INTEGER REFERENCES states(id),

    -- Provider info
    provider          VARCHAR(50) NOT NULL DEFAULT 'dutchie',

    -- Timing
    started_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    finished_at       TIMESTAMPTZ,
    duration_ms       INTEGER,

    -- Outcome
    status            VARCHAR(20) NOT NULL DEFAULT 'running',  -- running, success, failed, partial
    error_code        VARCHAR(50),
    error_message     TEXT,
    http_status       INTEGER,

    -- Result counters
    products_found    INTEGER DEFAULT 0,
    products_new      INTEGER DEFAULT 0,
    products_updated  INTEGER DEFAULT 0,
    products_missing  INTEGER DEFAULT 0,                       -- products gone from feed
    snapshots_written INTEGER DEFAULT 0,

    -- Infrastructure
    worker_id         VARCHAR(100),
    worker_hostname   VARCHAR(100),
    proxy_used        TEXT,
    trigger_type      VARCHAR(50) DEFAULT 'scheduled',         -- scheduled, manual, api

    -- Free-form extras
    metadata          JSONB DEFAULT '{}',

    created_at        TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_crawl_runs_dispensary
    ON crawl_runs(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_crawl_runs_state
    ON crawl_runs(state_id)
    WHERE state_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_crawl_runs_status
    ON crawl_runs(status);
CREATE INDEX IF NOT EXISTS idx_crawl_runs_started
    ON crawl_runs(started_at DESC);
-- Supports "most recent run for a dispensary" lookups.
CREATE INDEX IF NOT EXISTS idx_crawl_runs_dispensary_started
    ON crawl_runs(dispensary_id, started_at DESC);

COMMENT ON TABLE crawl_runs IS 'Each crawl execution. Links to snapshots and traces.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 5: STORE_PRODUCTS TABLE (Current Menu State)
|
||||
-- ============================================================================
|
||||
-- Canonical representation of what's currently on the menu.
|
||||
-- Provider-agnostic structure for analytics.
|
||||
|
||||
-- Current menu state: one row per (dispensary, provider, provider product id),
-- enforced by the UNIQUE constraint at the end of the column list.
CREATE TABLE IF NOT EXISTS store_products (
    id                    SERIAL PRIMARY KEY,
    dispensary_id         INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
    state_id              INTEGER REFERENCES states(id),

    -- Links to canonical entities (cleared, not cascaded, when the target is deleted)
    canonical_brand_id    INTEGER REFERENCES canonical_brands(id) ON DELETE SET NULL,
    category_id           INTEGER REFERENCES categories(id) ON DELETE SET NULL,

    -- Provider-specific identifiers
    provider              VARCHAR(50) NOT NULL DEFAULT 'dutchie',
    provider_product_id   VARCHAR(100) NOT NULL,          -- platform product ID
    provider_brand_id     VARCHAR(100),                   -- platform brand ID
    enterprise_product_id VARCHAR(100),                   -- cross-store product ID

    -- Raw data from platform (not normalized)
    name                  VARCHAR(500) NOT NULL,
    brand_name            VARCHAR(255),
    category              VARCHAR(100),
    subcategory           VARCHAR(100),
    strain_type           VARCHAR(50),
    description           TEXT,

    -- Pricing (current)
    price_rec             NUMERIC(10,2),
    price_med             NUMERIC(10,2),
    price_rec_special     NUMERIC(10,2),
    price_med_special     NUMERIC(10,2),
    is_on_special         BOOLEAN DEFAULT FALSE,
    special_name          TEXT,
    discount_percent      NUMERIC(5,2),
    price_unit            VARCHAR(20) DEFAULT 'each',     -- gram, ounce, each, mg

    -- Inventory
    is_in_stock           BOOLEAN DEFAULT TRUE,
    stock_quantity        INTEGER,
    stock_status          VARCHAR(50) DEFAULT 'in_stock', -- in_stock, out_of_stock, low_stock, missing_from_feed

    -- Potency
    thc_percent           NUMERIC(5,2),
    cbd_percent           NUMERIC(5,2),
    thc_mg                NUMERIC(10,2),
    cbd_mg                NUMERIC(10,2),

    -- Weight/size
    weight_value          NUMERIC(10,2),
    weight_unit           VARCHAR(20),                    -- g, oz, mg

    -- Images
    image_url             TEXT,
    local_image_path      TEXT,
    thumbnail_url         TEXT,

    -- Flags
    is_featured           BOOLEAN DEFAULT FALSE,
    medical_only          BOOLEAN DEFAULT FALSE,
    rec_only              BOOLEAN DEFAULT FALSE,

    -- Menu position (for tracking prominence)
    menu_position         INTEGER,

    -- Timestamps
    first_seen_at         TIMESTAMPTZ DEFAULT NOW(),
    last_seen_at          TIMESTAMPTZ DEFAULT NOW(),
    last_price_change_at  TIMESTAMPTZ,
    last_stock_change_at  TIMESTAMPTZ,

    created_at            TIMESTAMPTZ DEFAULT NOW(),
    updated_at            TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE(dispensary_id, provider, provider_product_id)
);

CREATE INDEX IF NOT EXISTS idx_store_products_dispensary
    ON store_products(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_store_products_state
    ON store_products(state_id)
    WHERE state_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_store_products_brand
    ON store_products(canonical_brand_id)
    WHERE canonical_brand_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_store_products_category
    ON store_products(category)
    WHERE category IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_store_products_in_stock
    ON store_products(dispensary_id, is_in_stock);
CREATE INDEX IF NOT EXISTS idx_store_products_special
    ON store_products(dispensary_id, is_on_special)
    WHERE is_on_special = TRUE;
CREATE INDEX IF NOT EXISTS idx_store_products_last_seen
    ON store_products(last_seen_at DESC);
CREATE INDEX IF NOT EXISTS idx_store_products_provider
    ON store_products(provider);
CREATE INDEX IF NOT EXISTS idx_store_products_enterprise
    ON store_products(enterprise_product_id)
    WHERE enterprise_product_id IS NOT NULL;

COMMENT ON TABLE store_products IS 'Current state of products on each dispensary menu. Provider-agnostic.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 6: STORE_PRODUCT_SNAPSHOTS TABLE (Historical Data)
|
||||
-- ============================================================================
|
||||
-- Time-series data for analytics. One row per product per crawl.
|
||||
-- CRITICAL: NEVER DELETE from this table.
|
||||
|
||||
-- Historical capture rows. Links to store_products and crawl_runs are nulled
-- (ON DELETE SET NULL) rather than cascaded, so a snapshot outlives them.
CREATE TABLE IF NOT EXISTS store_product_snapshots (
    id                  SERIAL PRIMARY KEY,
    dispensary_id       INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
    store_product_id    INTEGER REFERENCES store_products(id) ON DELETE SET NULL,
    state_id            INTEGER REFERENCES states(id),

    -- Provider info
    provider            VARCHAR(50) NOT NULL DEFAULT 'dutchie',
    provider_product_id VARCHAR(100),

    -- Link to crawl run
    crawl_run_id        INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,

    -- Capture timestamp
    captured_at         TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Raw data from platform
    name                VARCHAR(500),
    brand_name          VARCHAR(255),
    category            VARCHAR(100),
    subcategory         VARCHAR(100),

    -- Pricing at time of capture
    price_rec           NUMERIC(10,2),
    price_med           NUMERIC(10,2),
    price_rec_special   NUMERIC(10,2),
    price_med_special   NUMERIC(10,2),
    is_on_special       BOOLEAN DEFAULT FALSE,
    discount_percent    NUMERIC(5,2),

    -- Inventory at time of capture
    is_in_stock         BOOLEAN DEFAULT TRUE,
    stock_quantity      INTEGER,
    stock_status        VARCHAR(50) DEFAULT 'in_stock',
    is_present_in_feed  BOOLEAN DEFAULT TRUE,            -- FALSE = missing from feed

    -- Potency at time of capture
    thc_percent         NUMERIC(5,2),
    cbd_percent         NUMERIC(5,2),

    -- Menu position (for tracking prominence changes)
    menu_position       INTEGER,

    -- Image URL at time of capture
    image_url           TEXT,

    -- Full raw response for debugging
    raw_data            JSONB,

    created_at          TIMESTAMPTZ DEFAULT NOW()
);

-- Partitioning-ready indexes (for future table partitioning by month)
CREATE INDEX IF NOT EXISTS idx_snapshots_dispensary_captured
    ON store_product_snapshots(dispensary_id, captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_snapshots_state_captured
    ON store_product_snapshots(state_id, captured_at DESC)
    WHERE state_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_snapshots_product_captured
    ON store_product_snapshots(store_product_id, captured_at DESC)
    WHERE store_product_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_snapshots_crawl_run
    ON store_product_snapshots(crawl_run_id)
    WHERE crawl_run_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_snapshots_captured_at
    ON store_product_snapshots(captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_snapshots_brand
    ON store_product_snapshots(brand_name)
    WHERE brand_name IS NOT NULL;

COMMENT ON TABLE store_product_snapshots IS 'Historical crawl data. One row per product per crawl. NEVER DELETE.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 7: ADD state_id AND chain_id TO DISPENSARIES
|
||||
-- ============================================================================
|
||||
-- Link dispensaries to states and chains tables.
|
||||
|
||||
-- Add the canonical foreign keys to dispensaries. Both columns are nullable and
-- are only added when missing.
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS state_id INTEGER REFERENCES states(id);
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS chain_id INTEGER REFERENCES chains(id);

CREATE INDEX IF NOT EXISTS idx_dispensaries_state_id ON dispensaries(state_id);
CREATE INDEX IF NOT EXISTS idx_dispensaries_chain_id ON dispensaries(chain_id) WHERE chain_id IS NOT NULL;

-- Backfill state_id from the existing free-text state column. Only rows that
-- have no state_id yet are touched, so re-running never overwrites a value.
-- Both sides are trimmed and upper-cased so values such as 'az' or ' AZ' still
-- resolve; every row that matched under plain equality still matches.
UPDATE dispensaries d
SET state_id = s.id
FROM states s
WHERE UPPER(TRIM(d.state)) = UPPER(TRIM(s.code))
  AND d.state_id IS NULL;

COMMENT ON COLUMN dispensaries.state_id IS 'FK to states table. Canonical state reference.';
COMMENT ON COLUMN dispensaries.chain_id IS 'FK to chains table. NULL if independent dispensary.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 8: BRAND PENETRATION TABLE
|
||||
-- ============================================================================
|
||||
-- Pre-computed brand presence across stores for analytics dashboards.
|
||||
|
||||
-- Brand presence metrics per (brand, state, calculation time). Rows are removed
-- with their brand or state (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS brand_penetration (
    id                  SERIAL PRIMARY KEY,
    canonical_brand_id  INTEGER NOT NULL REFERENCES canonical_brands(id) ON DELETE CASCADE,
    state_id            INTEGER NOT NULL REFERENCES states(id) ON DELETE CASCADE,

    -- Store coverage
    stores_carrying     INTEGER DEFAULT 0,
    stores_total        INTEGER DEFAULT 0,
    penetration_pct     NUMERIC(5,2) DEFAULT 0,

    -- Product breakdown
    products_count      INTEGER DEFAULT 0,
    products_in_stock   INTEGER DEFAULT 0,
    products_on_special INTEGER DEFAULT 0,

    -- Pricing
    avg_price           NUMERIC(10,2),
    min_price           NUMERIC(10,2),
    max_price           NUMERIC(10,2),

    -- Time range
    calculated_at       TIMESTAMPTZ DEFAULT NOW(),
    period_start        TIMESTAMPTZ,
    period_end          TIMESTAMPTZ,

    UNIQUE(canonical_brand_id, state_id, calculated_at)
);

CREATE INDEX IF NOT EXISTS idx_brand_penetration_brand
    ON brand_penetration(canonical_brand_id);
CREATE INDEX IF NOT EXISTS idx_brand_penetration_state
    ON brand_penetration(state_id);
CREATE INDEX IF NOT EXISTS idx_brand_penetration_calculated
    ON brand_penetration(calculated_at DESC);

COMMENT ON TABLE brand_penetration IS 'Pre-computed brand penetration metrics by state.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 9: PRICE_ALERTS TABLE
|
||||
-- ============================================================================
|
||||
-- Track significant price changes for intelligence/alerts.
|
||||
|
||||
-- One row per detected price/special change. Alerts are removed together with
-- their product or dispensary (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS price_alerts (
    id               SERIAL PRIMARY KEY,
    store_product_id INTEGER REFERENCES store_products(id) ON DELETE CASCADE,
    dispensary_id    INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
    state_id         INTEGER REFERENCES states(id),

    -- What changed
    alert_type       VARCHAR(50) NOT NULL,  -- price_drop, price_increase, new_special, special_ended

    -- Values
    old_price        NUMERIC(10,2),
    new_price        NUMERIC(10,2),
    change_amount    NUMERIC(10,2),
    -- NUMERIC(10,2) rather than NUMERIC(5,2): a relative change is unbounded
    -- upward (0.50 -> 10.00 is +1900%), and NUMERIC(5,2) rejects anything at or
    -- above 1000 with a numeric overflow error, which would abort the insert.
    change_percent   NUMERIC(10,2),

    -- Context (denormalized copies of product attributes)
    product_name     VARCHAR(500),
    brand_name       VARCHAR(255),
    category         VARCHAR(100),

    -- Processing status
    is_processed     BOOLEAN DEFAULT FALSE,
    processed_at     TIMESTAMPTZ,

    created_at       TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_price_alerts_dispensary ON price_alerts(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_price_alerts_state ON price_alerts(state_id) WHERE state_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_price_alerts_type ON price_alerts(alert_type);
-- Partial index: only alerts that still need processing are indexed.
CREATE INDEX IF NOT EXISTS idx_price_alerts_unprocessed ON price_alerts(is_processed) WHERE is_processed = FALSE;
CREATE INDEX IF NOT EXISTS idx_price_alerts_created ON price_alerts(created_at DESC);

COMMENT ON TABLE price_alerts IS 'Significant price changes for intelligence/alerting.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 10: RAW_PAYLOADS TABLE
|
||||
-- ============================================================================
|
||||
-- Store raw API responses for replay/debugging. Separate from snapshots.
|
||||
|
||||
-- Raw provider responses, one row per captured payload.
CREATE TABLE IF NOT EXISTS raw_payloads (
    id                 SERIAL PRIMARY KEY,
    dispensary_id      INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
    crawl_run_id       INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,

    -- Payload info
    provider           VARCHAR(50) NOT NULL DEFAULT 'dutchie',
    payload_type       VARCHAR(50) NOT NULL DEFAULT 'products',  -- products, brands, specials

    -- The raw data
    payload            JSONB NOT NULL,
    payload_size_bytes INTEGER,

    -- Deduplication
    payload_hash       VARCHAR(64),                              -- SHA256 for deduplication

    -- Processing status
    is_processed       BOOLEAN DEFAULT FALSE,
    processed_at       TIMESTAMPTZ,

    captured_at        TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_raw_payloads_dispensary
    ON raw_payloads(dispensary_id, captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_raw_payloads_crawl_run
    ON raw_payloads(crawl_run_id)
    WHERE crawl_run_id IS NOT NULL;
-- Partial index: only payloads that still await processing are indexed.
CREATE INDEX IF NOT EXISTS idx_raw_payloads_unprocessed
    ON raw_payloads(is_processed)
    WHERE is_processed = FALSE;
CREATE INDEX IF NOT EXISTS idx_raw_payloads_hash
    ON raw_payloads(payload_hash)
    WHERE payload_hash IS NOT NULL;

COMMENT ON TABLE raw_payloads IS 'Raw API responses for replay/debugging. Enables re-hydration.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 11: ANALYTICS CACHE TABLES
|
||||
-- ============================================================================
|
||||
-- Pre-computed analytics for dashboard performance.
|
||||
|
||||
-- Daily store metrics
|
||||
-- Per-dispensary daily rollup; at most one row per (dispensary, date).
CREATE TABLE IF NOT EXISTS analytics_store_daily (
    id                    SERIAL PRIMARY KEY,
    dispensary_id         INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
    state_id              INTEGER REFERENCES states(id),
    date                  DATE NOT NULL,

    -- Product counts
    total_products        INTEGER DEFAULT 0,
    in_stock_products     INTEGER DEFAULT 0,
    out_of_stock_products INTEGER DEFAULT 0,
    on_special_products   INTEGER DEFAULT 0,

    -- Brand/category diversity
    unique_brands         INTEGER DEFAULT 0,
    unique_categories     INTEGER DEFAULT 0,

    -- Pricing
    avg_price             NUMERIC(10,2),
    median_price          NUMERIC(10,2),

    -- Crawl health
    crawl_count           INTEGER DEFAULT 0,
    successful_crawls     INTEGER DEFAULT 0,

    created_at            TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE(dispensary_id, date)
);

CREATE INDEX IF NOT EXISTS idx_analytics_store_daily_dispensary
    ON analytics_store_daily(dispensary_id, date DESC);
CREATE INDEX IF NOT EXISTS idx_analytics_store_daily_state
    ON analytics_store_daily(state_id, date DESC)
    WHERE state_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_analytics_store_daily_date
    ON analytics_store_daily(date DESC);
|
||||
|
||||
|
||||
-- Daily brand metrics
|
||||
-- Per-brand daily rollup, keyed by (brand, state, date).
-- NOTE(review): state_id is nullable and PostgreSQL treats NULLs as distinct in
-- a UNIQUE constraint, so rows with state_id NULL are not deduplicated by the
-- constraint below -- confirm whether NULL-state rows are ever written.
CREATE TABLE IF NOT EXISTS analytics_brand_daily (
    id                 SERIAL PRIMARY KEY,
    canonical_brand_id INTEGER NOT NULL REFERENCES canonical_brands(id) ON DELETE CASCADE,
    state_id           INTEGER REFERENCES states(id),
    date               DATE NOT NULL,

    -- Presence
    stores_carrying    INTEGER DEFAULT 0,
    products_count     INTEGER DEFAULT 0,

    -- Stock
    in_stock_count     INTEGER DEFAULT 0,
    out_of_stock_count INTEGER DEFAULT 0,

    -- Pricing
    avg_price          NUMERIC(10,2),
    min_price          NUMERIC(10,2),
    max_price          NUMERIC(10,2),
    on_special_count   INTEGER DEFAULT 0,

    created_at         TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE(canonical_brand_id, state_id, date)
);

CREATE INDEX IF NOT EXISTS idx_analytics_brand_daily_brand
    ON analytics_brand_daily(canonical_brand_id, date DESC);
CREATE INDEX IF NOT EXISTS idx_analytics_brand_daily_state
    ON analytics_brand_daily(state_id, date DESC)
    WHERE state_id IS NOT NULL;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 12: VIEWS FOR COMPATIBILITY
|
||||
-- ============================================================================
|
||||
|
||||
-- View: Latest snapshot per store product
|
||||
-- Most recent snapshot row for each (dispensary_id, provider_product_id).
-- DISTINCT ON keeps the first row of each group in ORDER BY order. id DESC is a
-- tie-breaker so that two snapshots sharing the same captured_at always resolve
-- to the same row instead of an arbitrary one.
-- NOTE(review): store_products is unique on (dispensary_id, provider,
-- provider_product_id); this view does not include provider in the key --
-- confirm that is intended once more than one provider is crawled.
CREATE OR REPLACE VIEW v_latest_store_snapshots AS
SELECT DISTINCT ON (sps.dispensary_id, sps.provider_product_id)
    sps.*
FROM store_product_snapshots sps
ORDER BY
    sps.dispensary_id,
    sps.provider_product_id,
    sps.captured_at DESC,
    sps.id DESC;
|
||||
|
||||
-- View: Crawl run summary per dispensary
|
||||
-- One row per dispensary with current menu counts and latest crawl information.
--
-- store_products and crawl_runs are each aggregated per dispensary BEFORE being
-- joined. Joining both detail tables directly to dispensaries produces
-- (products x crawl runs) intermediate rows per dispensary, which then have to
-- be collapsed again with COUNT(DISTINCT ...). Column names, order and types
-- are unchanged, so CREATE OR REPLACE remains valid.
CREATE OR REPLACE VIEW v_dispensary_crawl_summary AS
SELECT
    d.id AS dispensary_id,
    COALESCE(d.dba_name, d.name) AS dispensary_name,
    d.city,
    d.state,
    d.state_id,
    s.name AS state_name,
    -- A dispensary without products has no row in sp; report 0 rather than NULL.
    COALESCE(sp.current_product_count, 0) AS current_product_count,
    COALESCE(sp.in_stock_count, 0) AS in_stock_count,
    COALESCE(sp.on_special_count, 0) AS on_special_count,
    cr.last_crawl_at,
    -- Status of the most recently started run (NULL when there are no runs).
    (
        SELECT lr.status
        FROM crawl_runs lr
        WHERE lr.dispensary_id = d.id
        ORDER BY lr.started_at DESC
        LIMIT 1
    ) AS last_crawl_status
FROM dispensaries d
LEFT JOIN states s
    ON s.id = d.state_id
LEFT JOIN (
    -- store_products.id is the primary key, so COUNT(*) equals COUNT(DISTINCT id).
    SELECT
        p.dispensary_id,
        COUNT(*) AS current_product_count,
        COUNT(*) FILTER (WHERE p.is_in_stock) AS in_stock_count,
        COUNT(*) FILTER (WHERE p.is_on_special) AS on_special_count
    FROM store_products p
    GROUP BY p.dispensary_id
) sp
    ON sp.dispensary_id = d.id
LEFT JOIN (
    SELECT
        r.dispensary_id,
        MAX(r.finished_at) AS last_crawl_at
    FROM crawl_runs r
    GROUP BY r.dispensary_id
) cr
    ON cr.dispensary_id = d.id;
|
||||
|
||||
-- View: Brand presence across stores
|
||||
-- Per (brand, state) rollup of current store_products rows. Brands without any
-- store_products row do not appear (inner join); products whose state_id is
-- NULL are grouped under a NULL state.
CREATE OR REPLACE VIEW v_brand_store_presence AS
SELECT
    cb.id   AS brand_id,
    cb.name AS brand_name,
    cb.slug AS brand_slug,
    s.id    AS state_id,
    s.code  AS state_code,
    COUNT(DISTINCT sp.dispensary_id)               AS store_count,
    COUNT(sp.id)                                   AS product_count,
    COUNT(sp.id) FILTER (WHERE sp.is_in_stock)     AS in_stock_count,
    AVG(sp.price_rec)                              AS avg_price,
    MIN(sp.price_rec)                              AS min_price,
    MAX(sp.price_rec)                              AS max_price
FROM canonical_brands cb
INNER JOIN store_products sp
    ON sp.canonical_brand_id = cb.id
LEFT JOIN states s
    ON s.id = sp.state_id
GROUP BY
    cb.id,
    cb.name,
    cb.slug,
    s.id,
    s.code;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 13: ADD FK FROM store_product_snapshots TO crawl_runs
|
||||
-- ============================================================================
|
||||
|
||||
-- Ensure store_product_snapshots.crawl_run_id has its foreign key to crawl_runs.
-- The existence check is scoped to this table via pg_constraint.conrelid; a
-- name-only lookup would also match a same-named constraint on another table
-- or schema and wrongly skip the ALTER. The regclass cast resolves the table
-- through search_path, the same way the ALTER TABLE below does.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'store_product_snapshots_crawl_run_id_fkey'
          AND conrelid = 'store_product_snapshots'::regclass
    ) THEN
        ALTER TABLE store_product_snapshots
            ADD CONSTRAINT store_product_snapshots_crawl_run_id_fkey
            FOREIGN KEY (crawl_run_id) REFERENCES crawl_runs(id) ON DELETE SET NULL;
    END IF;
END $$;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 14: ADD crawl_run_id TO crawl_orchestration_traces
|
||||
-- ============================================================================
|
||||
|
||||
-- Optional link from a trace to its crawl run; cleared if the run is deleted.
ALTER TABLE crawl_orchestration_traces
    ADD COLUMN IF NOT EXISTS crawl_run_id INTEGER
        REFERENCES crawl_runs(id) ON DELETE SET NULL;

-- Partial index: traces without a crawl run are not indexed.
CREATE INDEX IF NOT EXISTS idx_traces_crawl_run
    ON crawl_orchestration_traces(crawl_run_id)
    WHERE crawl_run_id IS NOT NULL;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 15: UPDATE dispensary_crawler_profiles
|
||||
-- ============================================================================
|
||||
-- Add status columns for profile lifecycle.
|
||||
|
||||
-- Profile lifecycle columns, added in one statement; each ADD COLUMN is skipped
-- when the column already exists.
ALTER TABLE dispensary_crawler_profiles
    ADD COLUMN IF NOT EXISTS status VARCHAR(50) DEFAULT 'sandbox',
    ADD COLUMN IF NOT EXISTS allow_autopromote BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS validated_at TIMESTAMPTZ;

CREATE INDEX IF NOT EXISTS idx_profiles_status
    ON dispensary_crawler_profiles(status);

COMMENT ON COLUMN dispensary_crawler_profiles.status
    IS 'Profile status: sandbox, production, needs_manual, disabled';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 16: UPDATE dispensary_crawl_jobs WITH ADDITIONAL COLUMNS
|
||||
-- ============================================================================
|
||||
-- Add columns needed for enhanced job tracking.
|
||||
|
||||
-- Job-tracking columns, added in one statement; each ADD COLUMN is skipped when
-- the column already exists.
ALTER TABLE dispensary_crawl_jobs
    -- Worker identity
    ADD COLUMN IF NOT EXISTS worker_id VARCHAR(100),
    ADD COLUMN IF NOT EXISTS worker_hostname VARCHAR(100),
    -- Claim / lock bookkeeping
    ADD COLUMN IF NOT EXISTS claimed_by VARCHAR(100),
    ADD COLUMN IF NOT EXISTS claimed_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS locked_until TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS last_heartbeat_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS max_retries INTEGER DEFAULT 3,
    -- Progress counters
    ADD COLUMN IF NOT EXISTS products_upserted INTEGER DEFAULT 0,
    ADD COLUMN IF NOT EXISTS snapshots_created INTEGER DEFAULT 0,
    ADD COLUMN IF NOT EXISTS current_page INTEGER DEFAULT 0,
    ADD COLUMN IF NOT EXISTS total_pages INTEGER;

-- Partial indexes: only pending jobs / only claimed jobs are indexed.
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_status_pending
    ON dispensary_crawl_jobs(status)
    WHERE status = 'pending';
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_claimed_by
    ON dispensary_crawl_jobs(claimed_by)
    WHERE claimed_by IS NOT NULL;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 17: QUEUE MONITORING VIEWS
|
||||
-- ============================================================================
|
||||
|
||||
-- Single-row queue summary over dispensary_crawl_jobs. The "_1h" columns and
-- avg_duration_seconds only consider jobs whose completed_at is within the last
-- hour. An aggregate query without GROUP BY always returns exactly one row, even
-- when the table is empty (counts are 0, the average is NULL).
CREATE OR REPLACE VIEW v_queue_stats AS
SELECT
    COUNT(*) FILTER (WHERE status = 'pending') AS pending_jobs,
    COUNT(*) FILTER (WHERE status = 'running') AS running_jobs,
    COUNT(*) FILTER (
        WHERE status = 'completed'
          AND completed_at > NOW() - INTERVAL '1 hour'
    ) AS completed_1h,
    COUNT(*) FILTER (
        WHERE status = 'failed'
          AND completed_at > NOW() - INTERVAL '1 hour'
    ) AS failed_1h,
    COUNT(DISTINCT worker_id) FILTER (
        WHERE status = 'running'
          AND worker_id IS NOT NULL
    ) AS active_workers,
    AVG(EXTRACT(EPOCH FROM (completed_at - started_at))) FILTER (
        WHERE status = 'completed'
          AND completed_at > NOW() - INTERVAL '1 hour'
    ) AS avg_duration_seconds
FROM dispensary_crawl_jobs;
|
||||
|
||||
-- One row per (worker_id, worker_hostname) that currently has running jobs,
-- with totals summed across those running jobs.
CREATE OR REPLACE VIEW v_active_workers AS
SELECT
    j.worker_id,
    j.worker_hostname,
    COUNT(*)                 AS current_jobs,
    SUM(j.products_found)    AS total_products_found,
    SUM(j.products_upserted) AS total_products_upserted,
    SUM(j.snapshots_created) AS total_snapshots,
    MIN(j.claimed_at)        AS first_claimed_at,
    MAX(j.last_heartbeat_at) AS last_heartbeat
FROM dispensary_crawl_jobs j
WHERE j.worker_id IS NOT NULL
  AND j.status = 'running'
GROUP BY
    j.worker_id,
    j.worker_hostname;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- DONE
|
||||
-- ============================================================================
|
||||
|
||||
SELECT 'Migration 050 completed successfully. Canonical schema v2 is ready.' AS status;
|
||||
642
backend/migrations/051_cannaiq_canonical_safe_bootstrap.sql
Normal file
642
backend/migrations/051_cannaiq_canonical_safe_bootstrap.sql
Normal file
@@ -0,0 +1,642 @@
|
||||
-- ============================================================================
|
||||
-- Migration 051: CannaiQ Canonical Schema - Safe Bootstrap
|
||||
-- ============================================================================
|
||||
--
|
||||
-- Purpose: Create the canonical CannaiQ schema tables from scratch.
|
||||
-- This migration is FULLY IDEMPOTENT and safe to run multiple times.
|
||||
--
|
||||
-- SAFETY RULES FOLLOWED:
|
||||
-- 1. ALL tables use CREATE TABLE IF NOT EXISTS
|
||||
-- 2. ALL columns use ALTER TABLE ADD COLUMN IF NOT EXISTS
|
||||
-- 3. ALL indexes use CREATE INDEX IF NOT EXISTS
|
||||
-- 4. NO DROP, DELETE, TRUNCATE, or destructive operations
|
||||
-- 5. NO assumptions about existing data or column existence
|
||||
-- 6. NO dependencies on migrations 041, 043, or 050
|
||||
-- 7. Compatible with dutchie_menus database as it exists today
|
||||
-- 8. Safe handling of pre-existing states table with missing columns
|
||||
--
|
||||
-- Tables Created:
|
||||
-- - states (US state reference table)
|
||||
-- - chains (retail chain/group table)
|
||||
-- - crawl_runs (crawl execution records)
|
||||
-- - store_products (current menu state)
|
||||
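-- - store_product_snapshots (historical price/stock data)
--
-- Views Created:
-- - v_latest_store_snapshots (latest snapshot per store product)
-- - v_dispensary_crawl_summary (per-dispensary product and crawl summary)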
|
||||
--
|
||||
-- Columns Added:
|
||||
-- - dispensaries.state_id (FK to states)
|
||||
-- - dispensaries.chain_id (FK to chains)
|
||||
--
|
||||
-- Run with:
|
||||
-- psql "postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
|
||||
-- -f migrations/051_cannaiq_canonical_safe_bootstrap.sql
|
||||
--
|
||||
-- ============================================================================
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 1: STATES TABLE
|
||||
-- ============================================================================
|
||||
-- Reference table for US states where CannaiQ operates.
|
||||
-- This section handles the case where the table exists but is missing columns.
|
||||
|
||||
-- First, create the table if it doesn't exist (minimal definition)
|
||||
-- Minimal definition only: optional columns and the unique constraint on
-- code are added by separate statements so a pre-existing table is upgraded too.
CREATE TABLE IF NOT EXISTS states (
    id          SERIAL       PRIMARY KEY,
    code        VARCHAR(2)   NOT NULL,        -- two-character state code, e.g. 'AZ'
    name        VARCHAR(100) NOT NULL,        -- display name, e.g. 'Arizona'
    created_at  TIMESTAMPTZ  DEFAULT NOW(),
    updated_at  TIMESTAMPTZ  DEFAULT NOW()
);
|
||||
|
||||
-- Now safely add any missing columns (each is independent, won't fail if exists)
|
||||
-- Every clause is guarded by IF NOT EXISTS, so columns that are already
-- present are skipped without error.
ALTER TABLE states
    ADD COLUMN IF NOT EXISTS timezone      TEXT,
    ADD COLUMN IF NOT EXISTS is_active     BOOLEAN DEFAULT TRUE,
    ADD COLUMN IF NOT EXISTS crawl_enabled BOOLEAN DEFAULT TRUE;
|
||||
|
||||
-- Add unique constraint on code if not exists
|
||||
DO $$
BEGIN
    -- Skip when the named constraint is already attached to states.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'states_code_key'
          AND conrelid = 'states'::regclass
    ) THEN
        -- Also skip when a unique index on code exists under a different name.
        IF NOT EXISTS (
            SELECT 1
            FROM pg_indexes
            WHERE tablename = 'states'
              AND indexdef LIKE '%UNIQUE%code%'
        ) THEN
            ALTER TABLE states ADD CONSTRAINT states_code_key UNIQUE (code);
        END IF;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort (do not abort the migration) but report the failure:
        -- without a unique constraint on code, the seed INSERT's
        -- ON CONFLICT (code) clause cannot work, e.g. if code has duplicates.
        RAISE WARNING 'Could not add constraint states_code_key: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
-- Set default timezone values for existing rows that have NULL
|
||||
-- Fill in timezone only where it is still NULL, for the known state codes.
UPDATE states
SET timezone = CASE
        WHEN code = 'AZ' THEN 'America/Phoenix'
        WHEN code IN ('CA', 'NV', 'OR', 'WA') THEN 'America/Los_Angeles'
        WHEN code = 'CO' THEN 'America/Denver'
        WHEN code IN ('FL', 'MA', 'MD', 'NJ', 'NY', 'OH', 'PA') THEN 'America/New_York'
        WHEN code IN ('IL', 'MO', 'OK') THEN 'America/Chicago'
        WHEN code = 'MI' THEN 'America/Detroit'
    END
WHERE timezone IS NULL
  AND code IN (
        'AZ',
        'CA', 'NV', 'OR', 'WA',
        'CO',
        'FL', 'MA', 'MD', 'NJ', 'NY', 'OH', 'PA',
        'IL', 'MO', 'OK',
        'MI'
      );

-- Rows that predate the flag columns may hold NULL; treat them as enabled.
UPDATE states
SET is_active = TRUE
WHERE is_active IS NULL;

UPDATE states
SET crawl_enabled = TRUE
WHERE crawl_enabled IS NULL;
|
||||
|
||||
-- Insert known states (idempotent - ON CONFLICT DO UPDATE to fill missing values)
|
||||
-- Seed the known states. Existing rows keep their values; only NULL
-- timezone / is_active / crawl_enabled fields are filled from the seed data.
INSERT INTO states (code, name, timezone, is_active, crawl_enabled)
VALUES
    ('AZ', 'Arizona', 'America/Phoenix', TRUE, TRUE),
    ('CA', 'California', 'America/Los_Angeles', TRUE, TRUE),
    ('CO', 'Colorado', 'America/Denver', TRUE, TRUE),
    ('FL', 'Florida', 'America/New_York', TRUE, TRUE),
    ('IL', 'Illinois', 'America/Chicago', TRUE, TRUE),
    ('MA', 'Massachusetts', 'America/New_York', TRUE, TRUE),
    ('MD', 'Maryland', 'America/New_York', TRUE, TRUE),
    ('MI', 'Michigan', 'America/Detroit', TRUE, TRUE),
    ('MO', 'Missouri', 'America/Chicago', TRUE, TRUE),
    ('NV', 'Nevada', 'America/Los_Angeles', TRUE, TRUE),
    ('NJ', 'New Jersey', 'America/New_York', TRUE, TRUE),
    ('NY', 'New York', 'America/New_York', TRUE, TRUE),
    ('OH', 'Ohio', 'America/New_York', TRUE, TRUE),
    ('OK', 'Oklahoma', 'America/Chicago', TRUE, TRUE),
    ('OR', 'Oregon', 'America/Los_Angeles', TRUE, TRUE),
    ('PA', 'Pennsylvania', 'America/New_York', TRUE, TRUE),
    ('WA', 'Washington', 'America/Los_Angeles', TRUE, TRUE)
ON CONFLICT (code) DO UPDATE SET
    timezone      = COALESCE(states.timezone, EXCLUDED.timezone),
    is_active     = COALESCE(states.is_active, EXCLUDED.is_active),
    crawl_enabled = COALESCE(states.crawl_enabled, EXCLUDED.crawl_enabled),
    updated_at    = NOW()
-- Only touch a row when there is actually something to fill, so re-running
-- the migration does not bump updated_at on rows it leaves unchanged.
WHERE states.timezone IS NULL
   OR states.is_active IS NULL
   OR states.crawl_enabled IS NULL;
|
||||
|
||||
-- Lookup by state code
CREATE INDEX IF NOT EXISTS idx_states_code
    ON states (code);

-- Partial index covering only active states
CREATE INDEX IF NOT EXISTS idx_states_active
    ON states (is_active)
    WHERE is_active = TRUE;

COMMENT ON TABLE states
    IS 'US states where CannaiQ operates. Single source of truth for state configuration.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 2: CHAINS TABLE
|
||||
-- ============================================================================
|
||||
-- Retail chains/groups that own multiple dispensary locations.
|
||||
-- Examples: Curaleaf, Trulieve, Harvest, Columbia Care
|
||||
|
||||
-- The unique constraint on slug and the FK on headquarters_state_id are
-- attached by separate guarded DO blocks rather than declared inline.
CREATE TABLE IF NOT EXISTS chains (
    id                     SERIAL        PRIMARY KEY,
    name                   VARCHAR(255)  NOT NULL,
    slug                   VARCHAR(255)  NOT NULL,
    website_url            TEXT,
    logo_url               TEXT,
    description            TEXT,
    headquarters_city      VARCHAR(100),
    headquarters_state_id  INTEGER,
    founded_year           INTEGER,
    is_active              BOOLEAN       DEFAULT TRUE,
    is_public              BOOLEAN       DEFAULT FALSE,
    stock_ticker           VARCHAR(10),
    created_at             TIMESTAMPTZ   DEFAULT NOW(),
    updated_at             TIMESTAMPTZ   DEFAULT NOW()
);
|
||||
|
||||
-- Add unique constraint on slug if not exists
|
||||
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'chains_slug_key'
          AND conrelid = 'chains'::regclass
    ) THEN
        ALTER TABLE chains ADD CONSTRAINT chains_slug_key UNIQUE (slug);
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort, but surface the reason (e.g. duplicate slugs)
        -- instead of silently leaving the table without the constraint.
        RAISE WARNING 'Could not add constraint chains_slug_key: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
-- Add FK to states if not exists
|
||||
DO $$
BEGIN
    -- Scope the lookup to chains: constraint names are only unique per table.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'chains_headquarters_state_id_fkey'
          AND conrelid = 'chains'::regclass
    ) THEN
        ALTER TABLE chains
            ADD CONSTRAINT chains_headquarters_state_id_fkey
            FOREIGN KEY (headquarters_state_id) REFERENCES states(id) ON DELETE SET NULL;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort, but report why the FK could not be created
        -- (e.g. existing rows referencing a missing state).
        RAISE WARNING 'Could not add constraint chains_headquarters_state_id_fkey: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
-- Lookup by slug
CREATE INDEX IF NOT EXISTS idx_chains_slug
    ON chains (slug);

-- Partial index covering only active chains
CREATE INDEX IF NOT EXISTS idx_chains_active
    ON chains (is_active)
    WHERE is_active = TRUE;

COMMENT ON TABLE chains
    IS 'Retail chains/groups that own multiple dispensary locations.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 3: ADD state_id AND chain_id TO DISPENSARIES
|
||||
-- ============================================================================
|
||||
-- Link existing dispensaries table to states and chains.
|
||||
|
||||
-- Both columns start out nullable and unconstrained; the foreign keys are
-- attached by the guarded DO blocks that follow.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS state_id INTEGER,
    ADD COLUMN IF NOT EXISTS chain_id INTEGER;
|
||||
|
||||
-- Add FK constraints if not exist
|
||||
DO $$
BEGIN
    -- Scope the lookup to dispensaries: constraint names are only unique per table.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'dispensaries_state_id_fkey'
          AND conrelid = 'dispensaries'::regclass
    ) THEN
        ALTER TABLE dispensaries
            ADD CONSTRAINT dispensaries_state_id_fkey
            FOREIGN KEY (state_id) REFERENCES states(id) ON DELETE SET NULL;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort, but report why the FK could not be created.
        RAISE WARNING 'Could not add constraint dispensaries_state_id_fkey: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
DO $$
BEGIN
    -- Scope the lookup to dispensaries: constraint names are only unique per table.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'dispensaries_chain_id_fkey'
          AND conrelid = 'dispensaries'::regclass
    ) THEN
        ALTER TABLE dispensaries
            ADD CONSTRAINT dispensaries_chain_id_fkey
            FOREIGN KEY (chain_id) REFERENCES chains(id) ON DELETE SET NULL;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort, but report why the FK could not be created.
        RAISE WARNING 'Could not add constraint dispensaries_chain_id_fkey: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_state_id
    ON dispensaries (state_id);

-- Partial index: only dispensaries that have a chain_id set
CREATE INDEX IF NOT EXISTS idx_dispensaries_chain_id
    ON dispensaries (chain_id)
    WHERE chain_id IS NOT NULL;
|
||||
|
||||
-- Backfill state_id from existing state column (safe - only updates NULL values)
|
||||
-- Resolve the existing state column to states.id by exact code match.
-- Rows that already have a state_id are left untouched.
UPDATE dispensaries AS d
SET state_id = s.id
FROM states AS s
WHERE d.state_id IS NULL
  AND d.state = s.code;

COMMENT ON COLUMN dispensaries.state_id
    IS 'FK to states table. Canonical state reference.';

COMMENT ON COLUMN dispensaries.chain_id
    IS 'FK to chains table. NULL if independent dispensary.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 4: CRAWL_RUNS TABLE
|
||||
-- ============================================================================
|
||||
-- One record per crawl execution. Links to snapshots.
|
||||
|
||||
-- Foreign keys on dispensary_id and state_id are attached by separate
-- guarded DO blocks rather than declared inline.
CREATE TABLE IF NOT EXISTS crawl_runs (
    id                 SERIAL        PRIMARY KEY,
    dispensary_id      INTEGER       NOT NULL,
    state_id           INTEGER,

    -- Which provider this run crawled
    provider           VARCHAR(50)   NOT NULL DEFAULT 'dutchie',

    -- When the run happened
    started_at         TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    finished_at        TIMESTAMPTZ,
    duration_ms        INTEGER,

    -- Outcome
    status             VARCHAR(20)   NOT NULL DEFAULT 'running',
    error_code         VARCHAR(50),
    error_message      TEXT,
    http_status        INTEGER,

    -- Result counters
    products_found     INTEGER       DEFAULT 0,
    products_new       INTEGER       DEFAULT 0,
    products_updated   INTEGER       DEFAULT 0,
    products_missing   INTEGER       DEFAULT 0,
    snapshots_written  INTEGER       DEFAULT 0,

    -- Where and how the run executed
    worker_id          VARCHAR(100),
    worker_hostname    VARCHAR(100),
    proxy_used         TEXT,
    trigger_type       VARCHAR(50)   DEFAULT 'scheduled',

    -- Free-form extras
    metadata           JSONB         DEFAULT '{}',

    created_at         TIMESTAMPTZ   DEFAULT NOW()
);
|
||||
|
||||
-- Add FK constraints if not exist
|
||||
DO $$
BEGIN
    -- Scope the lookup to crawl_runs: constraint names are only unique per table.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'crawl_runs_dispensary_id_fkey'
          AND conrelid = 'crawl_runs'::regclass
    ) THEN
        ALTER TABLE crawl_runs
            ADD CONSTRAINT crawl_runs_dispensary_id_fkey
            FOREIGN KEY (dispensary_id) REFERENCES dispensaries(id) ON DELETE CASCADE;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort, but report why the FK could not be created.
        RAISE WARNING 'Could not add constraint crawl_runs_dispensary_id_fkey: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
DO $$
BEGIN
    -- Scope the lookup to crawl_runs: constraint names are only unique per table.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'crawl_runs_state_id_fkey'
          AND conrelid = 'crawl_runs'::regclass
    ) THEN
        ALTER TABLE crawl_runs
            ADD CONSTRAINT crawl_runs_state_id_fkey
            FOREIGN KEY (state_id) REFERENCES states(id) ON DELETE SET NULL;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort, but report why the FK could not be created.
        RAISE WARNING 'Could not add constraint crawl_runs_state_id_fkey: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_crawl_runs_dispensary
    ON crawl_runs (dispensary_id);

CREATE INDEX IF NOT EXISTS idx_crawl_runs_state
    ON crawl_runs (state_id)
    WHERE state_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_crawl_runs_status
    ON crawl_runs (status);

-- Newest-first orderings, overall and per dispensary
CREATE INDEX IF NOT EXISTS idx_crawl_runs_started
    ON crawl_runs (started_at DESC);

CREATE INDEX IF NOT EXISTS idx_crawl_runs_dispensary_started
    ON crawl_runs (dispensary_id, started_at DESC);

COMMENT ON TABLE crawl_runs
    IS 'Each crawl execution. Links to snapshots and traces.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 5: STORE_PRODUCTS TABLE
|
||||
-- ============================================================================
|
||||
-- Current state of products on each dispensary menu.
|
||||
-- Provider-agnostic structure for analytics.
|
||||
|
||||
-- The unique key (dispensary_id, provider, provider_product_id) and the
-- foreign keys are attached by separate guarded DO blocks rather than inline.
CREATE TABLE IF NOT EXISTS store_products (
    id                     SERIAL         PRIMARY KEY,
    dispensary_id          INTEGER        NOT NULL,
    state_id               INTEGER,

    -- Identifiers as issued by the provider
    provider               VARCHAR(50)    NOT NULL DEFAULT 'dutchie',
    provider_product_id    VARCHAR(100)   NOT NULL,
    provider_brand_id      VARCHAR(100),
    enterprise_product_id  VARCHAR(100),

    -- Descriptive fields, stored as received (not normalized)
    name                   VARCHAR(500)   NOT NULL,
    brand_name             VARCHAR(255),
    category               VARCHAR(100),
    subcategory            VARCHAR(100),
    strain_type            VARCHAR(50),
    description            TEXT,

    -- Current pricing
    price_rec              NUMERIC(10,2),
    price_med              NUMERIC(10,2),
    price_rec_special      NUMERIC(10,2),
    price_med_special      NUMERIC(10,2),
    is_on_special          BOOLEAN        DEFAULT FALSE,
    special_name           TEXT,
    discount_percent       NUMERIC(5,2),
    price_unit             VARCHAR(20)    DEFAULT 'each',

    -- Current inventory
    is_in_stock            BOOLEAN        DEFAULT TRUE,
    stock_quantity         INTEGER,
    stock_status           VARCHAR(50)    DEFAULT 'in_stock',

    -- Potency
    thc_percent            NUMERIC(5,2),
    cbd_percent            NUMERIC(5,2),
    thc_mg                 NUMERIC(10,2),
    cbd_mg                 NUMERIC(10,2),

    -- Weight / size
    weight_value           NUMERIC(10,2),
    weight_unit            VARCHAR(20),

    -- Images
    image_url              TEXT,
    local_image_path       TEXT,
    thumbnail_url          TEXT,

    -- Flags
    is_featured            BOOLEAN        DEFAULT FALSE,
    medical_only           BOOLEAN        DEFAULT FALSE,
    rec_only               BOOLEAN        DEFAULT FALSE,

    -- Position on the menu, kept to track prominence
    menu_position          INTEGER,

    -- Lifecycle timestamps
    first_seen_at          TIMESTAMPTZ    DEFAULT NOW(),
    last_seen_at           TIMESTAMPTZ    DEFAULT NOW(),
    last_price_change_at   TIMESTAMPTZ,
    last_stock_change_at   TIMESTAMPTZ,

    created_at             TIMESTAMPTZ    DEFAULT NOW(),
    updated_at             TIMESTAMPTZ    DEFAULT NOW()
);
|
||||
|
||||
-- Add unique constraint if not exists
|
||||
DO $$
BEGIN
    -- Scope the lookup to store_products: constraint names are only unique per table.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'store_products_dispensary_provider_product_key'
          AND conrelid = 'store_products'::regclass
    ) THEN
        ALTER TABLE store_products
            ADD CONSTRAINT store_products_dispensary_provider_product_key
            UNIQUE (dispensary_id, provider, provider_product_id);
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort, but surface the reason (e.g. duplicate rows)
        -- instead of silently leaving the table without its unique key.
        RAISE WARNING 'Could not add constraint store_products_dispensary_provider_product_key: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
-- Add FK constraints if not exist
|
||||
DO $$
BEGIN
    -- Scope the lookup to store_products: constraint names are only unique per table.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'store_products_dispensary_id_fkey'
          AND conrelid = 'store_products'::regclass
    ) THEN
        ALTER TABLE store_products
            ADD CONSTRAINT store_products_dispensary_id_fkey
            FOREIGN KEY (dispensary_id) REFERENCES dispensaries(id) ON DELETE CASCADE;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort, but report why the FK could not be created.
        RAISE WARNING 'Could not add constraint store_products_dispensary_id_fkey: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
DO $$
BEGIN
    -- Scope the lookup to store_products: constraint names are only unique per table.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'store_products_state_id_fkey'
          AND conrelid = 'store_products'::regclass
    ) THEN
        ALTER TABLE store_products
            ADD CONSTRAINT store_products_state_id_fkey
            FOREIGN KEY (state_id) REFERENCES states(id) ON DELETE SET NULL;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort, but report why the FK could not be created.
        RAISE WARNING 'Could not add constraint store_products_state_id_fkey: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_dispensary
    ON store_products (dispensary_id);

CREATE INDEX IF NOT EXISTS idx_store_products_state
    ON store_products (state_id)
    WHERE state_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_store_products_category
    ON store_products (category)
    WHERE category IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_store_products_brand_name
    ON store_products (brand_name)
    WHERE brand_name IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_store_products_in_stock
    ON store_products (dispensary_id, is_in_stock);

-- Partial index: only rows currently on special
CREATE INDEX IF NOT EXISTS idx_store_products_special
    ON store_products (dispensary_id, is_on_special)
    WHERE is_on_special = TRUE;

CREATE INDEX IF NOT EXISTS idx_store_products_last_seen
    ON store_products (last_seen_at DESC);

CREATE INDEX IF NOT EXISTS idx_store_products_provider
    ON store_products (provider);

CREATE INDEX IF NOT EXISTS idx_store_products_enterprise
    ON store_products (enterprise_product_id)
    WHERE enterprise_product_id IS NOT NULL;

COMMENT ON TABLE store_products
    IS 'Current state of products on each dispensary menu. Provider-agnostic.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 6: STORE_PRODUCT_SNAPSHOTS TABLE
|
||||
-- ============================================================================
|
||||
-- Historical price/stock data. One row per product per crawl.
|
||||
-- CRITICAL: NEVER DELETE from this table.
|
||||
|
||||
-- Foreign keys (dispensary, store product, state, crawl run) are attached
-- by separate guarded DO blocks rather than declared inline.
CREATE TABLE IF NOT EXISTS store_product_snapshots (
    id                   SERIAL         PRIMARY KEY,
    dispensary_id        INTEGER        NOT NULL,
    store_product_id     INTEGER,
    state_id             INTEGER,

    -- Provider identifiers
    provider             VARCHAR(50)    NOT NULL DEFAULT 'dutchie',
    provider_product_id  VARCHAR(100),

    -- Crawl run that produced this row
    crawl_run_id         INTEGER,

    -- When the values below were captured
    captured_at          TIMESTAMPTZ    NOT NULL DEFAULT NOW(),

    -- Descriptive fields, stored as received
    name                 VARCHAR(500),
    brand_name           VARCHAR(255),
    category             VARCHAR(100),
    subcategory          VARCHAR(100),

    -- Pricing as of captured_at
    price_rec            NUMERIC(10,2),
    price_med            NUMERIC(10,2),
    price_rec_special    NUMERIC(10,2),
    price_med_special    NUMERIC(10,2),
    is_on_special        BOOLEAN        DEFAULT FALSE,
    discount_percent     NUMERIC(5,2),

    -- Inventory as of captured_at
    is_in_stock          BOOLEAN        DEFAULT TRUE,
    stock_quantity       INTEGER,
    stock_status         VARCHAR(50)    DEFAULT 'in_stock',
    is_present_in_feed   BOOLEAN        DEFAULT TRUE,

    -- Potency as of captured_at
    thc_percent          NUMERIC(5,2),
    cbd_percent          NUMERIC(5,2),

    -- Menu position, kept to track prominence changes
    menu_position        INTEGER,

    -- Image URL as of captured_at
    image_url            TEXT,

    -- Complete raw response, retained for debugging
    raw_data             JSONB,

    created_at           TIMESTAMPTZ    DEFAULT NOW()
);
|
||||
|
||||
-- Add FK constraints if not exist
|
||||
DO $$
BEGIN
    -- Scope the lookup to store_product_snapshots: constraint names are only unique per table.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'store_product_snapshots_dispensary_id_fkey'
          AND conrelid = 'store_product_snapshots'::regclass
    ) THEN
        ALTER TABLE store_product_snapshots
            ADD CONSTRAINT store_product_snapshots_dispensary_id_fkey
            FOREIGN KEY (dispensary_id) REFERENCES dispensaries(id) ON DELETE CASCADE;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort, but report why the FK could not be created.
        RAISE WARNING 'Could not add constraint store_product_snapshots_dispensary_id_fkey: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
DO $$
BEGIN
    -- Scope the lookup to store_product_snapshots: constraint names are only unique per table.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'store_product_snapshots_store_product_id_fkey'
          AND conrelid = 'store_product_snapshots'::regclass
    ) THEN
        ALTER TABLE store_product_snapshots
            ADD CONSTRAINT store_product_snapshots_store_product_id_fkey
            FOREIGN KEY (store_product_id) REFERENCES store_products(id) ON DELETE SET NULL;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort, but report why the FK could not be created.
        RAISE WARNING 'Could not add constraint store_product_snapshots_store_product_id_fkey: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
DO $$
BEGIN
    -- Scope the lookup to store_product_snapshots: constraint names are only unique per table.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'store_product_snapshots_state_id_fkey'
          AND conrelid = 'store_product_snapshots'::regclass
    ) THEN
        ALTER TABLE store_product_snapshots
            ADD CONSTRAINT store_product_snapshots_state_id_fkey
            FOREIGN KEY (state_id) REFERENCES states(id) ON DELETE SET NULL;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort, but report why the FK could not be created.
        RAISE WARNING 'Could not add constraint store_product_snapshots_state_id_fkey: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
DO $$
BEGIN
    -- Scope the lookup to store_product_snapshots: constraint names are only unique per table.
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'store_product_snapshots_crawl_run_id_fkey'
          AND conrelid = 'store_product_snapshots'::regclass
    ) THEN
        ALTER TABLE store_product_snapshots
            ADD CONSTRAINT store_product_snapshots_crawl_run_id_fkey
            FOREIGN KEY (crawl_run_id) REFERENCES crawl_runs(id) ON DELETE SET NULL;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- Constraint already exists
    WHEN OTHERS THEN
        -- Stay best-effort, but report why the FK could not be created.
        RAISE WARNING 'Could not add constraint store_product_snapshots_crawl_run_id_fkey: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
-- Indexes optimized for analytics queries
|
||||
-- Newest-first orderings per dispensary, state and product
CREATE INDEX IF NOT EXISTS idx_snapshots_dispensary_captured
    ON store_product_snapshots (dispensary_id, captured_at DESC);

CREATE INDEX IF NOT EXISTS idx_snapshots_state_captured
    ON store_product_snapshots (state_id, captured_at DESC)
    WHERE state_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_snapshots_product_captured
    ON store_product_snapshots (store_product_id, captured_at DESC)
    WHERE store_product_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_snapshots_crawl_run
    ON store_product_snapshots (crawl_run_id)
    WHERE crawl_run_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_snapshots_captured_at
    ON store_product_snapshots (captured_at DESC);

CREATE INDEX IF NOT EXISTS idx_snapshots_brand
    ON store_product_snapshots (brand_name)
    WHERE brand_name IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_snapshots_provider_product
    ON store_product_snapshots (provider_product_id)
    WHERE provider_product_id IS NOT NULL;

COMMENT ON TABLE store_product_snapshots
    IS 'Historical crawl data. One row per product per crawl. NEVER DELETE.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 7: VIEWS FOR BACKWARD COMPATIBILITY
|
||||
-- ============================================================================
|
||||
|
||||
-- View: Latest snapshot per store product
|
||||
-- Most recent snapshot row for each product of each dispensary.
-- A product is identified by (dispensary_id, provider, provider_product_id),
-- matching the unique key on store_products, so identical provider_product_id
-- values from different providers are not merged into one row.
CREATE OR REPLACE VIEW v_latest_store_snapshots AS
SELECT DISTINCT ON (sps.dispensary_id, sps.provider, sps.provider_product_id)
    sps.*
FROM store_product_snapshots sps
ORDER BY
    sps.dispensary_id,
    sps.provider,
    sps.provider_product_id,
    sps.captured_at DESC,
    sps.id DESC;  -- tie-breaker: makes the pick deterministic when captured_at is equal
|
||||
|
||||
-- View: Crawl run summary per dispensary
|
||||
-- One row per dispensary with its current product counts and latest crawl info.
-- Products and crawl runs are aggregated in separate lateral subqueries:
-- joining both to dispensaries directly would build a products x runs cross
-- product per dispensary before it is collapsed again.
CREATE OR REPLACE VIEW v_dispensary_crawl_summary AS
SELECT
    d.id AS dispensary_id,
    COALESCE(d.dba_name, d.name) AS dispensary_name,
    d.city,
    d.state,
    d.state_id,
    s.name AS state_name,
    products.current_product_count,
    products.in_stock_count,
    products.on_special_count,
    crawls.last_crawl_at,
    -- Status of the most recently started run (NULL if the dispensary has none)
    (
        SELECT cr.status
        FROM crawl_runs cr
        WHERE cr.dispensary_id = d.id
        ORDER BY cr.started_at DESC
        LIMIT 1
    ) AS last_crawl_status
FROM dispensaries d
LEFT JOIN states s ON s.id = d.state_id
-- An aggregate without GROUP BY always yields exactly one row, so dispensaries
-- with no products still appear, with counts of 0.
CROSS JOIN LATERAL (
    SELECT
        COUNT(*) AS current_product_count,
        COUNT(*) FILTER (WHERE sp.is_in_stock) AS in_stock_count,
        COUNT(*) FILTER (WHERE sp.is_on_special) AS on_special_count
    FROM store_products sp
    WHERE sp.dispensary_id = d.id
) AS products
-- Same one-row guarantee: last_crawl_at is NULL when there are no runs.
CROSS JOIN LATERAL (
    SELECT MAX(cr.finished_at) AS last_crawl_at
    FROM crawl_runs cr
    WHERE cr.dispensary_id = d.id
) AS crawls;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- MIGRATION 051 COMPLETE
|
||||
-- ============================================================================
|
||||
|
||||
SELECT 'Migration 051 completed successfully. Canonical schema is ready.' AS status;
|
||||
98
backend/migrations/051_create_mv_state_metrics.sql
Normal file
98
backend/migrations/051_create_mv_state_metrics.sql
Normal file
@@ -0,0 +1,98 @@
|
||||
-- Migration 051: Create materialized view for state metrics
|
||||
-- Used by Analytics V2 state endpoints for fast aggregated queries
|
||||
-- Canonical tables: states, dispensaries, store_products, store_product_snapshots, brands
|
||||
|
||||
-- Drop existing view if it exists (for clean recreation)
|
||||
DROP MATERIALIZED VIEW IF EXISTS mv_state_metrics;
|
||||
|
||||
-- Create materialized view with comprehensive state metrics
|
||||
-- Schema verified via information_schema on 2025-12-06
|
||||
-- Real columns used:
|
||||
-- states: id, code, name, recreational_legal, medical_legal, rec_year, med_year
|
||||
-- dispensaries: id, state_id (NO is_active column)
|
||||
-- store_products: id, dispensary_id, brand_id, category_raw, price_rec, price_med, is_in_stock
|
||||
-- store_product_snapshots: id, store_product_id, captured_at
|
||||
-- brands: id (joined via sp.brand_id)
|
||||
|
||||
-- State-level rollup of dispensaries, products, prices and crawl activity.
-- Snapshots are collapsed to one row per store product before joining, so each
-- product contributes exactly once to the price aggregates. Joining raw
-- snapshots would repeat a product once per snapshot and weight AVG / median
-- prices by how often the product was crawled.
CREATE MATERIALIZED VIEW mv_state_metrics AS
WITH snapshot_stats AS (
    SELECT
        sps.store_product_id,
        COUNT(sps.id)        AS snapshot_count,
        MAX(sps.captured_at) AS last_captured_at,
        MIN(sps.captured_at) AS first_captured_at
    FROM store_product_snapshots sps
    WHERE sps.store_product_id IS NOT NULL
    GROUP BY sps.store_product_id
)
SELECT
    s.id AS state_id,
    s.code AS state,
    s.name AS state_name,
    COALESCE(s.recreational_legal, FALSE) AS recreational_legal,
    COALESCE(s.medical_legal, FALSE) AS medical_legal,
    s.rec_year,
    s.med_year,

    -- Dispensary metrics (DISTINCT: a dispensary repeats once per product)
    COUNT(DISTINCT d.id) AS dispensary_count,

    -- Product metrics
    COUNT(DISTINCT sp.id) AS total_products,
    COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock = TRUE) AS in_stock_products,
    COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock = FALSE) AS out_of_stock_products,

    -- Brand metrics (using brand_id FK, not brand_name)
    COUNT(DISTINCT sp.brand_id) FILTER (WHERE sp.brand_id IS NOT NULL) AS unique_brands,

    -- Category metrics (using category_raw, not category)
    COUNT(DISTINCT sp.category_raw) FILTER (WHERE sp.category_raw IS NOT NULL) AS unique_categories,

    -- Pricing metrics (recreational), in-stock products only
    AVG(sp.price_rec) FILTER (WHERE sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE) AS avg_price_rec,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)
        FILTER (WHERE sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE) AS median_price_rec,
    MIN(sp.price_rec) FILTER (WHERE sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE) AS min_price_rec,
    MAX(sp.price_rec) FILTER (WHERE sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE) AS max_price_rec,

    -- Pricing metrics (medical), in-stock products only
    AVG(sp.price_med) FILTER (WHERE sp.price_med IS NOT NULL AND sp.is_in_stock = TRUE) AS avg_price_med,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_med)
        FILTER (WHERE sp.price_med IS NOT NULL AND sp.is_in_stock = TRUE) AS median_price_med,

    -- Snapshot/crawl metrics, summed from the per-product pre-aggregation.
    -- Cast back to BIGINT so the column keeps the type COUNT() produced.
    COALESCE(SUM(ss.snapshot_count), 0)::BIGINT AS total_snapshots,
    MAX(ss.last_captured_at) AS last_crawl_at,
    MIN(ss.first_captured_at) AS first_crawl_at,

    -- Data freshness, bucketed by age of the newest snapshot at refresh time
    CASE
        WHEN MAX(ss.last_captured_at) > NOW() - INTERVAL '24 hours' THEN 'fresh'
        WHEN MAX(ss.last_captured_at) > NOW() - INTERVAL '7 days' THEN 'recent'
        WHEN MAX(ss.last_captured_at) IS NOT NULL THEN 'stale'
        ELSE 'no_data'
    END AS data_freshness,

    -- Metadata
    NOW() AS refreshed_at

FROM states s
LEFT JOIN dispensaries d ON d.state_id = s.id
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
LEFT JOIN snapshot_stats ss ON ss.store_product_id = sp.id
GROUP BY s.id, s.code, s.name, s.recreational_legal, s.medical_legal, s.rec_year, s.med_year;
|
||||
|
||||
-- Create unique index on state code for fast lookups
|
||||
-- Unique index on the state code column
CREATE UNIQUE INDEX IF NOT EXISTS mv_state_metrics_state_idx
    ON mv_state_metrics (state);

-- Join support on state_id
CREATE INDEX IF NOT EXISTS mv_state_metrics_state_id_idx
    ON mv_state_metrics (state_id);

-- Filtering by legal status
CREATE INDEX IF NOT EXISTS mv_state_metrics_legal_idx
    ON mv_state_metrics (recreational_legal, medical_legal);

-- Filtering by data freshness bucket
CREATE INDEX IF NOT EXISTS mv_state_metrics_freshness_idx
    ON mv_state_metrics (data_freshness);
|
||||
|
||||
-- Comment on the view
|
||||
COMMENT ON MATERIALIZED VIEW mv_state_metrics
    IS 'Aggregated state-level metrics for Analytics V2 endpoints. Refresh periodically with: REFRESH MATERIALIZED VIEW CONCURRENTLY mv_state_metrics;';

-- Mark this migration as applied; a repeat run leaves the existing row alone.
INSERT INTO schema_migrations (version, name, applied_at)
VALUES (
    '051',
    'create_mv_state_metrics',
    NOW()
)
ON CONFLICT (version) DO NOTHING;
|
||||
96
backend/migrations/052_add_provider_data_columns.sql
Normal file
96
backend/migrations/052_add_provider_data_columns.sql
Normal file
@@ -0,0 +1,96 @@
|
||||
-- Migration 052: Add provider_data JSONB and frequently-queried columns
|
||||
--
|
||||
-- Adds hybrid storage for legacy data:
|
||||
-- 1. provider_data JSONB on both tables for all extra fields
|
||||
-- 2. Specific columns for frequently-queried fields
|
||||
|
||||
-- ============================================================================
|
||||
-- store_products: Add provider_data and queryable columns
|
||||
-- ============================================================================
|
||||
|
||||
-- All new store_products columns are added in a single ALTER TABLE.
-- Each ADD COLUMN uses IF NOT EXISTS, so re-running is a no-op.
ALTER TABLE store_products
    -- JSONB holding every extra provider-specific field
    ADD COLUMN IF NOT EXISTS provider_data JSONB,
    -- Columns promoted out of the JSONB because they are queried frequently
    ADD COLUMN IF NOT EXISTS strain_type TEXT,
    ADD COLUMN IF NOT EXISTS medical_only BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS rec_only BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS brand_logo_url TEXT,
    ADD COLUMN IF NOT EXISTS platform_dispensary_id TEXT;

-- Partial index: only rows that actually carry a strain_type.
CREATE INDEX IF NOT EXISTS idx_store_products_strain_type
    ON store_products(strain_type)
    WHERE strain_type IS NOT NULL;

-- Medical / recreational filtering.
CREATE INDEX IF NOT EXISTS idx_store_products_medical_rec
    ON store_products(medical_only, rec_only);

-- GIN index for queries into the provider_data JSONB.
CREATE INDEX IF NOT EXISTS idx_store_products_provider_data
    ON store_products USING GIN (provider_data);
|
||||
|
||||
-- ============================================================================
|
||||
-- store_product_snapshots: Add provider_data and queryable columns
|
||||
-- ============================================================================
|
||||
|
||||
-- All new store_product_snapshots columns are added in a single ALTER TABLE.
-- Each ADD COLUMN uses IF NOT EXISTS, so re-running is a no-op.
ALTER TABLE store_product_snapshots
    -- JSONB holding every extra provider-specific snapshot field
    ADD COLUMN IF NOT EXISTS provider_data JSONB,
    -- Columns promoted out of the JSONB because they are queried frequently
    ADD COLUMN IF NOT EXISTS featured BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS is_below_threshold BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS is_below_kiosk_threshold BOOLEAN DEFAULT FALSE;

-- Featured products per dispensary (partial: featured rows only).
CREATE INDEX IF NOT EXISTS idx_snapshots_featured
    ON store_product_snapshots(dispensary_id, featured)
    WHERE featured = TRUE;

-- Low-stock alerts per dispensary (partial: below-threshold rows only).
CREATE INDEX IF NOT EXISTS idx_snapshots_below_threshold
    ON store_product_snapshots(dispensary_id, is_below_threshold)
    WHERE is_below_threshold = TRUE;

-- GIN index for queries into the provider_data JSONB.
CREATE INDEX IF NOT EXISTS idx_snapshots_provider_data
    ON store_product_snapshots USING GIN (provider_data);
|
||||
|
||||
-- ============================================================================
|
||||
-- Comments for documentation
|
||||
-- ============================================================================
|
||||
|
||||
-- Catalog documentation for the store_products columns added by this migration.
COMMENT ON COLUMN store_products.provider_data
    IS 'JSONB blob containing all provider-specific fields not in canonical columns (effects, terpenes, cannabinoids_v2, etc.)';
COMMENT ON COLUMN store_products.strain_type
    IS 'Cannabis strain type: Indica, Sativa, Hybrid, Indica-Hybrid, Sativa-Hybrid';
COMMENT ON COLUMN store_products.platform_dispensary_id
    IS 'Provider platform dispensary ID (e.g., Dutchie MongoDB ObjectId)';

-- Catalog documentation for the store_product_snapshots columns added by this migration.
COMMENT ON COLUMN store_product_snapshots.provider_data
    IS 'JSONB blob containing all provider-specific snapshot fields (options, kiosk data, etc.)';
COMMENT ON COLUMN store_product_snapshots.featured
    IS 'Whether product was featured/highlighted at capture time';
COMMENT ON COLUMN store_product_snapshots.is_below_threshold
    IS 'Whether product was below inventory threshold at capture time';
|
||||
127
backend/migrations/052_add_state_cannabis_flags.sql
Normal file
127
backend/migrations/052_add_state_cannabis_flags.sql
Normal file
@@ -0,0 +1,127 @@
|
||||
-- ============================================================================
|
||||
-- Migration 052: Add Cannabis Legalization Flags to States
|
||||
-- ============================================================================
|
||||
--
|
||||
-- Purpose: Add recreational/medical cannabis legalization status and years
|
||||
-- to the existing states table, then seed all 50 states + DC.
|
||||
--
|
||||
-- SAFETY RULES:
|
||||
-- - Uses ADD COLUMN IF NOT EXISTS (idempotent)
|
||||
-- - Uses INSERT ... ON CONFLICT (code) DO UPDATE (idempotent)
|
||||
-- - NO DROP, DELETE, TRUNCATE, or destructive operations
|
||||
-- - Safe to run multiple times
|
||||
--
|
||||
-- Run with:
|
||||
-- psql "$DATABASE_URL" -f migrations/052_add_state_cannabis_flags.sql
|
||||
--
|
||||
-- ============================================================================
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 1: Add cannabis legalization columns
|
||||
-- ============================================================================
|
||||
|
||||
-- Add the four legalization columns in one statement.
-- IF NOT EXISTS keeps the statement idempotent.
ALTER TABLE states
    ADD COLUMN IF NOT EXISTS recreational_legal BOOLEAN,
    ADD COLUMN IF NOT EXISTS rec_year INTEGER,
    ADD COLUMN IF NOT EXISTS medical_legal BOOLEAN,
    ADD COLUMN IF NOT EXISTS med_year INTEGER;

-- Catalog documentation for the new columns.
COMMENT ON COLUMN states.recreational_legal
    IS 'Whether recreational cannabis is legal in this state';
COMMENT ON COLUMN states.rec_year
    IS 'Year recreational cannabis was legalized (NULL if not legal)';
COMMENT ON COLUMN states.medical_legal
    IS 'Whether medical cannabis is legal in this state';
COMMENT ON COLUMN states.med_year
    IS 'Year medical cannabis was legalized (NULL if not legal)';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 2: Seed all 50 states + DC with cannabis legalization data
|
||||
-- ============================================================================
|
||||
-- Data sourced from state legalization records as of 2024
|
||||
-- Rows are grouped by program status: the recreational group is ordered by rec year, the medical-only group by med year, and the no-program group alphabetically
|
||||
|
||||
-- Upsert one row per state (50 states + DC), keyed on code.
-- On conflict, the name and legalization fields are overwritten with the seed
-- values, while an already-set timezone is preserved (COALESCE keeps the
-- existing value when it is not NULL).
INSERT INTO states
    (code, name, timezone, recreational_legal, rec_year, medical_legal, med_year)
VALUES
    -- Recreational + medical (ordered by rec_year)
    ('WA', 'Washington',           'America/Los_Angeles', TRUE,  2012, TRUE,  1998),
    ('CO', 'Colorado',             'America/Denver',      TRUE,  2012, TRUE,  2000),
    ('AK', 'Alaska',               'America/Anchorage',   TRUE,  2014, TRUE,  1998),
    ('OR', 'Oregon',               'America/Los_Angeles', TRUE,  2014, TRUE,  1998),
    ('DC', 'District of Columbia', 'America/New_York',    TRUE,  2015, TRUE,  2011),
    ('CA', 'California',           'America/Los_Angeles', TRUE,  2016, TRUE,  1996),
    ('NV', 'Nevada',               'America/Los_Angeles', TRUE,  2016, TRUE,  1998),
    ('ME', 'Maine',                'America/New_York',    TRUE,  2016, TRUE,  1999),
    ('MA', 'Massachusetts',        'America/New_York',    TRUE,  2016, TRUE,  2012),
    ('MI', 'Michigan',             'America/Detroit',     TRUE,  2018, TRUE,  2008),
    ('IL', 'Illinois',             'America/Chicago',     TRUE,  2019, TRUE,  2013),
    ('AZ', 'Arizona',              'America/Phoenix',     TRUE,  2020, TRUE,  2010),
    ('MT', 'Montana',              'America/Denver',      TRUE,  2020, TRUE,  2004),
    ('NJ', 'New Jersey',           'America/New_York',    TRUE,  2020, TRUE,  2010),
    ('VT', 'Vermont',              'America/New_York',    TRUE,  2020, TRUE,  2004),
    ('CT', 'Connecticut',          'America/New_York',    TRUE,  2021, TRUE,  2012),
    ('NM', 'New Mexico',           'America/Denver',      TRUE,  2021, TRUE,  2007),
    ('NY', 'New York',             'America/New_York',    TRUE,  2021, TRUE,  2014),
    ('VA', 'Virginia',             'America/New_York',    TRUE,  2021, TRUE,  2020),
    ('MD', 'Maryland',             'America/New_York',    TRUE,  2022, TRUE,  2013),
    ('MO', 'Missouri',             'America/Chicago',     TRUE,  2022, TRUE,  2018),
    ('RI', 'Rhode Island',         'America/New_York',    TRUE,  2022, TRUE,  2006),
    ('DE', 'Delaware',             'America/New_York',    TRUE,  2023, TRUE,  2011),
    ('MN', 'Minnesota',            'America/Chicago',     TRUE,  2023, TRUE,  2014),
    ('OH', 'Ohio',                 'America/New_York',    TRUE,  2023, TRUE,  2016),

    -- Medical only (no recreational program)
    ('HI', 'Hawaii',               'Pacific/Honolulu',    FALSE, NULL, TRUE,  2000),
    ('NH', 'New Hampshire',        'America/New_York',    FALSE, NULL, TRUE,  2013),
    ('GA', 'Georgia',              'America/New_York',    FALSE, NULL, TRUE,  2015),
    ('LA', 'Louisiana',            'America/Chicago',     FALSE, NULL, TRUE,  2015),
    ('TX', 'Texas',                'America/Chicago',     FALSE, NULL, TRUE,  2015),
    ('AR', 'Arkansas',             'America/Chicago',     FALSE, NULL, TRUE,  2016),
    ('FL', 'Florida',              'America/New_York',    FALSE, NULL, TRUE,  2016),
    ('ND', 'North Dakota',         'America/Chicago',     FALSE, NULL, TRUE,  2016),
    ('PA', 'Pennsylvania',         'America/New_York',    FALSE, NULL, TRUE,  2016),
    ('IA', 'Iowa',                 'America/Chicago',     FALSE, NULL, TRUE,  2017),
    ('WV', 'West Virginia',        'America/New_York',    FALSE, NULL, TRUE,  2017),
    ('OK', 'Oklahoma',             'America/Chicago',     FALSE, NULL, TRUE,  2018),
    ('UT', 'Utah',                 'America/Denver',      FALSE, NULL, TRUE,  2018),
    ('SD', 'South Dakota',         'America/Chicago',     FALSE, NULL, TRUE,  2020),
    ('AL', 'Alabama',              'America/Chicago',     FALSE, NULL, TRUE,  2021),
    ('MS', 'Mississippi',          'America/Chicago',     FALSE, NULL, TRUE,  2022),
    ('KY', 'Kentucky',             'America/New_York',    FALSE, NULL, TRUE,  2023),
    ('NE', 'Nebraska',             'America/Chicago',     FALSE, NULL, TRUE,  2024),

    -- No cannabis program (neither recreational nor medical)
    ('ID', 'Idaho',                'America/Boise',                FALSE, NULL, FALSE, NULL),
    ('IN', 'Indiana',              'America/Indiana/Indianapolis', FALSE, NULL, FALSE, NULL),
    ('KS', 'Kansas',               'America/Chicago',              FALSE, NULL, FALSE, NULL),
    ('NC', 'North Carolina',       'America/New_York',             FALSE, NULL, FALSE, NULL),
    ('SC', 'South Carolina',       'America/New_York',             FALSE, NULL, FALSE, NULL),
    ('TN', 'Tennessee',            'America/Chicago',              FALSE, NULL, FALSE, NULL),
    ('WI', 'Wisconsin',            'America/Chicago',              FALSE, NULL, FALSE, NULL),
    ('WY', 'Wyoming',              'America/Denver',               FALSE, NULL, FALSE, NULL)
ON CONFLICT (code) DO UPDATE
SET
    name               = EXCLUDED.name,
    timezone           = COALESCE(states.timezone, EXCLUDED.timezone),
    recreational_legal = EXCLUDED.recreational_legal,
    rec_year           = EXCLUDED.rec_year,
    medical_legal      = EXCLUDED.medical_legal,
    med_year           = EXCLUDED.med_year,
    updated_at         = NOW();
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 3: Add indexes for common queries
|
||||
-- ============================================================================
|
||||
|
||||
-- Partial index covering only states where recreational use is legal.
CREATE INDEX IF NOT EXISTS idx_states_recreational
    ON states(recreational_legal)
    WHERE recreational_legal = TRUE;

-- Partial index covering only states where medical use is legal.
CREATE INDEX IF NOT EXISTS idx_states_medical
    ON states(medical_legal)
    WHERE medical_legal = TRUE;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 4: Verification query (informational only)
|
||||
-- ============================================================================
|
||||
|
||||
-- Informational summary: one row with per-category state counts, computed in a
-- single pass over states using filtered aggregates.
SELECT
    'Migration 052 completed successfully.' AS status,
    COUNT(*) FILTER (WHERE recreational_legal = TRUE) AS rec_states,
    COUNT(*) FILTER (WHERE medical_legal = TRUE AND recreational_legal = FALSE) AS med_only_states,
    COUNT(*) FILTER (WHERE medical_legal = FALSE OR medical_legal IS NULL) AS no_program_states,
    COUNT(*) AS total_states
FROM states;
|
||||
249
backend/migrations/052_hydration_schema_alignment.sql
Normal file
249
backend/migrations/052_hydration_schema_alignment.sql
Normal file
@@ -0,0 +1,249 @@
|
||||
-- ============================================================================
|
||||
-- Migration 052: Hydration Schema Alignment
|
||||
-- ============================================================================
|
||||
--
|
||||
-- Purpose: Add columns to canonical tables needed for hydration from
|
||||
-- dutchie_products and dutchie_product_snapshots.
|
||||
--
|
||||
-- This migration ensures store_products and store_product_snapshots can
|
||||
-- receive all data from the legacy dutchie_* tables.
|
||||
--
|
||||
-- SAFETY RULES:
|
||||
-- - ALL columns use ADD COLUMN IF NOT EXISTS
|
||||
-- - NO DROP, DELETE, TRUNCATE, or destructive operations
|
||||
-- - Fully idempotent - safe to run multiple times
|
||||
--
|
||||
-- Run with:
|
||||
-- psql "$DATABASE_URL" \
|
||||
-- -f migrations/052_hydration_schema_alignment.sql
|
||||
--
|
||||
-- ============================================================================
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 1: store_products - Additional columns from dutchie_products
|
||||
-- ============================================================================
|
||||
|
||||
-- Add every hydration column to store_products in a single ALTER TABLE.
-- Each ADD COLUMN uses IF NOT EXISTS, so the statement is safe to re-run.
ALTER TABLE store_products
    -- Brand ID from Dutchie GraphQL (brandId field)
    ADD COLUMN IF NOT EXISTS provider_brand_id VARCHAR(100),
    -- Legacy dutchie_products.id, kept for cross-reference during migration
    ADD COLUMN IF NOT EXISTS legacy_dutchie_product_id INTEGER,
    -- THC/CBD content as text (from dutchie_products.thc_content / cbd_content)
    ADD COLUMN IF NOT EXISTS thc_content_text VARCHAR(50),
    ADD COLUMN IF NOT EXISTS cbd_content_text VARCHAR(50),
    -- Full cannabinoid data
    ADD COLUMN IF NOT EXISTS cannabinoids JSONB,
    -- Effects array
    ADD COLUMN IF NOT EXISTS effects TEXT[],
    -- Type (Flower, Edible, etc.); maps to category in legacy, but kept
    -- separately from the existing category column because the two may differ
    ADD COLUMN IF NOT EXISTS product_type VARCHAR(100),
    -- Additional images array
    ADD COLUMN IF NOT EXISTS additional_images TEXT[],
    -- Local image paths (from the 032 migration)
    ADD COLUMN IF NOT EXISTS local_image_url TEXT,
    ADD COLUMN IF NOT EXISTS local_image_thumb_url TEXT,
    ADD COLUMN IF NOT EXISTS local_image_medium_url TEXT,
    ADD COLUMN IF NOT EXISTS original_image_url TEXT,
    -- Status from Dutchie (Active/Inactive)
    ADD COLUMN IF NOT EXISTS platform_status VARCHAR(20),
    -- Threshold flags
    ADD COLUMN IF NOT EXISTS is_below_threshold BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS is_below_kiosk_threshold BOOLEAN DEFAULT FALSE,
    -- cName / slug from Dutchie
    ADD COLUMN IF NOT EXISTS c_name VARCHAR(255),
    -- Coming-soon flag
    ADD COLUMN IF NOT EXISTS is_coming_soon BOOLEAN DEFAULT FALSE,
    -- The provider column already exists; make sure provider_dispensary_id does too
    ADD COLUMN IF NOT EXISTS provider_dispensary_id VARCHAR(100),
    -- (Enterprise product ID for cross-store linking is not added here; per the
    --  original note it already exists from migration 051.)
    -- Total quantity available (from POSMetaData.children)
    ADD COLUMN IF NOT EXISTS total_quantity_available INTEGER,
    ADD COLUMN IF NOT EXISTS total_kiosk_quantity_available INTEGER,
    -- Weight
    ADD COLUMN IF NOT EXISTS weight VARCHAR(50),
    -- Options array (size/weight options)
    ADD COLUMN IF NOT EXISTS options TEXT[],
    -- Measurements
    ADD COLUMN IF NOT EXISTS measurements JSONB,
    -- Raw data from the last crawl
    ADD COLUMN IF NOT EXISTS raw_data JSONB,
    -- Source timestamps from Dutchie
    ADD COLUMN IF NOT EXISTS source_created_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS source_updated_at TIMESTAMPTZ;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 2: store_product_snapshots - Additional columns for hydration
|
||||
-- ============================================================================
|
||||
|
||||
-- Add every hydration column to store_product_snapshots in a single ALTER TABLE.
-- Each ADD COLUMN uses IF NOT EXISTS, so the statement is safe to re-run.
ALTER TABLE store_product_snapshots
    -- Legacy dutchie_product_snapshot.id, kept for cross-reference
    ADD COLUMN IF NOT EXISTS legacy_snapshot_id INTEGER,
    -- Legacy dutchie_product_id reference
    ADD COLUMN IF NOT EXISTS legacy_dutchie_product_id INTEGER,
    -- Options JSONB from dutchie_product_snapshots
    ADD COLUMN IF NOT EXISTS options JSONB,
    -- Provider dispensary ID
    ADD COLUMN IF NOT EXISTS provider_dispensary_id VARCHAR(100),
    -- Inventory details
    ADD COLUMN IF NOT EXISTS total_quantity_available INTEGER,
    ADD COLUMN IF NOT EXISTS total_kiosk_quantity_available INTEGER,
    -- Platform status at the time of the snapshot
    ADD COLUMN IF NOT EXISTS platform_status VARCHAR(20),
    -- Threshold flags at the time of the snapshot
    ADD COLUMN IF NOT EXISTS is_below_threshold BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS is_below_kiosk_threshold BOOLEAN DEFAULT FALSE,
    -- Special data
    ADD COLUMN IF NOT EXISTS special_data JSONB,
    ADD COLUMN IF NOT EXISTS special_name TEXT,
    -- Pricing mode (rec/med)
    ADD COLUMN IF NOT EXISTS pricing_type VARCHAR(10),
    -- Crawl mode (mode_a/mode_b)
    ADD COLUMN IF NOT EXISTS crawl_mode VARCHAR(20);
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 3: crawl_runs - Additional columns for hydration
|
||||
-- ============================================================================
|
||||
|
||||
-- Add every hydration column to crawl_runs in a single ALTER TABLE.
-- Each ADD COLUMN uses IF NOT EXISTS, so the statement is safe to re-run.
ALTER TABLE crawl_runs
    -- Legacy job ID references
    ADD COLUMN IF NOT EXISTS legacy_dispensary_crawl_job_id INTEGER,
    ADD COLUMN IF NOT EXISTS legacy_job_run_log_id INTEGER,
    -- Schedule reference
    ADD COLUMN IF NOT EXISTS schedule_id INTEGER,
    -- Job type
    ADD COLUMN IF NOT EXISTS job_type VARCHAR(50),
    -- Brands found count
    ADD COLUMN IF NOT EXISTS brands_found INTEGER DEFAULT 0,
    -- Retry count
    ADD COLUMN IF NOT EXISTS retry_count INTEGER DEFAULT 0;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 4: INDEXES for hydration queries
|
||||
-- ============================================================================
|
||||
|
||||
-- Legacy-ID lookups used while migrating. Each index is partial so that rows
-- without a legacy reference are not indexed.
CREATE INDEX IF NOT EXISTS idx_store_products_legacy_id
    ON store_products(legacy_dutchie_product_id)
    WHERE legacy_dutchie_product_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_snapshots_legacy_id
    ON store_product_snapshots(legacy_snapshot_id)
    WHERE legacy_snapshot_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_snapshots_legacy_product_id
    ON store_product_snapshots(legacy_dutchie_product_id)
    WHERE legacy_dutchie_product_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_crawl_runs_legacy_job_id
    ON crawl_runs(legacy_dispensary_crawl_job_id)
    WHERE legacy_dispensary_crawl_job_id IS NOT NULL;

-- provider_product_id lookup, used by upserts.
CREATE INDEX IF NOT EXISTS idx_store_products_provider_id
    ON store_products(provider_product_id);

-- Canonical key lookup: (dispensary_id, provider, provider_product_id).
CREATE INDEX IF NOT EXISTS idx_store_products_canonical_key
    ON store_products(dispensary_id, provider, provider_product_id);
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 5: Unique constraint for idempotent hydration
|
||||
-- ============================================================================
|
||||
|
||||
-- Ensure at most one snapshot per (store_product_id, crawl_run_id) so that
-- re-running hydration cannot insert duplicate snapshots.
-- A partial unique index is used instead of a table constraint because a
-- UNIQUE constraint cannot carry a WHERE predicate; rows where either key is
-- NULL are left out of the index entirely.
--
-- Index creation stays best-effort (the migration continues on failure), but a
-- failure is now reported with a WARNING instead of being silently discarded.
-- Typical causes would be existing duplicate rows or a missing column; without
-- the warning the migration would report success while the uniqueness
-- guarantee was absent.
DO $$
BEGIN
    -- Skip when a constraint of this name is already present.
    -- NOTE(review): nothing in this migration creates that constraint;
    -- presumably it may be defined elsewhere -- confirm or drop this guard.
    IF NOT EXISTS (
        SELECT 1 FROM pg_constraint
        WHERE conname = 'store_product_snapshots_unique_per_crawl'
    ) THEN
        CREATE UNIQUE INDEX IF NOT EXISTS idx_snapshots_unique_per_crawl
            ON store_product_snapshots(store_product_id, crawl_run_id)
            WHERE store_product_id IS NOT NULL AND crawl_run_id IS NOT NULL;
    END IF;
EXCEPTION
    WHEN duplicate_object OR duplicate_table THEN
        NULL;  -- index already present: nothing to do
    WHEN OTHERS THEN
        RAISE WARNING 'idx_snapshots_unique_per_crawl was not created: % (SQLSTATE %)', SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 6: View for hydration status monitoring
|
||||
-- ============================================================================
|
||||
|
||||
-- Hydration progress per legacy source table.
-- The CTE gathers (source_table, source_count, hydrated_count) once per source;
-- the outer query derives hydration_pct from those counts. NULLIF makes the
-- percentage NULL rather than raising a division-by-zero error when a source
-- table is empty.
CREATE OR REPLACE VIEW v_hydration_status AS
WITH progress (source_table, source_count, hydrated_count) AS (
    SELECT
        'dutchie_products',
        (SELECT COUNT(*) FROM dutchie_products),
        (SELECT COUNT(*) FROM store_products WHERE legacy_dutchie_product_id IS NOT NULL)
    UNION ALL
    SELECT
        'dutchie_product_snapshots',
        (SELECT COUNT(*) FROM dutchie_product_snapshots),
        (SELECT COUNT(*) FROM store_product_snapshots WHERE legacy_snapshot_id IS NOT NULL)
    UNION ALL
    -- Only completed legacy crawl jobs count as source rows.
    SELECT
        'dispensary_crawl_jobs',
        (SELECT COUNT(*) FROM dispensary_crawl_jobs WHERE status = 'completed'),
        (SELECT COUNT(*) FROM crawl_runs WHERE legacy_dispensary_crawl_job_id IS NOT NULL)
)
SELECT
    source_table,
    source_count,
    hydrated_count,
    ROUND(100.0 * hydrated_count / NULLIF(source_count, 0), 2) AS hydration_pct
FROM progress;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- DONE
|
||||
-- ============================================================================
|
||||
|
||||
SELECT 'Migration 052 completed successfully. Hydration schema aligned.' AS status;
|
||||
157
backend/migrations/053_analytics_indexes.sql
Normal file
157
backend/migrations/053_analytics_indexes.sql
Normal file
@@ -0,0 +1,157 @@
|
||||
-- ============================================================================
|
||||
-- Migration 053: Analytics Engine Indexes
|
||||
-- ============================================================================
|
||||
--
|
||||
-- Purpose: Add indexes optimized for analytics queries on canonical tables.
|
||||
-- These indexes support price trends, brand penetration, category
|
||||
-- growth, and state-level analytics.
|
||||
--
|
||||
-- SAFETY RULES:
|
||||
-- - Uses CREATE INDEX IF NOT EXISTS (idempotent)
|
||||
-- - Uses ADD COLUMN IF NOT EXISTS for helper columns
|
||||
-- - NO DROP, DELETE, TRUNCATE, or destructive operations
|
||||
-- - Safe to run multiple times
|
||||
--
|
||||
-- Run with:
|
||||
-- psql "$DATABASE_URL" -f migrations/053_analytics_indexes.sql
|
||||
--
|
||||
-- ============================================================================
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 1: Helper columns for analytics (if missing)
|
||||
-- ============================================================================
|
||||
|
||||
-- store_products.brand_id: a normalized brand reference for faster brand
-- analytics joins (brand_name already exists on the table).
ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS brand_id INTEGER;

-- store_product_snapshots.category: lets time-series category analytics run
-- directly on snapshots.
ALTER TABLE store_product_snapshots
    ADD COLUMN IF NOT EXISTS category VARCHAR(100);
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 2: Price Analytics Indexes
|
||||
-- ============================================================================
|
||||
|
||||
-- Per-product price trend: newest snapshots first, with both prices in the key.
CREATE INDEX IF NOT EXISTS idx_snapshots_product_price_time
    ON store_product_snapshots(store_product_id, captured_at DESC, price_rec, price_med)
    WHERE store_product_id IS NOT NULL;

-- Per-category price trend over time.
CREATE INDEX IF NOT EXISTS idx_snapshots_category_price_time
    ON store_product_snapshots(category, captured_at DESC, price_rec)
    WHERE category IS NOT NULL;

-- Most recent price changes first (volatility analysis).
CREATE INDEX IF NOT EXISTS idx_products_price_change
    ON store_products(last_price_change_at DESC)
    WHERE last_price_change_at IS NOT NULL;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 3: Brand Penetration Indexes
|
||||
-- ============================================================================
|
||||
|
||||
-- Brand x dispensary, for penetration counts.
CREATE INDEX IF NOT EXISTS idx_products_brand_dispensary
    ON store_products(brand_name, dispensary_id)
    WHERE brand_name IS NOT NULL;

-- Brand x state, for state-level brand analytics.
CREATE INDEX IF NOT EXISTS idx_products_brand_state
    ON store_products(brand_name, state_id)
    WHERE brand_name IS NOT NULL AND state_id IS NOT NULL;

-- Brand x first_seen_at, for penetration trends.
CREATE INDEX IF NOT EXISTS idx_products_brand_first_seen
    ON store_products(brand_name, first_seen_at)
    WHERE brand_name IS NOT NULL;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 4: Category Analytics Indexes
|
||||
-- ============================================================================
|
||||
|
||||
-- Category x state, for state-level category analytics.
CREATE INDEX IF NOT EXISTS idx_products_category_state
    ON store_products(category, state_id)
    WHERE category IS NOT NULL;

-- Category x dispensary.
CREATE INDEX IF NOT EXISTS idx_products_category_dispensary
    ON store_products(category, dispensary_id)
    WHERE category IS NOT NULL;

-- Category x first_seen_at, for growth tracking.
CREATE INDEX IF NOT EXISTS idx_products_category_first_seen
    ON store_products(category, first_seen_at)
    WHERE category IS NOT NULL;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 5: Store Analytics Indexes
|
||||
-- ============================================================================
|
||||
|
||||
-- Products added / removed per dispensary, newest first.
CREATE INDEX IF NOT EXISTS idx_products_dispensary_first_seen
    ON store_products(dispensary_id, first_seen_at DESC);

CREATE INDEX IF NOT EXISTS idx_products_dispensary_last_seen
    ON store_products(dispensary_id, last_seen_at DESC);

-- Stock status changes per dispensary, newest first; rows that never changed
-- are left out of the index.
CREATE INDEX IF NOT EXISTS idx_products_stock_change
    ON store_products(dispensary_id, last_stock_change_at DESC)
    WHERE last_stock_change_at IS NOT NULL;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 6: State Analytics Indexes
|
||||
-- ============================================================================
|
||||
|
||||
-- Dispensaries per state (rows without a state are not indexed).
CREATE INDEX IF NOT EXISTS idx_dispensaries_state_active
    ON dispensaries(state_id)
    WHERE state_id IS NOT NULL;

-- Products per state, split by stock status.
CREATE INDEX IF NOT EXISTS idx_products_state_active
    ON store_products(state_id, is_in_stock)
    WHERE state_id IS NOT NULL;

-- Snapshots per state for time-series queries, newest first.
CREATE INDEX IF NOT EXISTS idx_snapshots_state_time
    ON store_product_snapshots(state_id, captured_at DESC)
    WHERE state_id IS NOT NULL;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 7: Composite indexes for common analytics queries
|
||||
-- ============================================================================
|
||||
|
||||
-- Brand + category + state, for market share calculations.
CREATE INDEX IF NOT EXISTS idx_products_brand_category_state
    ON store_products(brand_name, category, state_id)
    WHERE brand_name IS NOT NULL AND category IS NOT NULL;

-- Dispensary + category + brand, for store-level brand analysis.
CREATE INDEX IF NOT EXISTS idx_products_disp_cat_brand
    ON store_products(dispensary_id, category, brand_name)
    WHERE category IS NOT NULL;

-- Products currently on special, by category (promo analysis).
CREATE INDEX IF NOT EXISTS idx_products_special_category
    ON store_products(category, is_on_special)
    WHERE is_on_special = TRUE;
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 8: Verification
|
||||
-- ============================================================================
|
||||
|
||||
-- Informational summary: counts indexes by name prefix.
-- In LIKE, an unescaped "_" matches any single character, so the underscores
-- in the prefixes are escaped (explicit ESCAPE character '!') to count only
-- names that literally start with idx_products_ / idx_snapshots_.
SELECT
    'Migration 053 completed successfully.' AS status,
    (SELECT COUNT(*) FROM pg_indexes WHERE indexname LIKE 'idx!_products!_%' ESCAPE '!') AS product_indexes,
    (SELECT COUNT(*) FROM pg_indexes WHERE indexname LIKE 'idx!_snapshots!_%' ESCAPE '!') AS snapshot_indexes;
|
||||
346
backend/migrations/053_dutchie_discovery_schema.sql
Normal file
346
backend/migrations/053_dutchie_discovery_schema.sql
Normal file
@@ -0,0 +1,346 @@
|
||||
-- ============================================================================
|
||||
-- Migration 053: Dutchie Discovery Schema
|
||||
-- ============================================================================
|
||||
--
|
||||
-- Purpose: Create tables for Dutchie store discovery workflow.
|
||||
-- Stores are discovered and held in staging tables until verified,
|
||||
-- then promoted to the canonical dispensaries table.
|
||||
--
|
||||
-- Tables Created:
|
||||
-- - dutchie_discovery_cities: City pages from Dutchie
|
||||
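-- - dutchie_discovery_locations: Individual store locations
-- Also in this script:
-- - Upserts Canadian provinces into the existing states table
-- - Creates views v_discovery_status, v_discovery_pending, v_discovery_cities_status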
|
||||
--
|
||||
-- SAFETY RULES:
|
||||
-- - ALL tables use CREATE TABLE IF NOT EXISTS
|
||||
-- - NO DROP, DELETE, TRUNCATE, or destructive operations
|
||||
-- - Does NOT modify rows in the canonical dispensaries table (it only adds a nullable FK from dutchie_discovery_locations that references it)
|
||||
-- - Fully idempotent - safe to run multiple times
|
||||
--
|
||||
-- Run with:
|
||||
-- psql "postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
|
||||
-- -f migrations/053_dutchie_discovery_schema.sql
|
||||
--
|
||||
-- ============================================================================
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 1: DUTCHIE_DISCOVERY_CITIES
|
||||
-- ============================================================================
|
||||
-- Stores Dutchie city pages for systematic crawling.
|
||||
-- Each city can contain multiple dispensary locations.
|
||||
|
||||
-- Staging table of platform city pages that drive store discovery crawls.
CREATE TABLE IF NOT EXISTS dutchie_discovery_cities (
    id              BIGSERIAL   PRIMARY KEY,

    -- Source platform; kept as a column so other platforms can share the table
    platform        TEXT        DEFAULT 'dutchie' NOT NULL,

    -- Which city this row represents
    city_name       TEXT        NOT NULL,
    city_slug       TEXT        NOT NULL,
    state_code      TEXT,                           -- e.g. 'AZ', 'CA', 'ON' (nullable)
    country_code    TEXT        DEFAULT 'US' NOT NULL,

    -- Crawl bookkeeping
    last_crawled_at TIMESTAMPTZ,
    crawl_enabled   BOOLEAN     DEFAULT TRUE NOT NULL,
    location_count  INTEGER,                        -- locations found in this city

    -- Free-form extras
    notes           TEXT,
    metadata        JSONB,

    -- Row timestamps
    created_at      TIMESTAMPTZ DEFAULT NOW() NOT NULL,
    updated_at      TIMESTAMPTZ DEFAULT NOW() NOT NULL
);
|
||||
|
||||
-- Add the unique constraint on (platform, country_code, state_code, city_slug)
-- unless this table already has it.
--   * The catalog lookup is scoped to this table (conrelid): constraint names
--     are only unique per table, so a same-named constraint elsewhere must not
--     cause the ALTER to be skipped.
--   * Unexpected failures (for example existing duplicate rows) are reported
--     with a WARNING instead of being silently discarded; the migration still
--     continues, as before.
--   * state_code is nullable and UNIQUE treats NULLs as distinct, so rows with
--     a NULL state_code are not deduplicated by this constraint.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'dutchie_discovery_cities_unique'
          AND conrelid = 'dutchie_discovery_cities'::regclass
    ) THEN
        ALTER TABLE dutchie_discovery_cities
            ADD CONSTRAINT dutchie_discovery_cities_unique
            UNIQUE (platform, country_code, state_code, city_slug);
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- constraint was added concurrently; nothing to do
    WHEN OTHERS THEN
        RAISE WARNING 'Could not add constraint dutchie_discovery_cities_unique: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
-- Secondary indexes for dutchie_discovery_cities

-- Filter by source platform
CREATE INDEX IF NOT EXISTS idx_discovery_cities_platform
    ON dutchie_discovery_cities USING btree (platform);

-- Filter by country, then state/province
CREATE INDEX IF NOT EXISTS idx_discovery_cities_state
    ON dutchie_discovery_cities USING btree (country_code, state_code);

-- Only cities that are enabled for crawling
CREATE INDEX IF NOT EXISTS idx_discovery_cities_crawl_enabled
    ON dutchie_discovery_cities USING btree (crawl_enabled)
    WHERE (crawl_enabled = TRUE);

-- Order or filter by last crawl time
CREATE INDEX IF NOT EXISTS idx_discovery_cities_last_crawled
    ON dutchie_discovery_cities USING btree (last_crawled_at);

COMMENT ON TABLE dutchie_discovery_cities
    IS 'City pages from Dutchie for systematic store discovery.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 2: DUTCHIE_DISCOVERY_LOCATIONS
|
||||
-- ============================================================================
|
||||
-- Individual store locations discovered from Dutchie.
|
||||
-- These are NOT promoted to canonical dispensaries until verified.
|
||||
|
||||
-- Staging table of individual store locations found during discovery.
CREATE TABLE IF NOT EXISTS dutchie_discovery_locations (
    id                   BIGSERIAL        PRIMARY KEY,

    -- How the source platform identifies the store
    platform             TEXT             DEFAULT 'dutchie' NOT NULL,
    platform_location_id TEXT             NOT NULL,   -- platform's internal location ID
    platform_slug        TEXT             NOT NULL,   -- URL slug for the store
    platform_menu_url    TEXT             NOT NULL,   -- full menu URL

    -- Display name of the store
    name                 TEXT             NOT NULL,

    -- Address, both raw and split into parts
    raw_address          TEXT,
    address_line1        TEXT,
    address_line2        TEXT,
    city                 TEXT,
    state_code           TEXT,                        -- e.g. 'AZ', 'CA', 'ON'
    postal_code          TEXT,
    country_code         TEXT,                        -- 'US' or 'CA'

    -- Geolocation
    latitude             DOUBLE PRECISION,
    longitude            DOUBLE PRECISION,
    timezone             TEXT,

    -- Workflow state. Values used by this schema:
    --   discovered : just found, not yet verified
    --   verified   : verified and promoted to canonical dispensaries
    --   rejected   : manually rejected (e.g., duplicate, test store)
    --   merged     : linked to an existing canonical dispensary
    status               TEXT             DEFAULT 'discovered' NOT NULL,

    -- Canonical dispensary this row maps to (set only after verification)
    dispensary_id        INTEGER,

    -- City page this location was found under
    discovery_city_id    BIGINT,

    -- Raw payload from the platform, plus free-form notes
    metadata             JSONB,
    notes                TEXT,

    -- Capabilities reported by the platform
    offers_delivery      BOOLEAN,
    offers_pickup        BOOLEAN,
    is_recreational      BOOLEAN,
    is_medical           BOOLEAN,

    -- Sighting and verification timeline
    first_seen_at        TIMESTAMPTZ      DEFAULT NOW() NOT NULL,
    last_seen_at         TIMESTAMPTZ      DEFAULT NOW() NOT NULL,
    last_checked_at      TIMESTAMPTZ,
    verified_at          TIMESTAMPTZ,
    verified_by          TEXT,                        -- user who verified

    active               BOOLEAN          DEFAULT TRUE NOT NULL,

    -- Row timestamps
    created_at           TIMESTAMPTZ      DEFAULT NOW() NOT NULL,
    updated_at           TIMESTAMPTZ      DEFAULT NOW() NOT NULL
);
|
||||
|
||||
-- Add the unique constraint on (platform, platform_location_id) unless this
-- table already has it.
--   * The catalog lookup is scoped to this table (conrelid), because
--     constraint names are only unique per table.
--   * Unexpected failures (for example existing duplicate rows) are reported
--     with a WARNING instead of being silently discarded; the migration still
--     continues, as before.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'dutchie_discovery_locations_platform_id_unique'
          AND conrelid = 'dutchie_discovery_locations'::regclass
    ) THEN
        ALTER TABLE dutchie_discovery_locations
            ADD CONSTRAINT dutchie_discovery_locations_platform_id_unique
            UNIQUE (platform, platform_location_id);
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- constraint was added concurrently; nothing to do
    WHEN OTHERS THEN
        RAISE WARNING 'Could not add constraint dutchie_discovery_locations_platform_id_unique: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
-- Add the unique constraint on
-- (platform, platform_slug, country_code, state_code, city) unless this table
-- already has it.
--   * The catalog lookup is scoped to this table (conrelid), because
--     constraint names are only unique per table.
--   * Unexpected failures are reported with a WARNING instead of being
--     silently discarded; the migration still continues, as before.
--   * country_code, state_code and city are nullable and UNIQUE treats NULLs
--     as distinct, so rows with a NULL in any of them are not deduplicated by
--     this constraint.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'dutchie_discovery_locations_slug_unique'
          AND conrelid = 'dutchie_discovery_locations'::regclass
    ) THEN
        ALTER TABLE dutchie_discovery_locations
            ADD CONSTRAINT dutchie_discovery_locations_slug_unique
            UNIQUE (platform, platform_slug, country_code, state_code, city);
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- constraint was added concurrently; nothing to do
    WHEN OTHERS THEN
        RAISE WARNING 'Could not add constraint dutchie_discovery_locations_slug_unique: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
-- Add the FK dispensary_id -> dispensaries(id) unless this table already has
-- it. The column stays nullable, and deleting a dispensary only clears the
-- link (ON DELETE SET NULL).
--   * The catalog lookup is scoped to this table (conrelid), because
--     constraint names are only unique per table.
--   * Unexpected failures (for example a missing dispensaries table or
--     orphaned ids) are reported with a WARNING instead of being silently
--     discarded; the migration still continues, as before.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'dutchie_discovery_locations_dispensary_fk'
          AND conrelid = 'dutchie_discovery_locations'::regclass
    ) THEN
        ALTER TABLE dutchie_discovery_locations
            ADD CONSTRAINT dutchie_discovery_locations_dispensary_fk
            FOREIGN KEY (dispensary_id) REFERENCES dispensaries(id) ON DELETE SET NULL;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- constraint was added concurrently; nothing to do
    WHEN OTHERS THEN
        RAISE WARNING 'Could not add constraint dutchie_discovery_locations_dispensary_fk: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
-- Add the FK discovery_city_id -> dutchie_discovery_cities(id) unless this
-- table already has it. Deleting a city only clears the link
-- (ON DELETE SET NULL).
--   * The catalog lookup is scoped to this table (conrelid), because
--     constraint names are only unique per table.
--   * Unexpected failures are reported with a WARNING instead of being
--     silently discarded; the migration still continues, as before.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'dutchie_discovery_locations_city_fk'
          AND conrelid = 'dutchie_discovery_locations'::regclass
    ) THEN
        ALTER TABLE dutchie_discovery_locations
            ADD CONSTRAINT dutchie_discovery_locations_city_fk
            FOREIGN KEY (discovery_city_id) REFERENCES dutchie_discovery_cities(id) ON DELETE SET NULL;
    END IF;
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- constraint was added concurrently; nothing to do
    WHEN OTHERS THEN
        RAISE WARNING 'Could not add constraint dutchie_discovery_locations_city_fk: % (SQLSTATE %)',
            SQLERRM, SQLSTATE;
END $$;
|
||||
|
||||
-- Secondary indexes for dutchie_discovery_locations

-- Filter by source platform
CREATE INDEX IF NOT EXISTS idx_discovery_locations_platform
    ON dutchie_discovery_locations USING btree (platform);

-- Filter by workflow status
CREATE INDEX IF NOT EXISTS idx_discovery_locations_status
    ON dutchie_discovery_locations USING btree (status);

-- Filter by country, then state/province
CREATE INDEX IF NOT EXISTS idx_discovery_locations_state
    ON dutchie_discovery_locations USING btree (country_code, state_code);

-- Filter by city within a state
CREATE INDEX IF NOT EXISTS idx_discovery_locations_city
    ON dutchie_discovery_locations USING btree (city, state_code);

-- Only rows already linked to a canonical dispensary
CREATE INDEX IF NOT EXISTS idx_discovery_locations_dispensary
    ON dutchie_discovery_locations USING btree (dispensary_id)
    WHERE (dispensary_id IS NOT NULL);

-- Only rows still in 'discovered' status, newest first
CREATE INDEX IF NOT EXISTS idx_discovery_locations_discovered
    ON dutchie_discovery_locations USING btree (status, first_seen_at DESC)
    WHERE (status = 'discovered');

-- Only active rows
CREATE INDEX IF NOT EXISTS idx_discovery_locations_active
    ON dutchie_discovery_locations USING btree (active)
    WHERE (active = TRUE);

-- Only rows that have both coordinates
CREATE INDEX IF NOT EXISTS idx_discovery_locations_coords
    ON dutchie_discovery_locations USING btree (latitude, longitude)
    WHERE (latitude IS NOT NULL AND longitude IS NOT NULL);

COMMENT ON TABLE dutchie_discovery_locations
    IS 'Discovered store locations from Dutchie. Held in staging until verified.';
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 3: ADD CANADIAN PROVINCES TO STATES TABLE
|
||||
-- ============================================================================
|
||||
-- Support for Canadian provinces (Ontario, BC, Alberta, etc.)
|
||||
|
||||
-- Seed Canadian provinces and territories.
-- If a code already exists: the name is overwritten, an existing timezone is
-- kept (the seeded value is used only when the stored timezone is NULL), and
-- is_active / crawl_enabled are left as they are.
INSERT INTO states
    (code, name,                        timezone,              is_active, crawl_enabled)
VALUES
    ('AB', 'Alberta',                   'America/Edmonton',    TRUE,      TRUE),
    ('BC', 'British Columbia',          'America/Vancouver',   TRUE,      TRUE),
    ('MB', 'Manitoba',                  'America/Winnipeg',    TRUE,      TRUE),
    ('NB', 'New Brunswick',             'America/Moncton',     TRUE,      TRUE),
    ('NL', 'Newfoundland and Labrador', 'America/St_Johns',    TRUE,      TRUE),
    ('NS', 'Nova Scotia',               'America/Halifax',     TRUE,      TRUE),
    ('NT', 'Northwest Territories',     'America/Yellowknife', TRUE,      TRUE),
    ('NU', 'Nunavut',                   'America/Iqaluit',     TRUE,      TRUE),
    ('ON', 'Ontario',                   'America/Toronto',     TRUE,      TRUE),
    ('PE', 'Prince Edward Island',      'America/Halifax',     TRUE,      TRUE),
    ('QC', 'Quebec',                    'America/Montreal',    TRUE,      TRUE),
    ('SK', 'Saskatchewan',              'America/Regina',      TRUE,      TRUE),
    ('YT', 'Yukon',                     'America/Whitehorse',  TRUE,      TRUE)
ON CONFLICT (code) DO UPDATE
    SET name       = EXCLUDED.name,
        timezone   = COALESCE(states.timezone, EXCLUDED.timezone),
        updated_at = NOW();
|
||||
|
||||
|
||||
-- ============================================================================
|
||||
-- SECTION 4: VIEWS FOR DISCOVERY MONITORING
|
||||
-- ============================================================================
|
||||
|
||||
-- v_discovery_status: one row per (platform, country, state, status) with
-- location totals, how many are linked to a dispensary, and the first/last
-- sighting timestamps.
CREATE OR REPLACE VIEW v_discovery_status AS
SELECT
    loc.platform,
    loc.country_code,
    loc.state_code,
    loc.status,
    COUNT(*)                                              AS location_count,
    COUNT(*) FILTER (WHERE loc.dispensary_id IS NOT NULL) AS linked_count,
    MIN(loc.first_seen_at)                                AS earliest_discovery,
    MAX(loc.last_seen_at)                                 AS latest_activity
FROM dutchie_discovery_locations AS loc
GROUP BY
    loc.platform,
    loc.country_code,
    loc.state_code,
    loc.status
ORDER BY
    loc.country_code,
    loc.state_code,
    loc.status;
|
||||
|
||||
-- v_discovery_pending: active locations still in 'discovered' status, with the
-- name of the city page they were found under (NULL when not linked to one).
CREATE OR REPLACE VIEW v_discovery_pending AS
SELECT
    loc.id,
    loc.platform,
    loc.name,
    loc.city,
    loc.state_code,
    loc.country_code,
    loc.platform_menu_url,
    loc.first_seen_at,
    loc.last_seen_at,
    loc.offers_delivery,
    loc.offers_pickup,
    loc.is_recreational,
    loc.is_medical,
    cty.city_name AS discovery_city_name
FROM dutchie_discovery_locations AS loc
LEFT JOIN dutchie_discovery_cities AS cty
    ON cty.id = loc.discovery_city_id
WHERE loc.status = 'discovered'
  AND loc.active = TRUE
ORDER BY
    loc.state_code,
    loc.city,
    loc.name;
|
||||
|
||||
-- v_discovery_cities_status: one row per discovery city with its crawl
-- settings, the stored location_count, and live counts of linked locations
-- (total, discovered, verified, rejected). Cities with no locations are kept
-- by the LEFT JOIN and report zero counts.
CREATE OR REPLACE VIEW v_discovery_cities_status AS
SELECT
    cty.id,
    cty.platform,
    cty.city_name,
    cty.state_code,
    cty.country_code,
    cty.crawl_enabled,
    cty.last_crawled_at,
    cty.location_count,
    COUNT(loc.id)                                         AS actual_locations,
    COUNT(loc.id) FILTER (WHERE loc.status = 'discovered') AS pending_count,
    COUNT(loc.id) FILTER (WHERE loc.status = 'verified')   AS verified_count,
    COUNT(loc.id) FILTER (WHERE loc.status = 'rejected')   AS rejected_count
FROM dutchie_discovery_cities AS cty
LEFT JOIN dutchie_discovery_locations AS loc
    ON loc.discovery_city_id = cty.id
GROUP BY
    cty.id,
    cty.platform,
    cty.city_name,
    cty.state_code,
    cty.country_code,
    cty.crawl_enabled,
    cty.last_crawled_at,
    cty.location_count
ORDER BY
    cty.country_code,
    cty.state_code,
    cty.city_name;


-- ============================================================================
-- DONE
-- ============================================================================

-- Single-row status message for whoever runs the script.
SELECT 'Migration 053 completed successfully. Discovery schema created.' AS status;
|
||||
49
backend/migrations/054_worker_metadata.sql
Normal file
49
backend/migrations/054_worker_metadata.sql
Normal file
@@ -0,0 +1,49 @@
|
||||
-- Migration 054: Worker Metadata for Named Workforce
|
||||
-- Adds worker_name and worker_role to job tables for displaying friendly worker identities
|
||||
|
||||
-- job_schedules: friendly worker identity (name + role). Each column is added
-- only if it is not already there.
ALTER TABLE job_schedules
    ADD COLUMN IF NOT EXISTS worker_name VARCHAR(50);
COMMENT ON COLUMN job_schedules.worker_name
    IS 'Friendly name for the worker (e.g., Alice, Henry, Bella, Oscar)';

ALTER TABLE job_schedules
    ADD COLUMN IF NOT EXISTS worker_role VARCHAR(100);
COMMENT ON COLUMN job_schedules.worker_role
    IS 'Description of worker role (e.g., Store Discovery Worker, GraphQL Product Sync)';
|
||||
|
||||
-- Add worker metadata columns (and the lookup index) to job_run_logs.
-- Guarded with to_regclass(): job_run_logs is not guaranteed to exist at this
-- point (migration 056 creates it when missing), and an unguarded ALTER TABLE
-- would abort this migration.
DO $$
BEGIN
    IF to_regclass('job_run_logs') IS NOT NULL THEN
        ALTER TABLE job_run_logs
            ADD COLUMN IF NOT EXISTS worker_name VARCHAR(50),
            ADD COLUMN IF NOT EXISTS run_role VARCHAR(100);

        COMMENT ON COLUMN job_run_logs.worker_name IS 'Name of the worker that executed this run (copied from schedule)';
        COMMENT ON COLUMN job_run_logs.run_role IS 'Role description for this specific run';

        -- Index for worker name lookups
        CREATE INDEX IF NOT EXISTS idx_job_run_logs_worker_name
            ON job_run_logs(worker_name);
    ELSE
        RAISE NOTICE 'Migration 054: job_run_logs does not exist; skipping its worker metadata columns';
    END IF;
END $$;

-- Add enqueued_by_worker to dispensary_crawl_jobs (tracks which named worker
-- enqueued a job). Guarded the same way, matching migration 056, which treats
-- this table as optional.
DO $$
BEGIN
    IF to_regclass('dispensary_crawl_jobs') IS NOT NULL THEN
        ALTER TABLE dispensary_crawl_jobs
            ADD COLUMN IF NOT EXISTS enqueued_by_worker VARCHAR(50);

        COMMENT ON COLUMN dispensary_crawl_jobs.enqueued_by_worker IS 'Name of the worker that enqueued this job';

        CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_enqueued_by
            ON dispensary_crawl_jobs(enqueued_by_worker);
    ELSE
        RAISE NOTICE 'Migration 054: dispensary_crawl_jobs does not exist; skipping enqueued_by_worker';
    END IF;
END $$;

-- Update existing schedules with worker names.
-- Each UPDATE only touches schedules that have no worker_name yet, so values
-- assigned earlier are never overwritten and re-running is harmless.
UPDATE job_schedules SET
    worker_name = 'Bella',
    worker_role = 'GraphQL Product Sync'
WHERE job_name = 'dutchie_az_product_crawl' AND worker_name IS NULL;

UPDATE job_schedules SET
    worker_name = 'Henry',
    worker_role = 'Entry Point Finder'
WHERE job_name = 'dutchie_az_menu_detection' AND worker_name IS NULL;

UPDATE job_schedules SET
    worker_name = 'Alice',
    worker_role = 'Store Discovery'
WHERE job_name = 'dutchie_store_discovery' AND worker_name IS NULL;

UPDATE job_schedules SET
    worker_name = 'Oscar',
    worker_role = 'Analytics Refresh'
WHERE job_name = 'analytics_refresh' AND worker_name IS NULL;
|
||||
123
backend/migrations/055_workforce_enhancements.sql
Normal file
123
backend/migrations/055_workforce_enhancements.sql
Normal file
@@ -0,0 +1,123 @@
|
||||
-- Migration 055: Workforce System Enhancements
|
||||
-- Adds visibility tracking, slug change tracking, and scope support for workers
|
||||
|
||||
-- ============================================================
|
||||
-- 1. VISIBILITY TRACKING FOR BELLA (Product Sync)
|
||||
-- ============================================================
|
||||
|
||||
-- dutchie_products: track when a product drops out of (and returns to)
-- GraphQL results. Each column is added only if it is not already there.
ALTER TABLE dutchie_products
    ADD COLUMN IF NOT EXISTS visibility_lost BOOLEAN DEFAULT FALSE;
COMMENT ON COLUMN dutchie_products.visibility_lost
    IS 'True if product disappeared from GraphQL results';

ALTER TABLE dutchie_products
    ADD COLUMN IF NOT EXISTS visibility_lost_at TIMESTAMPTZ;
COMMENT ON COLUMN dutchie_products.visibility_lost_at
    IS 'When product was last marked as visibility lost';

ALTER TABLE dutchie_products
    ADD COLUMN IF NOT EXISTS visibility_restored_at TIMESTAMPTZ;
COMMENT ON COLUMN dutchie_products.visibility_restored_at
    IS 'When product reappeared after being lost';

-- Partial index: only products currently flagged as lost, per dispensary.
CREATE INDEX IF NOT EXISTS idx_dutchie_products_visibility_lost
    ON dutchie_products USING btree (dispensary_id, visibility_lost)
    WHERE (visibility_lost = TRUE);
|
||||
|
||||
-- ============================================================
|
||||
-- 2. SLUG CHANGE TRACKING FOR ALICE (Store Discovery)
|
||||
-- ============================================================
|
||||
|
||||
-- Add slug change and retirement tracking to discovery locations.
-- previous_slug is TEXT rather than VARCHAR(255): it holds a former value of
-- platform_slug, which migration 053 declares as TEXT, so a length cap here
-- could reject a slug that the source column accepted.
ALTER TABLE dutchie_discovery_locations
    ADD COLUMN IF NOT EXISTS slug_changed_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS previous_slug TEXT,
    ADD COLUMN IF NOT EXISTS retired_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS retirement_reason VARCHAR(100);

COMMENT ON COLUMN dutchie_discovery_locations.slug_changed_at IS 'When the platform slug was last changed';
COMMENT ON COLUMN dutchie_discovery_locations.previous_slug IS 'Previous slug before the last change';
COMMENT ON COLUMN dutchie_discovery_locations.retired_at IS 'When store was marked as retired/removed';
COMMENT ON COLUMN dutchie_discovery_locations.retirement_reason IS 'Reason for retirement (removed_from_source, closed, etc.)';

-- Index for finding retired stores (only rows that have a retirement time)
CREATE INDEX IF NOT EXISTS idx_dutchie_discovery_locations_retired
    ON dutchie_discovery_locations(retired_at)
    WHERE retired_at IS NOT NULL;
|
||||
|
||||
-- ============================================================
|
||||
-- 3. ID RESOLUTION TRACKING FOR HENRY (Entry Point Finder)
|
||||
-- ============================================================
|
||||
|
||||
-- dispensaries: bookkeeping for platform_dispensary_id resolution attempts.
-- Each column is added only if it is not already there.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_id_resolution_at TIMESTAMPTZ;
COMMENT ON COLUMN dispensaries.last_id_resolution_at
    IS 'When platform_dispensary_id was last resolved/attempted';

ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS id_resolution_attempts INTEGER DEFAULT 0;
COMMENT ON COLUMN dispensaries.id_resolution_attempts
    IS 'Number of resolution attempts';

ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS id_resolution_error TEXT;
COMMENT ON COLUMN dispensaries.id_resolution_error
    IS 'Last error message from resolution attempt';

-- Partial index: dutchie stores that still have no platform_dispensary_id.
CREATE INDEX IF NOT EXISTS idx_dispensaries_needs_resolution
    ON dispensaries USING btree (state, menu_type)
    WHERE (platform_dispensary_id IS NULL AND menu_type = 'dutchie');
|
||||
|
||||
-- ============================================================
|
||||
-- 4. ENHANCED CITIES TABLE FOR ALICE
|
||||
-- ============================================================
|
||||
|
||||
-- dutchie_discovery_cities: extra tracking columns. Each column is added only
-- if it is not already there.
ALTER TABLE dutchie_discovery_cities
    ADD COLUMN IF NOT EXISTS state_name VARCHAR(100);
COMMENT ON COLUMN dutchie_discovery_cities.state_name
    IS 'Full state name from source';

-- NOTE(review): rows that exist when this column is first added receive the
-- time the migration runs, not their original created_at - confirm that is
-- acceptable for "first discovered".
ALTER TABLE dutchie_discovery_cities
    ADD COLUMN IF NOT EXISTS discovered_at TIMESTAMPTZ DEFAULT NOW();
COMMENT ON COLUMN dutchie_discovery_cities.discovered_at
    IS 'When city was first discovered';

ALTER TABLE dutchie_discovery_cities
    ADD COLUMN IF NOT EXISTS last_verified_at TIMESTAMPTZ;
COMMENT ON COLUMN dutchie_discovery_cities.last_verified_at
    IS 'When city was last verified to exist';

ALTER TABLE dutchie_discovery_cities
    ADD COLUMN IF NOT EXISTS store_count_reported INTEGER;
COMMENT ON COLUMN dutchie_discovery_cities.store_count_reported
    IS 'Store count reported by source';

ALTER TABLE dutchie_discovery_cities
    ADD COLUMN IF NOT EXISTS store_count_actual INTEGER;
COMMENT ON COLUMN dutchie_discovery_cities.store_count_actual
    IS 'Actual store count from discovery';
|
||||
|
||||
-- ============================================================
|
||||
-- 5. UPDATE WORKER ROLES (Standardize naming)
|
||||
-- ============================================================
|
||||
|
||||
-- Rename the display-style role labels to their snake_case equivalents.
-- A schedule is rewritten only when BOTH its worker_name and its current
-- worker_role match a mapping row, so customized roles are left untouched.
UPDATE job_schedules AS js
SET worker_role = role_map.new_role
FROM (
    VALUES
        ('Alice', 'Store Discovery',      'store_discovery'),
        ('Henry', 'Entry Point Finder',   'entry_point_finder'),
        ('Bella', 'GraphQL Product Sync', 'product_sync'),
        ('Oscar', 'Analytics Refresh',    'analytics_refresh')
) AS role_map (worker_name, old_role, new_role)
WHERE js.worker_name = role_map.worker_name
  AND js.worker_role = role_map.old_role;
|
||||
|
||||
-- ============================================================
|
||||
-- 6. VISIBILITY EVENTS IN SNAPSHOTS (JSONB approach)
|
||||
-- ============================================================
|
||||
|
||||
-- Add visibility_events array to product snapshots metadata
|
||||
-- This will store: [{event_type, timestamp, worker_name}]
|
||||
-- No schema change needed - we use existing metadata JSONB column
|
||||
|
||||
-- ============================================================
|
||||
-- 7. INDEXES FOR WORKER QUERIES
|
||||
-- ============================================================
|
||||
|
||||
-- Recently added discovery locations (for Henry): newest first, active rows only.
CREATE INDEX IF NOT EXISTS idx_dutchie_discovery_locations_created
    ON dutchie_discovery_locations USING btree (created_at DESC)
    WHERE (active = TRUE);

-- Scope-based queries by state: only dispensaries that have a menu_type.
CREATE INDEX IF NOT EXISTS idx_dispensaries_state_menu
    ON dispensaries USING btree (state, menu_type)
    WHERE (menu_type IS NOT NULL);
|
||||
|
||||
-- Record migration (only when the schema_migrations table exists).
-- Guarded like migration 056, which treats schema_migrations as optional;
-- without the guard a missing table would make this script fail on its last
-- statement. ON CONFLICT keeps re-runs from failing on the version key.
DO $$
BEGIN
    IF to_regclass('schema_migrations') IS NOT NULL THEN
        INSERT INTO schema_migrations (version, name, applied_at)
        VALUES (55, '055_workforce_enhancements', NOW())
        ON CONFLICT (version) DO NOTHING;
    END IF;
END $$;
|
||||
110
backend/migrations/056_fix_worker_and_run_logs.sql
Normal file
110
backend/migrations/056_fix_worker_and_run_logs.sql
Normal file
@@ -0,0 +1,110 @@
|
||||
-- Migration 056: Fix Worker Metadata and Job Run Logs
|
||||
--
|
||||
-- This migration safely ensures all expected schema exists for:
|
||||
-- 1. job_schedules - worker_name, worker_role columns
|
||||
-- 2. job_run_logs - entire table creation if missing
|
||||
--
|
||||
-- Uses IF NOT EXISTS / ADD COLUMN IF NOT EXISTS for idempotency.
|
||||
-- Safe to run on databases that already have some or all of these changes.
|
||||
|
||||
-- ============================================================
|
||||
-- 1. ADD MISSING COLUMNS TO job_schedules
|
||||
-- ============================================================
|
||||
|
||||
-- job_schedules: make sure the worker identity columns exist. Each column is
-- added only if it is not already there.
ALTER TABLE job_schedules
    ADD COLUMN IF NOT EXISTS worker_name VARCHAR(50);
COMMENT ON COLUMN job_schedules.worker_name
    IS 'Friendly name for the worker (e.g., Alice, Henry, Bella, Oscar)';

ALTER TABLE job_schedules
    ADD COLUMN IF NOT EXISTS worker_role VARCHAR(100);
COMMENT ON COLUMN job_schedules.worker_role
    IS 'Description of worker role (e.g., store_discovery, product_sync)';
|
||||
|
||||
-- ============================================================
|
||||
-- 2. CREATE job_run_logs TABLE IF NOT EXISTS
|
||||
-- ============================================================
|
||||
|
||||
-- One row per execution of a scheduled job. Created only when missing.
CREATE TABLE IF NOT EXISTS job_run_logs (
    id SERIAL PRIMARY KEY,
    schedule_id INTEGER NOT NULL REFERENCES job_schedules(id) ON DELETE CASCADE,
    job_name VARCHAR(100) NOT NULL,
    status VARCHAR(20) NOT NULL, -- 'pending', 'running', 'success', 'error', 'partial'
    started_at TIMESTAMPTZ,
    completed_at TIMESTAMPTZ,
    duration_ms INTEGER,
    error_message TEXT,

    -- Results summary
    items_processed INTEGER DEFAULT 0,
    items_succeeded INTEGER DEFAULT 0,
    items_failed INTEGER DEFAULT 0,

    -- Worker metadata (from scheduler.ts createRunLog function)
    worker_name VARCHAR(50),
    run_role VARCHAR(100),

    -- Additional run details
    metadata JSONB,

    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- If job_run_logs already existed, CREATE TABLE IF NOT EXISTS above did
-- nothing, so the worker metadata columns may still be missing. Add them here
-- so the worker_name index below cannot fail on a partially migrated database.
ALTER TABLE job_run_logs
    ADD COLUMN IF NOT EXISTS worker_name VARCHAR(50),
    ADD COLUMN IF NOT EXISTS run_role VARCHAR(100);

-- Create indexes if they don't exist
CREATE INDEX IF NOT EXISTS idx_job_run_logs_schedule ON job_run_logs(schedule_id);
CREATE INDEX IF NOT EXISTS idx_job_run_logs_job_name ON job_run_logs(job_name);
CREATE INDEX IF NOT EXISTS idx_job_run_logs_status ON job_run_logs(status);
CREATE INDEX IF NOT EXISTS idx_job_run_logs_created ON job_run_logs(created_at);
CREATE INDEX IF NOT EXISTS idx_job_run_logs_worker_name ON job_run_logs(worker_name);
|
||||
|
||||
-- ============================================================
|
||||
-- 3. ADD enqueued_by_worker TO dispensary_crawl_jobs IF EXISTS
|
||||
-- ============================================================
|
||||
|
||||
-- Add enqueued_by_worker (plus its comment and index) to dispensary_crawl_jobs,
-- but only when that table exists.
-- to_regclass() resolves the name through the search_path exactly as the
-- unqualified ALTER TABLE below does. The previous information_schema.tables
-- lookup was not limited to a schema, so a same-named table in another schema
-- could pass the check and then make the ALTER fail.
DO $$
BEGIN
    IF to_regclass('dispensary_crawl_jobs') IS NOT NULL THEN
        ALTER TABLE dispensary_crawl_jobs
            ADD COLUMN IF NOT EXISTS enqueued_by_worker VARCHAR(50);

        COMMENT ON COLUMN dispensary_crawl_jobs.enqueued_by_worker IS 'Name of the worker that enqueued this job';

        CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_enqueued_by
            ON dispensary_crawl_jobs(enqueued_by_worker);
    END IF;
END $$;
|
||||
|
||||
-- ============================================================
|
||||
-- 4. SEED DEFAULT WORKER NAMES FOR EXISTING SCHEDULES
|
||||
-- ============================================================
|
||||
|
||||
-- Assign the default worker name and role to the known schedules.
-- Only schedules whose worker_name is still NULL are updated, so values that
-- were set earlier are never overwritten.
UPDATE job_schedules AS js
SET worker_name = seed.worker_name,
    worker_role = seed.worker_role
FROM (
    VALUES
        ('dutchie_az_product_crawl',  'Bella', 'product_sync'),
        ('dutchie_az_menu_detection', 'Henry', 'entry_point_finder'),
        ('dutchie_store_discovery',   'Alice', 'store_discovery'),
        ('analytics_refresh',         'Oscar', 'analytics_refresh')
) AS seed (job_name, worker_name, worker_role)
WHERE js.job_name = seed.job_name
  AND js.worker_name IS NULL;
|
||||
|
||||
-- ============================================================
|
||||
-- 5. RECORD MIGRATION (if schema_migrations table exists)
|
||||
-- ============================================================
|
||||
|
||||
-- Record this migration when a schema_migrations table exists.
-- to_regclass() resolves the name through the search_path exactly as the
-- unqualified INSERT below does. The previous information_schema.tables
-- lookup was not limited to a schema, so a same-named table in another schema
-- could pass the check and then make the INSERT fail.
-- ON CONFLICT keeps re-runs from failing on the version key.
DO $$
BEGIN
    IF to_regclass('schema_migrations') IS NOT NULL THEN
        INSERT INTO schema_migrations (version, name, applied_at)
        VALUES (56, '056_fix_worker_and_run_logs', NOW())
        ON CONFLICT (version) DO NOTHING;
    END IF;
END $$;
|
||||
Reference in New Issue
Block a user