Files
cannaiq/backend/migrations/053_dutchie_discovery_schema.sql
Kelly b4a2fb7d03 feat: Add v2 architecture with multi-state support and orchestrator services
Major additions:
- Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare
- Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator
- Discovery system: dutchie discovery service, geo validation, city seeding scripts
- Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages
- Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram)
- Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata

Frontend pages added:
- Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores
- StateHeatmap, CrossStateCompare, SyncInfoPanel

Components added:
- StateSelector, OrchestratorTraceModal, WorkflowStepper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-07 11:30:57 -07:00

347 lines
12 KiB
SQL

-- ============================================================================
-- Migration 053: Dutchie Discovery Schema
-- ============================================================================
--
-- Purpose: Create tables for Dutchie store discovery workflow.
-- Stores are discovered and held in staging tables until verified,
-- then promoted to the canonical dispensaries table.
--
-- Tables Created:
-- - dutchie_discovery_cities: City pages from Dutchie
-- - dutchie_discovery_locations: Individual store locations
--
-- SAFETY RULES:
-- - ALL tables use CREATE TABLE IF NOT EXISTS
-- - NO DROP, DELETE, TRUNCATE, or destructive operations
-- - Does NOT touch canonical dispensaries table
-- - Fully idempotent - safe to run multiple times
--
-- Run with:
-- psql "postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
-- -f migrations/053_dutchie_discovery_schema.sql
--
-- ============================================================================
-- ============================================================================
-- SECTION 1: DUTCHIE_DISCOVERY_CITIES
-- ============================================================================
-- Stores Dutchie city pages for systematic crawling.
-- Each city can contain multiple dispensary locations.
CREATE TABLE IF NOT EXISTS dutchie_discovery_cities (
id BIGSERIAL PRIMARY KEY,
-- Platform identification (future-proof for other platforms)
platform TEXT NOT NULL DEFAULT 'dutchie',
-- City identification
city_name TEXT NOT NULL,
city_slug TEXT NOT NULL,
state_code TEXT, -- 'AZ', 'CA', 'ON', etc.
country_code TEXT NOT NULL DEFAULT 'US',
-- Crawl management
last_crawled_at TIMESTAMPTZ,
crawl_enabled BOOLEAN NOT NULL DEFAULT TRUE,
location_count INTEGER, -- Number of locations found in this city
-- Metadata
notes TEXT,
metadata JSONB,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Add unique constraint if not exists
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_constraint
WHERE conname = 'dutchie_discovery_cities_unique'
) THEN
ALTER TABLE dutchie_discovery_cities
ADD CONSTRAINT dutchie_discovery_cities_unique
UNIQUE (platform, country_code, state_code, city_slug);
END IF;
EXCEPTION
WHEN duplicate_object THEN NULL;
WHEN OTHERS THEN NULL;
END $$;
-- Indexes
CREATE INDEX IF NOT EXISTS idx_discovery_cities_platform
ON dutchie_discovery_cities(platform);
CREATE INDEX IF NOT EXISTS idx_discovery_cities_state
ON dutchie_discovery_cities(country_code, state_code);
CREATE INDEX IF NOT EXISTS idx_discovery_cities_crawl_enabled
ON dutchie_discovery_cities(crawl_enabled)
WHERE crawl_enabled = TRUE;
CREATE INDEX IF NOT EXISTS idx_discovery_cities_last_crawled
ON dutchie_discovery_cities(last_crawled_at);
COMMENT ON TABLE dutchie_discovery_cities IS 'City pages from Dutchie for systematic store discovery.';
-- ============================================================================
-- SECTION 2: DUTCHIE_DISCOVERY_LOCATIONS
-- ============================================================================
-- Individual store locations discovered from Dutchie.
-- These are NOT promoted to canonical dispensaries until verified.
CREATE TABLE IF NOT EXISTS dutchie_discovery_locations (
id BIGSERIAL PRIMARY KEY,
-- Platform identification
platform TEXT NOT NULL DEFAULT 'dutchie',
platform_location_id TEXT NOT NULL, -- Dutchie's internal Location ID
platform_slug TEXT NOT NULL, -- URL slug for the store
platform_menu_url TEXT NOT NULL, -- Full menu URL
-- Store name
name TEXT NOT NULL,
-- Address components
raw_address TEXT,
address_line1 TEXT,
address_line2 TEXT,
city TEXT,
state_code TEXT, -- 'AZ', 'CA', 'ON', etc.
postal_code TEXT,
country_code TEXT, -- 'US' or 'CA'
-- Coordinates
latitude DOUBLE PRECISION,
longitude DOUBLE PRECISION,
timezone TEXT,
-- Discovery status
status TEXT NOT NULL DEFAULT 'discovered',
-- discovered: Just found, not yet verified
-- verified: Verified and promoted to canonical dispensaries
-- rejected: Manually rejected (e.g., duplicate, test store)
-- merged: Linked to existing canonical dispensary
-- Link to canonical dispensaries (only after verification)
dispensary_id INTEGER,
-- Reference to discovery city
discovery_city_id BIGINT,
-- Raw data from Dutchie
metadata JSONB,
notes TEXT,
-- Store capabilities (from Dutchie)
offers_delivery BOOLEAN,
offers_pickup BOOLEAN,
is_recreational BOOLEAN,
is_medical BOOLEAN,
-- Tracking
first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
last_checked_at TIMESTAMPTZ,
verified_at TIMESTAMPTZ,
verified_by TEXT, -- User who verified
active BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Add unique constraints if not exist
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_constraint
WHERE conname = 'dutchie_discovery_locations_platform_id_unique'
) THEN
ALTER TABLE dutchie_discovery_locations
ADD CONSTRAINT dutchie_discovery_locations_platform_id_unique
UNIQUE (platform, platform_location_id);
END IF;
EXCEPTION
WHEN duplicate_object THEN NULL;
WHEN OTHERS THEN NULL;
END $$;
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_constraint
WHERE conname = 'dutchie_discovery_locations_slug_unique'
) THEN
ALTER TABLE dutchie_discovery_locations
ADD CONSTRAINT dutchie_discovery_locations_slug_unique
UNIQUE (platform, platform_slug, country_code, state_code, city);
END IF;
EXCEPTION
WHEN duplicate_object THEN NULL;
WHEN OTHERS THEN NULL;
END $$;
-- Add FK to dispensaries if not exists (allows NULL)
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_constraint
WHERE conname = 'dutchie_discovery_locations_dispensary_fk'
) THEN
ALTER TABLE dutchie_discovery_locations
ADD CONSTRAINT dutchie_discovery_locations_dispensary_fk
FOREIGN KEY (dispensary_id) REFERENCES dispensaries(id) ON DELETE SET NULL;
END IF;
EXCEPTION
WHEN duplicate_object THEN NULL;
WHEN OTHERS THEN NULL;
END $$;
-- Add FK to discovery cities if not exists
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_constraint
WHERE conname = 'dutchie_discovery_locations_city_fk'
) THEN
ALTER TABLE dutchie_discovery_locations
ADD CONSTRAINT dutchie_discovery_locations_city_fk
FOREIGN KEY (discovery_city_id) REFERENCES dutchie_discovery_cities(id) ON DELETE SET NULL;
END IF;
EXCEPTION
WHEN duplicate_object THEN NULL;
WHEN OTHERS THEN NULL;
END $$;
-- Indexes
CREATE INDEX IF NOT EXISTS idx_discovery_locations_platform
ON dutchie_discovery_locations(platform);
CREATE INDEX IF NOT EXISTS idx_discovery_locations_status
ON dutchie_discovery_locations(status);
CREATE INDEX IF NOT EXISTS idx_discovery_locations_state
ON dutchie_discovery_locations(country_code, state_code);
CREATE INDEX IF NOT EXISTS idx_discovery_locations_city
ON dutchie_discovery_locations(city, state_code);
CREATE INDEX IF NOT EXISTS idx_discovery_locations_dispensary
ON dutchie_discovery_locations(dispensary_id)
WHERE dispensary_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_discovery_locations_discovered
ON dutchie_discovery_locations(status, first_seen_at DESC)
WHERE status = 'discovered';
CREATE INDEX IF NOT EXISTS idx_discovery_locations_active
ON dutchie_discovery_locations(active)
WHERE active = TRUE;
CREATE INDEX IF NOT EXISTS idx_discovery_locations_coords
ON dutchie_discovery_locations(latitude, longitude)
WHERE latitude IS NOT NULL AND longitude IS NOT NULL;
COMMENT ON TABLE dutchie_discovery_locations IS 'Discovered store locations from Dutchie. Held in staging until verified.';
-- ============================================================================
-- SECTION 3: ADD CANADIAN PROVINCES TO STATES TABLE
-- ============================================================================
-- Support for Canadian provinces (Ontario, BC, Alberta, etc.)
INSERT INTO states (code, name, timezone, is_active, crawl_enabled) VALUES
('AB', 'Alberta', 'America/Edmonton', TRUE, TRUE),
('BC', 'British Columbia', 'America/Vancouver', TRUE, TRUE),
('MB', 'Manitoba', 'America/Winnipeg', TRUE, TRUE),
('NB', 'New Brunswick', 'America/Moncton', TRUE, TRUE),
('NL', 'Newfoundland and Labrador', 'America/St_Johns', TRUE, TRUE),
('NS', 'Nova Scotia', 'America/Halifax', TRUE, TRUE),
('NT', 'Northwest Territories', 'America/Yellowknife', TRUE, TRUE),
('NU', 'Nunavut', 'America/Iqaluit', TRUE, TRUE),
('ON', 'Ontario', 'America/Toronto', TRUE, TRUE),
('PE', 'Prince Edward Island', 'America/Halifax', TRUE, TRUE),
('QC', 'Quebec', 'America/Montreal', TRUE, TRUE),
('SK', 'Saskatchewan', 'America/Regina', TRUE, TRUE),
('YT', 'Yukon', 'America/Whitehorse', TRUE, TRUE)
ON CONFLICT (code) DO UPDATE SET
name = EXCLUDED.name,
timezone = COALESCE(states.timezone, EXCLUDED.timezone),
updated_at = NOW();
-- ============================================================================
-- SECTION 4: VIEWS FOR DISCOVERY MONITORING
-- ============================================================================
-- View: Discovery status summary
CREATE OR REPLACE VIEW v_discovery_status AS
SELECT
platform,
country_code,
state_code,
status,
COUNT(*) AS location_count,
COUNT(*) FILTER (WHERE dispensary_id IS NOT NULL) AS linked_count,
MIN(first_seen_at) AS earliest_discovery,
MAX(last_seen_at) AS latest_activity
FROM dutchie_discovery_locations
GROUP BY platform, country_code, state_code, status
ORDER BY country_code, state_code, status;
-- View: Unverified discoveries awaiting action
CREATE OR REPLACE VIEW v_discovery_pending AS
SELECT
dl.id,
dl.platform,
dl.name,
dl.city,
dl.state_code,
dl.country_code,
dl.platform_menu_url,
dl.first_seen_at,
dl.last_seen_at,
dl.offers_delivery,
dl.offers_pickup,
dl.is_recreational,
dl.is_medical,
dc.city_name AS discovery_city_name
FROM dutchie_discovery_locations dl
LEFT JOIN dutchie_discovery_cities dc ON dc.id = dl.discovery_city_id
WHERE dl.status = 'discovered'
AND dl.active = TRUE
ORDER BY dl.state_code, dl.city, dl.name;
-- View: City crawl status
CREATE OR REPLACE VIEW v_discovery_cities_status AS
SELECT
dc.id,
dc.platform,
dc.city_name,
dc.state_code,
dc.country_code,
dc.crawl_enabled,
dc.last_crawled_at,
dc.location_count,
COUNT(dl.id) AS actual_locations,
COUNT(dl.id) FILTER (WHERE dl.status = 'discovered') AS pending_count,
COUNT(dl.id) FILTER (WHERE dl.status = 'verified') AS verified_count,
COUNT(dl.id) FILTER (WHERE dl.status = 'rejected') AS rejected_count
FROM dutchie_discovery_cities dc
LEFT JOIN dutchie_discovery_locations dl ON dl.discovery_city_id = dc.id
GROUP BY dc.id, dc.platform, dc.city_name, dc.state_code, dc.country_code,
dc.crawl_enabled, dc.last_crawled_at, dc.location_count
ORDER BY dc.country_code, dc.state_code, dc.city_name;
-- ============================================================================
-- DONE
-- ============================================================================
SELECT 'Migration 053 completed successfully. Discovery schema created.' AS status;