Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
347 lines
12 KiB
SQL
347 lines
12 KiB
SQL
-- ============================================================================
|
|
-- Migration 053: Dutchie Discovery Schema
|
|
-- ============================================================================
|
|
--
|
|
-- Purpose: Create tables for Dutchie store discovery workflow.
|
|
-- Stores are discovered and held in staging tables until verified,
|
|
-- then promoted to the canonical dispensaries table.
|
|
--
|
|
-- Tables Created:
|
|
-- - dutchie_discovery_cities: City pages from Dutchie
|
|
-- - dutchie_discovery_locations: Individual store locations
|
|
--
|
|
-- SAFETY RULES:
|
|
-- - ALL tables use CREATE TABLE IF NOT EXISTS
|
|
-- - NO DROP, DELETE, TRUNCATE, or destructive operations
|
|
-- - Does NOT touch canonical dispensaries table
|
|
-- - Fully idempotent - safe to run multiple times
|
|
--
|
|
-- Run with:
|
|
-- psql "postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
|
|
-- -f migrations/053_dutchie_discovery_schema.sql
|
|
--
|
|
-- ============================================================================
|
|
|
|
|
|
-- ============================================================================
|
|
-- SECTION 1: DUTCHIE_DISCOVERY_CITIES
|
|
-- ============================================================================
|
|
-- Stores Dutchie city pages for systematic crawling.
|
|
-- Each city can contain multiple dispensary locations.
|
|
|
|
CREATE TABLE IF NOT EXISTS dutchie_discovery_cities (
|
|
id BIGSERIAL PRIMARY KEY,
|
|
|
|
-- Platform identification (future-proof for other platforms)
|
|
platform TEXT NOT NULL DEFAULT 'dutchie',
|
|
|
|
-- City identification
|
|
city_name TEXT NOT NULL,
|
|
city_slug TEXT NOT NULL,
|
|
state_code TEXT, -- 'AZ', 'CA', 'ON', etc.
|
|
country_code TEXT NOT NULL DEFAULT 'US',
|
|
|
|
-- Crawl management
|
|
last_crawled_at TIMESTAMPTZ,
|
|
crawl_enabled BOOLEAN NOT NULL DEFAULT TRUE,
|
|
location_count INTEGER, -- Number of locations found in this city
|
|
|
|
-- Metadata
|
|
notes TEXT,
|
|
metadata JSONB,
|
|
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
);
|
|
|
|
-- Add unique constraint if not exists
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'dutchie_discovery_cities_unique'
|
|
) THEN
|
|
ALTER TABLE dutchie_discovery_cities
|
|
ADD CONSTRAINT dutchie_discovery_cities_unique
|
|
UNIQUE (platform, country_code, state_code, city_slug);
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN NULL;
|
|
WHEN OTHERS THEN NULL;
|
|
END $$;
|
|
|
|
-- Indexes
|
|
CREATE INDEX IF NOT EXISTS idx_discovery_cities_platform
|
|
ON dutchie_discovery_cities(platform);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_discovery_cities_state
|
|
ON dutchie_discovery_cities(country_code, state_code);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_discovery_cities_crawl_enabled
|
|
ON dutchie_discovery_cities(crawl_enabled)
|
|
WHERE crawl_enabled = TRUE;
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_discovery_cities_last_crawled
|
|
ON dutchie_discovery_cities(last_crawled_at);
|
|
|
|
COMMENT ON TABLE dutchie_discovery_cities IS 'City pages from Dutchie for systematic store discovery.';
|
|
|
|
|
|
-- ============================================================================
|
|
-- SECTION 2: DUTCHIE_DISCOVERY_LOCATIONS
|
|
-- ============================================================================
|
|
-- Individual store locations discovered from Dutchie.
|
|
-- These are NOT promoted to canonical dispensaries until verified.
|
|
|
|
CREATE TABLE IF NOT EXISTS dutchie_discovery_locations (
|
|
id BIGSERIAL PRIMARY KEY,
|
|
|
|
-- Platform identification
|
|
platform TEXT NOT NULL DEFAULT 'dutchie',
|
|
platform_location_id TEXT NOT NULL, -- Dutchie's internal Location ID
|
|
platform_slug TEXT NOT NULL, -- URL slug for the store
|
|
platform_menu_url TEXT NOT NULL, -- Full menu URL
|
|
|
|
-- Store name
|
|
name TEXT NOT NULL,
|
|
|
|
-- Address components
|
|
raw_address TEXT,
|
|
address_line1 TEXT,
|
|
address_line2 TEXT,
|
|
city TEXT,
|
|
state_code TEXT, -- 'AZ', 'CA', 'ON', etc.
|
|
postal_code TEXT,
|
|
country_code TEXT, -- 'US' or 'CA'
|
|
|
|
-- Coordinates
|
|
latitude DOUBLE PRECISION,
|
|
longitude DOUBLE PRECISION,
|
|
timezone TEXT,
|
|
|
|
-- Discovery status
|
|
status TEXT NOT NULL DEFAULT 'discovered',
|
|
-- discovered: Just found, not yet verified
|
|
-- verified: Verified and promoted to canonical dispensaries
|
|
-- rejected: Manually rejected (e.g., duplicate, test store)
|
|
-- merged: Linked to existing canonical dispensary
|
|
|
|
-- Link to canonical dispensaries (only after verification)
|
|
dispensary_id INTEGER,
|
|
|
|
-- Reference to discovery city
|
|
discovery_city_id BIGINT,
|
|
|
|
-- Raw data from Dutchie
|
|
metadata JSONB,
|
|
notes TEXT,
|
|
|
|
-- Store capabilities (from Dutchie)
|
|
offers_delivery BOOLEAN,
|
|
offers_pickup BOOLEAN,
|
|
is_recreational BOOLEAN,
|
|
is_medical BOOLEAN,
|
|
|
|
-- Tracking
|
|
first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
last_checked_at TIMESTAMPTZ,
|
|
verified_at TIMESTAMPTZ,
|
|
verified_by TEXT, -- User who verified
|
|
|
|
active BOOLEAN NOT NULL DEFAULT TRUE,
|
|
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
);
|
|
|
|
-- Add unique constraints if not exist
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'dutchie_discovery_locations_platform_id_unique'
|
|
) THEN
|
|
ALTER TABLE dutchie_discovery_locations
|
|
ADD CONSTRAINT dutchie_discovery_locations_platform_id_unique
|
|
UNIQUE (platform, platform_location_id);
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN NULL;
|
|
WHEN OTHERS THEN NULL;
|
|
END $$;
|
|
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'dutchie_discovery_locations_slug_unique'
|
|
) THEN
|
|
ALTER TABLE dutchie_discovery_locations
|
|
ADD CONSTRAINT dutchie_discovery_locations_slug_unique
|
|
UNIQUE (platform, platform_slug, country_code, state_code, city);
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN NULL;
|
|
WHEN OTHERS THEN NULL;
|
|
END $$;
|
|
|
|
-- Add FK to dispensaries if not exists (allows NULL)
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'dutchie_discovery_locations_dispensary_fk'
|
|
) THEN
|
|
ALTER TABLE dutchie_discovery_locations
|
|
ADD CONSTRAINT dutchie_discovery_locations_dispensary_fk
|
|
FOREIGN KEY (dispensary_id) REFERENCES dispensaries(id) ON DELETE SET NULL;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN NULL;
|
|
WHEN OTHERS THEN NULL;
|
|
END $$;
|
|
|
|
-- Add FK to discovery cities if not exists
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (
|
|
SELECT 1 FROM pg_constraint
|
|
WHERE conname = 'dutchie_discovery_locations_city_fk'
|
|
) THEN
|
|
ALTER TABLE dutchie_discovery_locations
|
|
ADD CONSTRAINT dutchie_discovery_locations_city_fk
|
|
FOREIGN KEY (discovery_city_id) REFERENCES dutchie_discovery_cities(id) ON DELETE SET NULL;
|
|
END IF;
|
|
EXCEPTION
|
|
WHEN duplicate_object THEN NULL;
|
|
WHEN OTHERS THEN NULL;
|
|
END $$;
|
|
|
|
-- Indexes
|
|
CREATE INDEX IF NOT EXISTS idx_discovery_locations_platform
|
|
ON dutchie_discovery_locations(platform);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_discovery_locations_status
|
|
ON dutchie_discovery_locations(status);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_discovery_locations_state
|
|
ON dutchie_discovery_locations(country_code, state_code);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_discovery_locations_city
|
|
ON dutchie_discovery_locations(city, state_code);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_discovery_locations_dispensary
|
|
ON dutchie_discovery_locations(dispensary_id)
|
|
WHERE dispensary_id IS NOT NULL;
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_discovery_locations_discovered
|
|
ON dutchie_discovery_locations(status, first_seen_at DESC)
|
|
WHERE status = 'discovered';
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_discovery_locations_active
|
|
ON dutchie_discovery_locations(active)
|
|
WHERE active = TRUE;
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_discovery_locations_coords
|
|
ON dutchie_discovery_locations(latitude, longitude)
|
|
WHERE latitude IS NOT NULL AND longitude IS NOT NULL;
|
|
|
|
COMMENT ON TABLE dutchie_discovery_locations IS 'Discovered store locations from Dutchie. Held in staging until verified.';
|
|
|
|
|
|
-- ============================================================================
|
|
-- SECTION 3: ADD CANADIAN PROVINCES TO STATES TABLE
|
|
-- ============================================================================
|
|
-- Support for Canadian provinces (Ontario, BC, Alberta, etc.)
|
|
|
|
INSERT INTO states (code, name, timezone, is_active, crawl_enabled) VALUES
|
|
('AB', 'Alberta', 'America/Edmonton', TRUE, TRUE),
|
|
('BC', 'British Columbia', 'America/Vancouver', TRUE, TRUE),
|
|
('MB', 'Manitoba', 'America/Winnipeg', TRUE, TRUE),
|
|
('NB', 'New Brunswick', 'America/Moncton', TRUE, TRUE),
|
|
('NL', 'Newfoundland and Labrador', 'America/St_Johns', TRUE, TRUE),
|
|
('NS', 'Nova Scotia', 'America/Halifax', TRUE, TRUE),
|
|
('NT', 'Northwest Territories', 'America/Yellowknife', TRUE, TRUE),
|
|
('NU', 'Nunavut', 'America/Iqaluit', TRUE, TRUE),
|
|
('ON', 'Ontario', 'America/Toronto', TRUE, TRUE),
|
|
('PE', 'Prince Edward Island', 'America/Halifax', TRUE, TRUE),
|
|
('QC', 'Quebec', 'America/Montreal', TRUE, TRUE),
|
|
('SK', 'Saskatchewan', 'America/Regina', TRUE, TRUE),
|
|
('YT', 'Yukon', 'America/Whitehorse', TRUE, TRUE)
|
|
ON CONFLICT (code) DO UPDATE SET
|
|
name = EXCLUDED.name,
|
|
timezone = COALESCE(states.timezone, EXCLUDED.timezone),
|
|
updated_at = NOW();
|
|
|
|
|
|
-- ============================================================================
|
|
-- SECTION 4: VIEWS FOR DISCOVERY MONITORING
|
|
-- ============================================================================
|
|
|
|
-- View: Discovery status summary
|
|
CREATE OR REPLACE VIEW v_discovery_status AS
|
|
SELECT
|
|
platform,
|
|
country_code,
|
|
state_code,
|
|
status,
|
|
COUNT(*) AS location_count,
|
|
COUNT(*) FILTER (WHERE dispensary_id IS NOT NULL) AS linked_count,
|
|
MIN(first_seen_at) AS earliest_discovery,
|
|
MAX(last_seen_at) AS latest_activity
|
|
FROM dutchie_discovery_locations
|
|
GROUP BY platform, country_code, state_code, status
|
|
ORDER BY country_code, state_code, status;
|
|
|
|
-- View: Unverified discoveries awaiting action
|
|
CREATE OR REPLACE VIEW v_discovery_pending AS
|
|
SELECT
|
|
dl.id,
|
|
dl.platform,
|
|
dl.name,
|
|
dl.city,
|
|
dl.state_code,
|
|
dl.country_code,
|
|
dl.platform_menu_url,
|
|
dl.first_seen_at,
|
|
dl.last_seen_at,
|
|
dl.offers_delivery,
|
|
dl.offers_pickup,
|
|
dl.is_recreational,
|
|
dl.is_medical,
|
|
dc.city_name AS discovery_city_name
|
|
FROM dutchie_discovery_locations dl
|
|
LEFT JOIN dutchie_discovery_cities dc ON dc.id = dl.discovery_city_id
|
|
WHERE dl.status = 'discovered'
|
|
AND dl.active = TRUE
|
|
ORDER BY dl.state_code, dl.city, dl.name;
|
|
|
|
-- View: City crawl status
|
|
CREATE OR REPLACE VIEW v_discovery_cities_status AS
|
|
SELECT
|
|
dc.id,
|
|
dc.platform,
|
|
dc.city_name,
|
|
dc.state_code,
|
|
dc.country_code,
|
|
dc.crawl_enabled,
|
|
dc.last_crawled_at,
|
|
dc.location_count,
|
|
COUNT(dl.id) AS actual_locations,
|
|
COUNT(dl.id) FILTER (WHERE dl.status = 'discovered') AS pending_count,
|
|
COUNT(dl.id) FILTER (WHERE dl.status = 'verified') AS verified_count,
|
|
COUNT(dl.id) FILTER (WHERE dl.status = 'rejected') AS rejected_count
|
|
FROM dutchie_discovery_cities dc
|
|
LEFT JOIN dutchie_discovery_locations dl ON dl.discovery_city_id = dc.id
|
|
GROUP BY dc.id, dc.platform, dc.city_name, dc.state_code, dc.country_code,
|
|
dc.crawl_enabled, dc.last_crawled_at, dc.location_count
|
|
ORDER BY dc.country_code, dc.state_code, dc.city_name;
|
|
|
|
|
|
-- ============================================================================
|
|
-- DONE
|
|
-- ============================================================================
|
|
|
|
SELECT 'Migration 053 completed successfully. Discovery schema created.' AS status;
|