-- ============================================================================ -- Migration 053: Dutchie Discovery Schema -- ============================================================================ -- -- Purpose: Create tables for Dutchie store discovery workflow. -- Stores are discovered and held in staging tables until verified, -- then promoted to the canonical dispensaries table. -- -- Tables Created: -- - dutchie_discovery_cities: City pages from Dutchie -- - dutchie_discovery_locations: Individual store locations -- -- SAFETY RULES: -- - ALL tables use CREATE TABLE IF NOT EXISTS -- - NO DROP, DELETE, TRUNCATE, or destructive operations -- - Does NOT touch canonical dispensaries table -- - Fully idempotent - safe to run multiple times -- -- Run with: -- psql "postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \ -- -f migrations/053_dutchie_discovery_schema.sql -- -- ============================================================================ -- ============================================================================ -- SECTION 1: DUTCHIE_DISCOVERY_CITIES -- ============================================================================ -- Stores Dutchie city pages for systematic crawling. -- Each city can contain multiple dispensary locations. CREATE TABLE IF NOT EXISTS dutchie_discovery_cities ( id BIGSERIAL PRIMARY KEY, -- Platform identification (future-proof for other platforms) platform TEXT NOT NULL DEFAULT 'dutchie', -- City identification city_name TEXT NOT NULL, city_slug TEXT NOT NULL, state_code TEXT, -- 'AZ', 'CA', 'ON', etc. country_code TEXT NOT NULL DEFAULT 'US', -- Crawl management last_crawled_at TIMESTAMPTZ, crawl_enabled BOOLEAN NOT NULL DEFAULT TRUE, location_count INTEGER, -- Number of locations found in this city -- Metadata notes TEXT, metadata JSONB, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- Add unique constraint if not exists DO $$ BEGIN IF NOT EXISTS ( SELECT 1 FROM pg_constraint WHERE conname = 'dutchie_discovery_cities_unique' ) THEN ALTER TABLE dutchie_discovery_cities ADD CONSTRAINT dutchie_discovery_cities_unique UNIQUE (platform, country_code, state_code, city_slug); END IF; EXCEPTION WHEN duplicate_object THEN NULL; WHEN OTHERS THEN NULL; END $$; -- Indexes CREATE INDEX IF NOT EXISTS idx_discovery_cities_platform ON dutchie_discovery_cities(platform); CREATE INDEX IF NOT EXISTS idx_discovery_cities_state ON dutchie_discovery_cities(country_code, state_code); CREATE INDEX IF NOT EXISTS idx_discovery_cities_crawl_enabled ON dutchie_discovery_cities(crawl_enabled) WHERE crawl_enabled = TRUE; CREATE INDEX IF NOT EXISTS idx_discovery_cities_last_crawled ON dutchie_discovery_cities(last_crawled_at); COMMENT ON TABLE dutchie_discovery_cities IS 'City pages from Dutchie for systematic store discovery.'; -- ============================================================================ -- SECTION 2: DUTCHIE_DISCOVERY_LOCATIONS -- ============================================================================ -- Individual store locations discovered from Dutchie. -- These are NOT promoted to canonical dispensaries until verified. CREATE TABLE IF NOT EXISTS dutchie_discovery_locations ( id BIGSERIAL PRIMARY KEY, -- Platform identification platform TEXT NOT NULL DEFAULT 'dutchie', platform_location_id TEXT NOT NULL, -- Dutchie's internal Location ID platform_slug TEXT NOT NULL, -- URL slug for the store platform_menu_url TEXT NOT NULL, -- Full menu URL -- Store name name TEXT NOT NULL, -- Address components raw_address TEXT, address_line1 TEXT, address_line2 TEXT, city TEXT, state_code TEXT, -- 'AZ', 'CA', 'ON', etc. postal_code TEXT, country_code TEXT, -- 'US' or 'CA' -- Coordinates latitude DOUBLE PRECISION, longitude DOUBLE PRECISION, timezone TEXT, -- Discovery status status TEXT NOT NULL DEFAULT 'discovered', -- discovered: Just found, not yet verified -- verified: Verified and promoted to canonical dispensaries -- rejected: Manually rejected (e.g., duplicate, test store) -- merged: Linked to existing canonical dispensary -- Link to canonical dispensaries (only after verification) dispensary_id INTEGER, -- Reference to discovery city discovery_city_id BIGINT, -- Raw data from Dutchie metadata JSONB, notes TEXT, -- Store capabilities (from Dutchie) offers_delivery BOOLEAN, offers_pickup BOOLEAN, is_recreational BOOLEAN, is_medical BOOLEAN, -- Tracking first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), last_checked_at TIMESTAMPTZ, verified_at TIMESTAMPTZ, verified_by TEXT, -- User who verified active BOOLEAN NOT NULL DEFAULT TRUE, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- Add unique constraints if not exist DO $$ BEGIN IF NOT EXISTS ( SELECT 1 FROM pg_constraint WHERE conname = 'dutchie_discovery_locations_platform_id_unique' ) THEN ALTER TABLE dutchie_discovery_locations ADD CONSTRAINT dutchie_discovery_locations_platform_id_unique UNIQUE (platform, platform_location_id); END IF; EXCEPTION WHEN duplicate_object THEN NULL; WHEN OTHERS THEN NULL; END $$; DO $$ BEGIN IF NOT EXISTS ( SELECT 1 FROM pg_constraint WHERE conname = 'dutchie_discovery_locations_slug_unique' ) THEN ALTER TABLE dutchie_discovery_locations ADD CONSTRAINT dutchie_discovery_locations_slug_unique UNIQUE (platform, platform_slug, country_code, state_code, city); END IF; EXCEPTION WHEN duplicate_object THEN NULL; WHEN OTHERS THEN NULL; END $$; -- Add FK to dispensaries if not exists (allows NULL) DO $$ BEGIN IF NOT EXISTS ( SELECT 1 FROM pg_constraint WHERE conname = 'dutchie_discovery_locations_dispensary_fk' ) THEN ALTER TABLE dutchie_discovery_locations ADD CONSTRAINT dutchie_discovery_locations_dispensary_fk FOREIGN KEY (dispensary_id) REFERENCES dispensaries(id) ON DELETE SET NULL; END IF; EXCEPTION WHEN duplicate_object THEN NULL; WHEN OTHERS THEN NULL; END $$; -- Add FK to discovery cities if not exists DO $$ BEGIN IF NOT EXISTS ( SELECT 1 FROM pg_constraint WHERE conname = 'dutchie_discovery_locations_city_fk' ) THEN ALTER TABLE dutchie_discovery_locations ADD CONSTRAINT dutchie_discovery_locations_city_fk FOREIGN KEY (discovery_city_id) REFERENCES dutchie_discovery_cities(id) ON DELETE SET NULL; END IF; EXCEPTION WHEN duplicate_object THEN NULL; WHEN OTHERS THEN NULL; END $$; -- Indexes CREATE INDEX IF NOT EXISTS idx_discovery_locations_platform ON dutchie_discovery_locations(platform); CREATE INDEX IF NOT EXISTS idx_discovery_locations_status ON dutchie_discovery_locations(status); CREATE INDEX IF NOT EXISTS idx_discovery_locations_state ON dutchie_discovery_locations(country_code, state_code); CREATE INDEX IF NOT EXISTS idx_discovery_locations_city ON dutchie_discovery_locations(city, state_code); CREATE INDEX IF NOT EXISTS idx_discovery_locations_dispensary ON dutchie_discovery_locations(dispensary_id) WHERE dispensary_id IS NOT NULL; CREATE INDEX IF NOT EXISTS idx_discovery_locations_discovered ON dutchie_discovery_locations(status, first_seen_at DESC) WHERE status = 'discovered'; CREATE INDEX IF NOT EXISTS idx_discovery_locations_active ON dutchie_discovery_locations(active) WHERE active = TRUE; CREATE INDEX IF NOT EXISTS idx_discovery_locations_coords ON dutchie_discovery_locations(latitude, longitude) WHERE latitude IS NOT NULL AND longitude IS NOT NULL; COMMENT ON TABLE dutchie_discovery_locations IS 'Discovered store locations from Dutchie. Held in staging until verified.'; -- ============================================================================ -- SECTION 3: ADD CANADIAN PROVINCES TO STATES TABLE -- ============================================================================ -- Support for Canadian provinces (Ontario, BC, Alberta, etc.) INSERT INTO states (code, name, timezone, is_active, crawl_enabled) VALUES ('AB', 'Alberta', 'America/Edmonton', TRUE, TRUE), ('BC', 'British Columbia', 'America/Vancouver', TRUE, TRUE), ('MB', 'Manitoba', 'America/Winnipeg', TRUE, TRUE), ('NB', 'New Brunswick', 'America/Moncton', TRUE, TRUE), ('NL', 'Newfoundland and Labrador', 'America/St_Johns', TRUE, TRUE), ('NS', 'Nova Scotia', 'America/Halifax', TRUE, TRUE), ('NT', 'Northwest Territories', 'America/Yellowknife', TRUE, TRUE), ('NU', 'Nunavut', 'America/Iqaluit', TRUE, TRUE), ('ON', 'Ontario', 'America/Toronto', TRUE, TRUE), ('PE', 'Prince Edward Island', 'America/Halifax', TRUE, TRUE), ('QC', 'Quebec', 'America/Montreal', TRUE, TRUE), ('SK', 'Saskatchewan', 'America/Regina', TRUE, TRUE), ('YT', 'Yukon', 'America/Whitehorse', TRUE, TRUE) ON CONFLICT (code) DO UPDATE SET name = EXCLUDED.name, timezone = COALESCE(states.timezone, EXCLUDED.timezone), updated_at = NOW(); -- ============================================================================ -- SECTION 4: VIEWS FOR DISCOVERY MONITORING -- ============================================================================ -- View: Discovery status summary CREATE OR REPLACE VIEW v_discovery_status AS SELECT platform, country_code, state_code, status, COUNT(*) AS location_count, COUNT(*) FILTER (WHERE dispensary_id IS NOT NULL) AS linked_count, MIN(first_seen_at) AS earliest_discovery, MAX(last_seen_at) AS latest_activity FROM dutchie_discovery_locations GROUP BY platform, country_code, state_code, status ORDER BY country_code, state_code, status; -- View: Unverified discoveries awaiting action CREATE OR REPLACE VIEW v_discovery_pending AS SELECT dl.id, dl.platform, dl.name, dl.city, dl.state_code, dl.country_code, dl.platform_menu_url, dl.first_seen_at, dl.last_seen_at, dl.offers_delivery, dl.offers_pickup, dl.is_recreational, dl.is_medical, dc.city_name AS discovery_city_name FROM dutchie_discovery_locations dl LEFT JOIN dutchie_discovery_cities dc ON dc.id = dl.discovery_city_id WHERE dl.status = 'discovered' AND dl.active = TRUE ORDER BY dl.state_code, dl.city, dl.name; -- View: City crawl status CREATE OR REPLACE VIEW v_discovery_cities_status AS SELECT dc.id, dc.platform, dc.city_name, dc.state_code, dc.country_code, dc.crawl_enabled, dc.last_crawled_at, dc.location_count, COUNT(dl.id) AS actual_locations, COUNT(dl.id) FILTER (WHERE dl.status = 'discovered') AS pending_count, COUNT(dl.id) FILTER (WHERE dl.status = 'verified') AS verified_count, COUNT(dl.id) FILTER (WHERE dl.status = 'rejected') AS rejected_count FROM dutchie_discovery_cities dc LEFT JOIN dutchie_discovery_locations dl ON dl.discovery_city_id = dc.id GROUP BY dc.id, dc.platform, dc.city_name, dc.state_code, dc.country_code, dc.crawl_enabled, dc.last_crawled_at, dc.location_count ORDER BY dc.country_code, dc.state_code, dc.city_name; -- ============================================================================ -- DONE -- ============================================================================ SELECT 'Migration 053 completed successfully. Discovery schema created.' AS status;