-- Migration 034: Add crawl status fields for dispensary detection
-- Tracks provider detection state and not_crawlable status.
--
-- The DDL below is guarded with IF NOT EXISTS so it can be re-run safely.
-- The backfill UPDATEs at the bottom run in order; a later step can
-- overwrite what an earlier step wrote for the same row.

-- Crawl readiness: ready, not_ready, not_crawlable.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS crawl_status VARCHAR(30) DEFAULT 'not_ready';

-- Reason for the current status
-- (e.g., "removed from Dutchie", "unsupported provider").
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS crawl_status_reason TEXT;

-- When the status was last updated.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS crawl_status_updated_at TIMESTAMPTZ;

-- The menu_url that was tested (for tracking when it changes).
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_tested_menu_url TEXT;

-- HTTP status code from the last test (403, 404, 200, etc.).
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_http_status INTEGER;

-- Index for filtering by crawl status.
CREATE INDEX IF NOT EXISTS idx_dispensaries_crawl_status
    ON dispensaries (crawl_status);

-- Partial index covering only ready Dutchie dispensaries that have a
-- platform ID.
CREATE INDEX IF NOT EXISTS idx_dispensaries_ready_dutchie
    ON dispensaries (id)
    WHERE crawl_status = 'ready'
      AND menu_type = 'dutchie'
      AND platform_dispensary_id IS NOT NULL;

-- Backfill existing dispensaries based on their current state.

-- 1. Dutchie dispensaries that already have a platform_dispensary_id
--    (e.g. Deeply Rooted) are ready.
UPDATE dispensaries
SET
    crawl_status = 'ready',
    crawl_status_reason = 'Platform ID resolved',
    crawl_status_updated_at = NOW()
WHERE menu_type = 'dutchie'
  AND platform_dispensary_id IS NOT NULL;

-- 2. Dispensaries flagged not_crawlable in provider_detection_data.
--    Runs after step 1, so a flagged row ends up not_crawlable even if
--    step 1 marked it ready.
UPDATE dispensaries
SET
    crawl_status = 'not_crawlable',
    crawl_status_reason = provider_detection_data->>'not_crawlable_reason',
    crawl_status_updated_at = NOW()
WHERE provider_detection_data->>'not_crawlable' = 'true';

-- 3. All other dutchie stores are not_ready.
--    The status alternatives are parenthesized on purpose: AND binds
--    tighter than OR, so without the parentheses the trailing
--    "OR crawl_status = 'not_ready'" would match every not_ready row
--    regardless of menu_type or platform_dispensary_id.
UPDATE dispensaries
SET
    crawl_status = 'not_ready',
    crawl_status_reason = 'Platform ID not resolved',
    crawl_status_updated_at = NOW()
WHERE menu_type = 'dutchie'
  AND platform_dispensary_id IS NULL
  AND (crawl_status IS NULL OR crawl_status = 'not_ready');

COMMENT ON COLUMN dispensaries.crawl_status IS 'Crawl readiness: ready (can crawl), not_ready (needs setup), not_crawlable (removed/unsupported)';
COMMENT ON COLUMN dispensaries.crawl_status_reason IS 'Human-readable reason for current crawl status';