-- Migration 021: Dispensary Crawl Schedule -- Creates a schedule table linked directly to dispensaries (not stores) -- Dispensary crawl schedule table CREATE TABLE IF NOT EXISTS dispensary_crawl_schedule ( id SERIAL PRIMARY KEY, dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE, -- Schedule settings is_active BOOLEAN NOT NULL DEFAULT TRUE, interval_minutes INTEGER NOT NULL DEFAULT 240, -- 4 hours default priority INTEGER NOT NULL DEFAULT 0, -- Higher = scheduled first -- Timing last_run_at TIMESTAMPTZ, next_run_at TIMESTAMPTZ, -- Status tracking last_status VARCHAR(50), -- 'success', 'error', 'sandbox_only', 'detection_only', 'running' last_summary TEXT, last_error TEXT, last_duration_ms INTEGER, -- Run tracking consecutive_failures INTEGER DEFAULT 0, total_runs INTEGER DEFAULT 0, successful_runs INTEGER DEFAULT 0, -- Metadata created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW(), UNIQUE(dispensary_id) ); -- Indexes CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_active ON dispensary_crawl_schedule(is_active); CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_next_run ON dispensary_crawl_schedule(next_run_at) WHERE is_active = TRUE; CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_status ON dispensary_crawl_schedule(last_status); CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_priority ON dispensary_crawl_schedule(priority DESC, next_run_at ASC); -- Dispensary crawl jobs table (separate from store crawl_jobs) CREATE TABLE IF NOT EXISTS dispensary_crawl_jobs ( id SERIAL PRIMARY KEY, dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE, schedule_id INTEGER REFERENCES dispensary_crawl_schedule(id) ON DELETE SET NULL, -- Job info job_type VARCHAR(50) NOT NULL DEFAULT 'orchestrator', -- 'orchestrator', 'detection', 'products', 'sandbox' trigger_type VARCHAR(50) NOT NULL DEFAULT 'scheduled', -- 'scheduled', 'manual', 'bootstrap' status VARCHAR(20) NOT NULL DEFAULT 'pending', -- 'pending', 'running', 'completed', 'failed', 'cancelled' priority INTEGER DEFAULT 0, -- Timing scheduled_at TIMESTAMPTZ DEFAULT NOW(), started_at TIMESTAMPTZ, completed_at TIMESTAMPTZ, duration_ms INTEGER, -- Results detection_ran BOOLEAN DEFAULT FALSE, crawl_ran BOOLEAN DEFAULT FALSE, crawl_type VARCHAR(20), -- 'production', 'sandbox', 'none' products_found INTEGER, products_new INTEGER, products_updated INTEGER, -- Detection results detected_provider VARCHAR(50), detected_confidence SMALLINT, detected_mode VARCHAR(20), -- Error tracking error_message TEXT, -- Worker tracking worker_id VARCHAR(100), run_id UUID, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW() ); -- Indexes CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_status ON dispensary_crawl_jobs(status); CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_dispensary ON dispensary_crawl_jobs(dispensary_id, created_at DESC); CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_pending ON dispensary_crawl_jobs(priority DESC, scheduled_at ASC) WHERE status = 'pending'; CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_recent ON dispensary_crawl_jobs(created_at DESC); -- View for dispensary schedule status with latest job info CREATE OR REPLACE VIEW dispensary_crawl_status AS SELECT d.id AS dispensary_id, d.name AS dispensary_name, d.city, d.website, d.menu_url, d.product_provider, d.product_confidence, d.product_crawler_mode, d.last_product_scan_at, COALESCE(dcs.is_active, TRUE) AS schedule_active, COALESCE(dcs.interval_minutes, 240) AS interval_minutes, COALESCE(dcs.priority, 0) AS priority, dcs.last_run_at, dcs.next_run_at, dcs.last_status, dcs.last_summary, dcs.last_error, dcs.consecutive_failures, dcs.total_runs, dcs.successful_runs, dcj.id AS latest_job_id, dcj.job_type AS latest_job_type, dcj.status AS latest_job_status, dcj.started_at AS latest_job_started, dcj.products_found AS latest_products_found FROM dispensaries d LEFT JOIN dispensary_crawl_schedule dcs ON dcs.dispensary_id = d.id LEFT JOIN LATERAL ( SELECT * FROM dispensary_crawl_jobs WHERE dispensary_id = d.id ORDER BY created_at DESC LIMIT 1 ) dcj ON true ORDER BY COALESCE(dcs.priority, 0) DESC, d.name;