Files
cannaiq/backend/migrations/021_dispensary_crawl_schedule.sql
Kelly 5306c3f4ca Switch scheduler UI to dispensary-based API
- Add migrations 021-023 for dispensary_crawl_schedule tables and views
- Add dispensary-orchestrator service and bootstrap-discovery script
- Update schedule routes with dispensary endpoints (/api/schedule/dispensaries)
- Switch frontend scheduler to use canonical dispensaries table (182 AZDHS entries)
- Add dispensary schedule API methods to frontend api.ts
- Remove "Unmapped" badge logic - all dispensaries now linked properly
- Add proper URL linking to dispensary detail pages (/dispensaries/:state/:city/:slug)
- Update Jobs table to show dispensary_name

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-30 18:54:52 -07:00

127 lines
4.3 KiB
SQL

-- Migration 021: Dispensary Crawl Schedule
-- Creates a schedule table linked directly to dispensaries (not stores)
-- Dispensary crawl schedule table
CREATE TABLE IF NOT EXISTS dispensary_crawl_schedule (
id SERIAL PRIMARY KEY,
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
-- Schedule settings
is_active BOOLEAN NOT NULL DEFAULT TRUE,
interval_minutes INTEGER NOT NULL DEFAULT 240, -- 4 hours default
priority INTEGER NOT NULL DEFAULT 0, -- Higher = scheduled first
-- Timing
last_run_at TIMESTAMPTZ,
next_run_at TIMESTAMPTZ,
-- Status tracking
last_status VARCHAR(50), -- 'success', 'error', 'sandbox_only', 'detection_only', 'running'
last_summary TEXT,
last_error TEXT,
last_duration_ms INTEGER,
-- Run tracking
consecutive_failures INTEGER DEFAULT 0,
total_runs INTEGER DEFAULT 0,
successful_runs INTEGER DEFAULT 0,
-- Metadata
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(dispensary_id)
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_active ON dispensary_crawl_schedule(is_active);
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_next_run ON dispensary_crawl_schedule(next_run_at) WHERE is_active = TRUE;
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_status ON dispensary_crawl_schedule(last_status);
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_priority ON dispensary_crawl_schedule(priority DESC, next_run_at ASC);
-- Dispensary crawl jobs table (separate from store crawl_jobs)
CREATE TABLE IF NOT EXISTS dispensary_crawl_jobs (
id SERIAL PRIMARY KEY,
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
schedule_id INTEGER REFERENCES dispensary_crawl_schedule(id) ON DELETE SET NULL,
-- Job info
job_type VARCHAR(50) NOT NULL DEFAULT 'orchestrator', -- 'orchestrator', 'detection', 'products', 'sandbox'
trigger_type VARCHAR(50) NOT NULL DEFAULT 'scheduled', -- 'scheduled', 'manual', 'bootstrap'
status VARCHAR(20) NOT NULL DEFAULT 'pending', -- 'pending', 'running', 'completed', 'failed', 'cancelled'
priority INTEGER DEFAULT 0,
-- Timing
scheduled_at TIMESTAMPTZ DEFAULT NOW(),
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
duration_ms INTEGER,
-- Results
detection_ran BOOLEAN DEFAULT FALSE,
crawl_ran BOOLEAN DEFAULT FALSE,
crawl_type VARCHAR(20), -- 'production', 'sandbox', 'none'
products_found INTEGER,
products_new INTEGER,
products_updated INTEGER,
-- Detection results
detected_provider VARCHAR(50),
detected_confidence SMALLINT,
detected_mode VARCHAR(20),
-- Error tracking
error_message TEXT,
-- Worker tracking
worker_id VARCHAR(100),
run_id UUID,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_status ON dispensary_crawl_jobs(status);
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_dispensary ON dispensary_crawl_jobs(dispensary_id, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_pending ON dispensary_crawl_jobs(priority DESC, scheduled_at ASC) WHERE status = 'pending';
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_recent ON dispensary_crawl_jobs(created_at DESC);
-- View for dispensary schedule status with latest job info
CREATE OR REPLACE VIEW dispensary_crawl_status AS
SELECT
d.id AS dispensary_id,
d.name AS dispensary_name,
d.city,
d.website,
d.menu_url,
d.product_provider,
d.product_confidence,
d.product_crawler_mode,
d.last_product_scan_at,
COALESCE(dcs.is_active, TRUE) AS schedule_active,
COALESCE(dcs.interval_minutes, 240) AS interval_minutes,
COALESCE(dcs.priority, 0) AS priority,
dcs.last_run_at,
dcs.next_run_at,
dcs.last_status,
dcs.last_summary,
dcs.last_error,
dcs.consecutive_failures,
dcs.total_runs,
dcs.successful_runs,
dcj.id AS latest_job_id,
dcj.job_type AS latest_job_type,
dcj.status AS latest_job_status,
dcj.started_at AS latest_job_started,
dcj.products_found AS latest_products_found
FROM dispensaries d
LEFT JOIN dispensary_crawl_schedule dcs ON dcs.dispensary_id = d.id
LEFT JOIN LATERAL (
SELECT * FROM dispensary_crawl_jobs
WHERE dispensary_id = d.id
ORDER BY created_at DESC
LIMIT 1
) dcj ON true
ORDER BY COALESCE(dcs.priority, 0) DESC, d.name;