- Add migrations 021-023 for dispensary_crawl_schedule tables and views - Add dispensary-orchestrator service and bootstrap-discovery script - Update schedule routes with dispensary endpoints (/api/schedule/dispensaries) - Switch frontend scheduler to use canonical dispensaries table (182 AZDHS entries) - Add dispensary schedule API methods to frontend api.ts - Remove "Unmapped" badge logic - all dispensaries now linked properly - Add proper URL linking to dispensary detail pages (/dispensaries/:state/:city/:slug) - Update Jobs table to show dispensary_name 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
127 lines
4.3 KiB
SQL
127 lines
4.3 KiB
SQL
-- Migration 021: Dispensary Crawl Schedule
|
|
-- Creates a schedule table linked directly to dispensaries (not stores)
|
|
|
|
-- Dispensary crawl schedule table
|
|
CREATE TABLE IF NOT EXISTS dispensary_crawl_schedule (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
|
|
|
-- Schedule settings
|
|
is_active BOOLEAN NOT NULL DEFAULT TRUE,
|
|
interval_minutes INTEGER NOT NULL DEFAULT 240, -- 4 hours default
|
|
priority INTEGER NOT NULL DEFAULT 0, -- Higher = scheduled first
|
|
|
|
-- Timing
|
|
last_run_at TIMESTAMPTZ,
|
|
next_run_at TIMESTAMPTZ,
|
|
|
|
-- Status tracking
|
|
last_status VARCHAR(50), -- 'success', 'error', 'sandbox_only', 'detection_only', 'running'
|
|
last_summary TEXT,
|
|
last_error TEXT,
|
|
last_duration_ms INTEGER,
|
|
|
|
-- Run tracking
|
|
consecutive_failures INTEGER DEFAULT 0,
|
|
total_runs INTEGER DEFAULT 0,
|
|
successful_runs INTEGER DEFAULT 0,
|
|
|
|
-- Metadata
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
|
|
|
UNIQUE(dispensary_id)
|
|
);
|
|
|
|
-- Indexes
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_active ON dispensary_crawl_schedule(is_active);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_next_run ON dispensary_crawl_schedule(next_run_at) WHERE is_active = TRUE;
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_status ON dispensary_crawl_schedule(last_status);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_priority ON dispensary_crawl_schedule(priority DESC, next_run_at ASC);
|
|
|
|
-- Dispensary crawl jobs table (separate from store crawl_jobs)
|
|
CREATE TABLE IF NOT EXISTS dispensary_crawl_jobs (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
|
schedule_id INTEGER REFERENCES dispensary_crawl_schedule(id) ON DELETE SET NULL,
|
|
|
|
-- Job info
|
|
job_type VARCHAR(50) NOT NULL DEFAULT 'orchestrator', -- 'orchestrator', 'detection', 'products', 'sandbox'
|
|
trigger_type VARCHAR(50) NOT NULL DEFAULT 'scheduled', -- 'scheduled', 'manual', 'bootstrap'
|
|
status VARCHAR(20) NOT NULL DEFAULT 'pending', -- 'pending', 'running', 'completed', 'failed', 'cancelled'
|
|
priority INTEGER DEFAULT 0,
|
|
|
|
-- Timing
|
|
scheduled_at TIMESTAMPTZ DEFAULT NOW(),
|
|
started_at TIMESTAMPTZ,
|
|
completed_at TIMESTAMPTZ,
|
|
duration_ms INTEGER,
|
|
|
|
-- Results
|
|
detection_ran BOOLEAN DEFAULT FALSE,
|
|
crawl_ran BOOLEAN DEFAULT FALSE,
|
|
crawl_type VARCHAR(20), -- 'production', 'sandbox', 'none'
|
|
products_found INTEGER,
|
|
products_new INTEGER,
|
|
products_updated INTEGER,
|
|
|
|
-- Detection results
|
|
detected_provider VARCHAR(50),
|
|
detected_confidence SMALLINT,
|
|
detected_mode VARCHAR(20),
|
|
|
|
-- Error tracking
|
|
error_message TEXT,
|
|
|
|
-- Worker tracking
|
|
worker_id VARCHAR(100),
|
|
run_id UUID,
|
|
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
-- Indexes
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_status ON dispensary_crawl_jobs(status);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_dispensary ON dispensary_crawl_jobs(dispensary_id, created_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_pending ON dispensary_crawl_jobs(priority DESC, scheduled_at ASC) WHERE status = 'pending';
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_recent ON dispensary_crawl_jobs(created_at DESC);
|
|
|
|
-- View for dispensary schedule status with latest job info
|
|
CREATE OR REPLACE VIEW dispensary_crawl_status AS
|
|
SELECT
|
|
d.id AS dispensary_id,
|
|
d.name AS dispensary_name,
|
|
d.city,
|
|
d.website,
|
|
d.menu_url,
|
|
d.product_provider,
|
|
d.product_confidence,
|
|
d.product_crawler_mode,
|
|
d.last_product_scan_at,
|
|
COALESCE(dcs.is_active, TRUE) AS schedule_active,
|
|
COALESCE(dcs.interval_minutes, 240) AS interval_minutes,
|
|
COALESCE(dcs.priority, 0) AS priority,
|
|
dcs.last_run_at,
|
|
dcs.next_run_at,
|
|
dcs.last_status,
|
|
dcs.last_summary,
|
|
dcs.last_error,
|
|
dcs.consecutive_failures,
|
|
dcs.total_runs,
|
|
dcs.successful_runs,
|
|
dcj.id AS latest_job_id,
|
|
dcj.job_type AS latest_job_type,
|
|
dcj.status AS latest_job_status,
|
|
dcj.started_at AS latest_job_started,
|
|
dcj.products_found AS latest_products_found
|
|
FROM dispensaries d
|
|
LEFT JOIN dispensary_crawl_schedule dcs ON dcs.dispensary_id = d.id
|
|
LEFT JOIN LATERAL (
|
|
SELECT * FROM dispensary_crawl_jobs
|
|
WHERE dispensary_id = d.id
|
|
ORDER BY created_at DESC
|
|
LIMIT 1
|
|
) dcj ON true
|
|
ORDER BY COALESCE(dcs.priority, 0) DESC, d.name;
|