The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
54 lines
2.3 KiB
SQL
54 lines
2.3 KiB
SQL
-- Migration 034: Add crawl status fields for dispensary detection
|
|
-- Tracks provider detection state and not_crawlable status
|
|
|
|
-- crawl_status holds the crawl readiness of a dispensary. The values used by
-- this migration are 'ready', 'not_ready' and 'not_crawlable'; the column
-- default is 'not_ready'.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS crawl_status VARCHAR(30) DEFAULT 'not_ready';
|
|
|
|
-- Free-text explanation of the current crawl_status
-- (e.g., "removed from Dutchie", "unsupported provider").
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS crawl_status_reason TEXT;
|
|
|
|
-- Timestamp (with time zone) of the most recent crawl_status change.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS crawl_status_updated_at TIMESTAMPTZ;
|
|
|
|
-- Records which menu_url was last tested, so a later change of menu_url
-- can be noticed.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_tested_menu_url TEXT;
|
|
|
|
-- HTTP response code returned by the last test (403, 404, 200, etc.).
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_http_status INTEGER;
|
|
|
|
-- Supports queries that filter dispensaries by crawl_status.
CREATE INDEX IF NOT EXISTS idx_dispensaries_crawl_status
    ON dispensaries (crawl_status);
|
|
|
|
-- Partial index on id restricted to the crawlable Dutchie set: rows whose
-- crawl_status is 'ready', whose menu_type is 'dutchie', and that have a
-- platform_dispensary_id.
CREATE INDEX IF NOT EXISTS idx_dispensaries_ready_dutchie
    ON dispensaries (id)
    WHERE platform_dispensary_id IS NOT NULL
      AND menu_type = 'dutchie'
      AND crawl_status = 'ready';
|
|
|
|
-- Update existing dispensaries based on current state:
-- 1. Every dutchie dispensary that already has a platform_dispensary_id
--    (Deeply Rooted is one such store) is marked 'ready'.
UPDATE dispensaries
SET (crawl_status, crawl_status_reason, crawl_status_updated_at)
    = ('ready', 'Platform ID resolved', NOW())
WHERE platform_dispensary_id IS NOT NULL
  AND menu_type = 'dutchie';
|
|
|
|
-- 2. Dispensaries flagged not_crawlable in provider_detection_data become
--    'not_crawlable'; the reason is copied from the not_crawlable_reason key.
--    There is no filter on the current crawl_status, so this overrides any
--    status assigned by an earlier statement.
UPDATE dispensaries
SET (crawl_status, crawl_status_reason, crawl_status_updated_at)
    = ('not_crawlable', provider_detection_data->>'not_crawlable_reason', NOW())
WHERE (provider_detection_data->>'not_crawlable') = 'true';
|
|
|
|
-- 3. All other dutchie stores are not_ready.
--    Only dutchie rows without a platform_dispensary_id are touched, and only
--    when their status is still unset or 'not_ready', so rows already marked
--    'not_crawlable' keep that status.
--    The parentheses around the OR are required: AND binds tighter than OR,
--    so without them the condition would match every row whose crawl_status
--    is 'not_ready' regardless of menu_type or platform_dispensary_id.
UPDATE dispensaries
SET crawl_status = 'not_ready',
    crawl_status_reason = 'Platform ID not resolved',
    crawl_status_updated_at = NOW()
WHERE menu_type = 'dutchie'
  AND platform_dispensary_id IS NULL
  AND (crawl_status IS NULL OR crawl_status = 'not_ready');
|
|
|
|
-- Catalog descriptions for the crawl status columns.
COMMENT ON COLUMN dispensaries.crawl_status
    IS 'Crawl readiness: ready (can crawl), not_ready (needs setup), not_crawlable (removed/unsupported)';
COMMENT ON COLUMN dispensaries.crawl_status_reason
    IS 'Human-readable reason for current crawl status';
|