fix(monitor): remove non-existent worker columns from job_run_logs query
The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
53
backend/migrations/034_crawl_status_fields.sql
Normal file
53
backend/migrations/034_crawl_status_fields.sql
Normal file
@@ -0,0 +1,53 @@
|
||||
-- Migration 034: crawl status fields for dispensary detection.
-- Adds columns on dispensaries that record provider detection state,
-- including the not_crawlable status. Every ADD COLUMN uses IF NOT EXISTS,
-- so a column that is already present is left alone.

-- crawl_status: one of ready, not_ready, not_crawlable (defaults to 'not_ready').
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS crawl_status VARCHAR(30) DEFAULT 'not_ready';

-- crawl_status_reason: why the row is in its current status
-- (e.g., "removed from Dutchie", "unsupported provider").
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS crawl_status_reason TEXT;

-- crawl_status_updated_at: when the status was last updated.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS crawl_status_updated_at TIMESTAMPTZ;

-- last_tested_menu_url: the menu_url that was tested, kept so a later
-- change of menu_url can be tracked.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_tested_menu_url TEXT;

-- last_http_status: HTTP status code from the last test (403, 404, 200, etc.).
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_http_status INTEGER;
|
||||
|
||||
-- Supports filtering dispensaries by crawl_status.
CREATE INDEX IF NOT EXISTS idx_dispensaries_crawl_status
    ON dispensaries (crawl_status);

-- Partial index on id, restricted to dutchie dispensaries that are ready
-- and already have a platform_dispensary_id.
CREATE INDEX IF NOT EXISTS idx_dispensaries_ready_dutchie
    ON dispensaries (id)
    WHERE crawl_status = 'ready'
      AND menu_type = 'dutchie'
      AND platform_dispensary_id IS NOT NULL;
|
||||
|
||||
-- Backfill crawl_status for existing dispensaries from their current state.
-- Step 1: every dutchie dispensary that already has a platform_dispensary_id
-- is marked ready (the original note cites Deeply Rooted as such a store).
UPDATE dispensaries
SET
    crawl_status = 'ready',
    crawl_status_reason = 'Platform ID resolved',
    crawl_status_updated_at = NOW()
WHERE platform_dispensary_id IS NOT NULL
  AND menu_type = 'dutchie';
|
||||
|
||||
-- Step 2: rows whose provider_detection_data carries not_crawlable = 'true'
-- are marked not_crawlable, whatever their menu_type; the reason text is
-- taken from the not_crawlable_reason key of the same column.
UPDATE dispensaries
SET
    crawl_status = 'not_crawlable',
    crawl_status_reason = provider_detection_data ->> 'not_crawlable_reason',
    crawl_status_updated_at = NOW()
WHERE provider_detection_data ->> 'not_crawlable' = 'true';
|
||||
|
||||
-- Step 3: dutchie dispensaries that still have no platform_dispensary_id
-- and whose crawl_status is NULL or 'not_ready' are recorded as not_ready
-- with an explanatory reason.
--
-- The status check is parenthesized on purpose: AND binds tighter than OR,
-- so an unparenthesized trailing "OR crawl_status = 'not_ready'" would match
-- every not_ready row regardless of menu_type or platform_dispensary_id and
-- overwrite its reason with 'Platform ID not resolved'.
UPDATE dispensaries
SET crawl_status = 'not_ready',
    crawl_status_reason = 'Platform ID not resolved',
    crawl_status_updated_at = NOW()
WHERE menu_type = 'dutchie'
  AND platform_dispensary_id IS NULL
  AND (crawl_status IS NULL OR crawl_status = 'not_ready');
|
||||
|
||||
-- Catalog descriptions for the crawl status columns.
COMMENT ON COLUMN dispensaries.crawl_status
    IS 'Crawl readiness: ready (can crawl), not_ready (needs setup), not_crawlable (removed/unsupported)';

COMMENT ON COLUMN dispensaries.crawl_status_reason
    IS 'Human-readable reason for current crawl status';
|
||||
Reference in New Issue
Block a user