Files
cannaiq/backend/migrations/create-scrape-jobs.sql
2025-11-28 19:45:44 -07:00

40 lines
1.4 KiB
PL/PgSQL

-- Create brand scrape jobs table for tracking parallel scraping
CREATE TABLE IF NOT EXISTS brand_scrape_jobs (
id SERIAL PRIMARY KEY,
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id),
brand_slug TEXT NOT NULL,
brand_name TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending', -- 'pending', 'in_progress', 'completed', 'failed'
worker_id TEXT,
started_at TIMESTAMP,
completed_at TIMESTAMP,
products_found INTEGER DEFAULT 0,
products_saved INTEGER DEFAULT 0,
error_message TEXT,
retry_count INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW(),
UNIQUE(dispensary_id, brand_slug)
);
-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_brand_jobs_status ON brand_scrape_jobs(status);
CREATE INDEX IF NOT EXISTS idx_brand_jobs_dispensary ON brand_scrape_jobs(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_brand_jobs_worker ON brand_scrape_jobs(worker_id) WHERE worker_id IS NOT NULL;
-- Function to automatically update updated_at timestamp
CREATE OR REPLACE FUNCTION update_brand_scrape_jobs_updated_at()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Trigger to update updated_at
DROP TRIGGER IF EXISTS trigger_update_brand_scrape_jobs_timestamp ON brand_scrape_jobs;
CREATE TRIGGER trigger_update_brand_scrape_jobs_timestamp
BEFORE UPDATE ON brand_scrape_jobs
FOR EACH ROW
EXECUTE FUNCTION update_brand_scrape_jobs_updated_at();