-- Create brand scrape jobs table for tracking parallel scraping CREATE TABLE IF NOT EXISTS brand_scrape_jobs ( id SERIAL PRIMARY KEY, dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id), brand_slug TEXT NOT NULL, brand_name TEXT NOT NULL, status TEXT NOT NULL DEFAULT 'pending', -- 'pending', 'in_progress', 'completed', 'failed' worker_id TEXT, started_at TIMESTAMP, completed_at TIMESTAMP, products_found INTEGER DEFAULT 0, products_saved INTEGER DEFAULT 0, error_message TEXT, retry_count INTEGER DEFAULT 0, created_at TIMESTAMP DEFAULT NOW(), updated_at TIMESTAMP DEFAULT NOW(), UNIQUE(dispensary_id, brand_slug) ); -- Indexes for performance CREATE INDEX IF NOT EXISTS idx_brand_jobs_status ON brand_scrape_jobs(status); CREATE INDEX IF NOT EXISTS idx_brand_jobs_dispensary ON brand_scrape_jobs(dispensary_id); CREATE INDEX IF NOT EXISTS idx_brand_jobs_worker ON brand_scrape_jobs(worker_id) WHERE worker_id IS NOT NULL; -- Function to automatically update updated_at timestamp CREATE OR REPLACE FUNCTION update_brand_scrape_jobs_updated_at() RETURNS TRIGGER AS $$ BEGIN NEW.updated_at = NOW(); RETURN NEW; END; $$ LANGUAGE plpgsql; -- Trigger to update updated_at DROP TRIGGER IF EXISTS trigger_update_brand_scrape_jobs_timestamp ON brand_scrape_jobs; CREATE TRIGGER trigger_update_brand_scrape_jobs_timestamp BEFORE UPDATE ON brand_scrape_jobs FOR EACH ROW EXECUTE FUNCTION update_brand_scrape_jobs_updated_at();