40 lines
1.4 KiB
PL/PgSQL
40 lines
1.4 KiB
PL/PgSQL
-- Create brand scrape jobs table for tracking parallel scraping
|
|
CREATE TABLE IF NOT EXISTS brand_scrape_jobs (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id),
|
|
brand_slug TEXT NOT NULL,
|
|
brand_name TEXT NOT NULL,
|
|
status TEXT NOT NULL DEFAULT 'pending', -- 'pending', 'in_progress', 'completed', 'failed'
|
|
worker_id TEXT,
|
|
started_at TIMESTAMP,
|
|
completed_at TIMESTAMP,
|
|
products_found INTEGER DEFAULT 0,
|
|
products_saved INTEGER DEFAULT 0,
|
|
error_message TEXT,
|
|
retry_count INTEGER DEFAULT 0,
|
|
created_at TIMESTAMP DEFAULT NOW(),
|
|
updated_at TIMESTAMP DEFAULT NOW(),
|
|
UNIQUE(dispensary_id, brand_slug)
|
|
);
|
|
|
|
-- Indexes for performance
|
|
CREATE INDEX IF NOT EXISTS idx_brand_jobs_status ON brand_scrape_jobs(status);
|
|
CREATE INDEX IF NOT EXISTS idx_brand_jobs_dispensary ON brand_scrape_jobs(dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_brand_jobs_worker ON brand_scrape_jobs(worker_id) WHERE worker_id IS NOT NULL;
|
|
|
|
-- Function to automatically update updated_at timestamp
|
|
CREATE OR REPLACE FUNCTION update_brand_scrape_jobs_updated_at()
|
|
RETURNS TRIGGER AS $$
|
|
BEGIN
|
|
NEW.updated_at = NOW();
|
|
RETURN NEW;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
-- Trigger to update updated_at
|
|
DROP TRIGGER IF EXISTS trigger_update_brand_scrape_jobs_timestamp ON brand_scrape_jobs;
|
|
CREATE TRIGGER trigger_update_brand_scrape_jobs_timestamp
|
|
BEFORE UPDATE ON brand_scrape_jobs
|
|
FOR EACH ROW
|
|
EXECUTE FUNCTION update_brand_scrape_jobs_updated_at();
|