Initial commit - Dutchie dispensary scraper

This commit is contained in:
Kelly
2025-11-28 19:45:44 -07:00
commit 5757a8e9bd
23375 changed files with 3788799 additions and 0 deletions

View File

@@ -0,0 +1,39 @@
-- brand_scrape_jobs: bookkeeping table for parallel brand scraping.
-- Holds at most one job row per (dispensary, brand slug) pair; the row records
-- who is working on it, how far it got, and how it ended.
--
-- NOTE(review): CREATE TABLE IF NOT EXISTS is a no-op when the table already
-- exists, so edits to this definition never reach databases that have already
-- run it; schema changes need a separate ALTER TABLE migration.
CREATE TABLE IF NOT EXISTS brand_scrape_jobs (
    -- Identity ---------------------------------------------------------------
    id              SERIAL PRIMARY KEY,
    -- No ON DELETE clause is given, so the default (NO ACTION) applies: a
    -- dispensary row cannot be deleted while jobs still reference it.
    dispensary_id   INTEGER NOT NULL REFERENCES dispensaries(id),
    brand_slug      TEXT NOT NULL,
    brand_name      TEXT NOT NULL,

    -- Lifecycle ----------------------------------------------------------------
    -- Documented values: 'pending', 'in_progress', 'completed', 'failed'.
    -- NOTE(review): nothing in this table enforces that list (no CHECK).
    status          TEXT NOT NULL DEFAULT 'pending',
    -- presumably the identifier of the worker handling the job; NULL when unset
    worker_id       TEXT,
    started_at      TIMESTAMP,
    completed_at    TIMESTAMP,

    -- Outcome ------------------------------------------------------------------
    -- NOTE(review): the three counters default to 0 but are nullable, so an
    -- explicit NULL is still accepted.
    products_found  INTEGER DEFAULT 0,
    products_saved  INTEGER DEFAULT 0,
    error_message   TEXT,
    retry_count     INTEGER DEFAULT 0,

    -- Audit --------------------------------------------------------------------
    -- NOTE(review): all timestamps here are TIMESTAMP (without time zone).
    created_at      TIMESTAMP DEFAULT NOW(),
    updated_at      TIMESTAMP DEFAULT NOW(),

    -- A brand can be queued only once per dispensary.
    UNIQUE(dispensary_id, brand_slug)
);
-- Secondary indexes on brand_scrape_jobs. Each statement is guarded with
-- IF NOT EXISTS so the script can be re-run without error.

-- Lookup of jobs by lifecycle state.
CREATE INDEX IF NOT EXISTS idx_brand_jobs_status
    ON brand_scrape_jobs(status);

-- Lookup of jobs by dispensary.
-- NOTE(review): the table's UNIQUE(dispensary_id, brand_slug) constraint is
-- already backed by an index whose leading column is dispensary_id, so this
-- index is likely redundant -- confirm with EXPLAIN before dropping it.
CREATE INDEX IF NOT EXISTS idx_brand_jobs_dispensary
    ON brand_scrape_jobs(dispensary_id);

-- Partial index: only rows that have a worker_id are indexed, so rows with a
-- NULL worker_id add nothing to it.
CREATE INDEX IF NOT EXISTS idx_brand_jobs_worker
    ON brand_scrape_jobs(worker_id)
    WHERE worker_id IS NOT NULL;
-- Trigger function: stamps the row being written with the current time.
-- The assignment is unconditional, so any updated_at value supplied by the
-- triggering statement is overwritten. NOW() reports the start time of the
-- current transaction, so rows touched in one transaction share a timestamp.
-- Returning NEW hands the modified row back to the triggering statement.
CREATE OR REPLACE FUNCTION update_brand_scrape_jobs_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Wire the timestamp function to brand_scrape_jobs.
-- The trigger is dropped first (if present) and then created again, which
-- keeps this script re-runnable. It fires once per affected row, before the
-- UPDATE is applied, so the function's change to NEW is what gets stored.
-- NOTE(review): the EXECUTE FUNCTION spelling needs PostgreSQL 11 or newer.
DROP TRIGGER IF EXISTS trigger_update_brand_scrape_jobs_timestamp
    ON brand_scrape_jobs;

CREATE TRIGGER trigger_update_brand_scrape_jobs_timestamp
    BEFORE UPDATE ON brand_scrape_jobs
    FOR EACH ROW
    EXECUTE FUNCTION update_brand_scrape_jobs_updated_at();