From 08b1994d0ef61aa410b5e36ca4c1961343b1217a Mon Sep 17 00:00:00 2001 From: Kelly Date: Tue, 2 Dec 2025 11:15:33 -0700 Subject: [PATCH] Add AZ dashboard views to consolidated schema --- backend/migrations/031_consolidate_schema.sql | 850 ++++++++++++++++++ 1 file changed, 850 insertions(+) create mode 100644 backend/migrations/031_consolidate_schema.sql diff --git a/backend/migrations/031_consolidate_schema.sql b/backend/migrations/031_consolidate_schema.sql new file mode 100644 index 00000000..a0c24a07 --- /dev/null +++ b/backend/migrations/031_consolidate_schema.sql @@ -0,0 +1,850 @@ +-- Migration 031: Consolidate Schema for crawlsy DB +-- This migration creates all tables from the legacy schema for the consolidated database +-- Run with: psql $CRAWLSY_DATABASE_URL -f migrations/031_consolidate_schema.sql + +-- Functions (needed for triggers) +CREATE OR REPLACE FUNCTION public.set_requires_recrawl() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + IF NEW.field_name IN ('website', 'menu_url') THEN + NEW.requires_recrawl := TRUE; + END IF; + RETURN NEW; +END; +$$; + +CREATE OR REPLACE FUNCTION public.update_api_token_updated_at() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + NEW.updated_at = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$; + +CREATE OR REPLACE FUNCTION public.update_brand_scrape_jobs_updated_at() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + NEW.updated_at = NOW(); + RETURN NEW; +END; +$$; + +CREATE OR REPLACE FUNCTION public.update_sandbox_timestamp() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + NEW.updated_at = NOW(); + RETURN NEW; +END; +$$; + +CREATE OR REPLACE FUNCTION public.update_schedule_updated_at() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + NEW.updated_at = NOW(); + RETURN NEW; +END; +$$; + +-- Core tables (in dependency order) + +-- users +CREATE TABLE IF NOT EXISTS public.users ( + id SERIAL PRIMARY KEY, + email character varying(255) NOT NULL UNIQUE, + password_hash character varying(255) NOT NULL, + role character varying(50) DEFAULT 'user', + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- settings +CREATE TABLE IF NOT EXISTS public.settings ( + key character varying(255) PRIMARY KEY, + value text, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- azdhs_list +CREATE TABLE IF NOT EXISTS public.azdhs_list ( + id SERIAL PRIMARY KEY, + name character varying(255) NOT NULL, + company_name character varying(255), + slug character varying(255), + address character varying(500), + city character varying(100), + state character varying(2) DEFAULT 'AZ', + zip character varying(10), + phone character varying(20), + email character varying(255), + status_line text, + azdhs_url text, + latitude numeric(10,8), + longitude numeric(11,8), + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + website text, + dba_name character varying(255), + google_rating numeric(2,1), + google_review_count integer +); + +-- dispensaries +CREATE TABLE IF NOT EXISTS public.dispensaries ( + id SERIAL PRIMARY KEY, + azdhs_id integer UNIQUE REFERENCES public.azdhs_list(id), + name character varying(255) NOT NULL, + company_name character varying(255), + dba_name character varying(255), + slug character varying(255) UNIQUE, + address character varying(500), + city character varying(100), + state character varying(2) DEFAULT 'AZ', + zip character varying(10), + phone character varying(20), + email character varying(255), + website text, + menu_url text, + latitude numeric(10,8), + longitude numeric(11,8), + hours_of_operation jsonb, + social_media jsonb, + license_number character varying(100), + license_type character varying(50), + license_status character varying(50), + license_expiry date, + google_place_id character varying(255), + google_rating numeric(2,1), + google_review_count integer, + yelp_id character varying(255), + yelp_rating numeric(2,1), + yelp_review_count integer, + last_enriched_at timestamp without time zone, + menu_provider character varying(100), + menu_provider_confidence integer, + provider_type character varying(100), + provider_detected_at timestamp without time zone, + detection_method character varying(100), + detection_metadata jsonb, + menu_scrape_status character varying(50), + menu_scrape_error text, + menu_last_scraped_at timestamp without time zone, + menu_product_count integer DEFAULT 0, + crawl_status character varying(50) DEFAULT 'idle', + last_crawl_at timestamp without time zone, + next_crawl_at timestamp without time zone, + crawl_frequency_hours integer DEFAULT 24, + consecutive_failures integer DEFAULT 0, + scrape_enabled boolean DEFAULT true, + scrape_priority integer DEFAULT 0, + crawler_mode character varying(50) DEFAULT 'sandbox', + crawler_status character varying(50) DEFAULT 'idle', + product_crawler_mode character varying(50) DEFAULT 'sandbox', + product_provider character varying(100), + specials_crawler_mode character varying(50) DEFAULT 'disabled', + specials_provider character varying(100), + brand_crawler_mode character varying(50) DEFAULT 'disabled', + brand_provider character varying(100), + metadata_crawler_mode character varying(50) DEFAULT 'disabled', + metadata_provider character varying(100), + notes text, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + platform_dispensary_id character varying(100), + dutchie_url text, + dutchie_detected_at timestamp without time zone, + dutchie_retailer_id character varying(100), + external_id character varying(255) +); + +-- stores +CREATE TABLE IF NOT EXISTS public.stores ( + id SERIAL PRIMARY KEY, + name character varying(255) NOT NULL, + slug character varying(255) UNIQUE, + menu_url text, + website_url text, + image_url text, + description text, + dutchie_plus boolean DEFAULT false, + city character varying(100), + state character varying(50), + address text, + phone character varying(20), + hours jsonb, + logo_url text, + logo_public_id character varying(255), + logo_width integer, + logo_height integer, + scrape_enabled boolean DEFAULT true, + last_scraped_at timestamp without time zone, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + status character varying(50) DEFAULT 'active', + dispensary_id integer REFERENCES public.dispensaries(id) ON DELETE SET NULL +); + +-- api_tokens +CREATE TABLE IF NOT EXISTS public.api_tokens ( + id SERIAL PRIMARY KEY, + name character varying(255) NOT NULL, + token character varying(255) NOT NULL UNIQUE, + description text, + user_id integer REFERENCES public.users(id) ON DELETE CASCADE, + active boolean DEFAULT true, + rate_limit integer DEFAULT 100, + allowed_endpoints text[], + expires_at timestamp without time zone, + last_used_at timestamp without time zone, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- api_token_usage +CREATE TABLE IF NOT EXISTS public.api_token_usage ( + id SERIAL PRIMARY KEY, + token_id integer REFERENCES public.api_tokens(id) ON DELETE CASCADE, + endpoint character varying(255) NOT NULL, + method character varying(10) NOT NULL, + status_code integer, + response_time_ms integer, + request_size integer, + response_size integer, + ip_address inet, + user_agent text, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- categories +CREATE TABLE IF NOT EXISTS public.categories ( + id SERIAL PRIMARY KEY, + store_id integer REFERENCES public.stores(id) ON DELETE CASCADE, + dispensary_id integer REFERENCES public.dispensaries(id) ON DELETE CASCADE, + name character varying(255) NOT NULL, + slug character varying(255) NOT NULL, + dutchie_url text NOT NULL, + scrape_enabled boolean DEFAULT true, + last_scraped_at timestamp without time zone, + product_count integer DEFAULT 0, + parent_id integer REFERENCES public.categories(id) ON DELETE CASCADE, + path character varying(500), + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + UNIQUE(store_id, slug) +); + +-- products +CREATE TABLE IF NOT EXISTS public.products ( + id SERIAL PRIMARY KEY, + store_id integer REFERENCES public.stores(id) ON DELETE CASCADE, + dispensary_id integer REFERENCES public.dispensaries(id) ON DELETE CASCADE, + category_id integer REFERENCES public.categories(id) ON DELETE SET NULL, + name character varying(255) NOT NULL, + slug character varying(255) NOT NULL, + brand character varying(255), + brand_slug character varying(255), + brand_external_id character varying(100), + enterprise_product_id character varying(100), + category character varying(100), + subcategory character varying(100), + strain_type character varying(50), + description text, + effects text[], + price numeric(10,2), + special_price numeric(10,2), + is_on_special boolean DEFAULT false, + weight character varying(50), + unit character varying(50), + thc_percentage numeric(5,2), + cbd_percentage numeric(5,2), + terpenes text[], + image_url text, + image_public_id character varying(255), + image_width integer, + image_height integer, + dutchie_id character varying(100), + dutchie_url text, + sku character varying(100), + in_stock boolean DEFAULT true, + stock_status character varying(50) DEFAULT 'in_stock', + stock_quantity integer, + availability_status character varying(50) DEFAULT 'available', + status character varying(50) DEFAULT 'active', + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + UNIQUE(store_id, slug) +); + +-- dutchie_products (AZ pipeline products) +CREATE TABLE IF NOT EXISTS public.dutchie_products ( + id SERIAL PRIMARY KEY, + dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, + external_product_id character varying(100) NOT NULL, + name character varying(500) NOT NULL, + brand_name character varying(255), + category character varying(100), + subcategory character varying(100), + strain_type character varying(50), + description text, + effects text[], + thc_content character varying(50), + cbd_content character varying(50), + primary_image_url text, + additional_images text[], + stock_status character varying(50) DEFAULT 'in_stock', + first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + UNIQUE(dispensary_id, external_product_id) +); + +-- dutchie_product_snapshots +CREATE TABLE IF NOT EXISTS public.dutchie_product_snapshots ( + id SERIAL PRIMARY KEY, + dutchie_product_id integer NOT NULL REFERENCES public.dutchie_products(id) ON DELETE CASCADE, + dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, + options jsonb, + raw_product_data jsonb, + crawled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- campaigns +CREATE TABLE IF NOT EXISTS public.campaigns ( + id SERIAL PRIMARY KEY, + name character varying(255) NOT NULL, + slug character varying(255) NOT NULL UNIQUE, + description text, + display_style character varying(50) DEFAULT 'grid', + active boolean DEFAULT true, + start_date timestamp without time zone, + end_date timestamp without time zone, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- campaign_products +CREATE TABLE IF NOT EXISTS public.campaign_products ( + id SERIAL PRIMARY KEY, + campaign_id integer REFERENCES public.campaigns(id) ON DELETE CASCADE, + product_id integer REFERENCES public.products(id) ON DELETE CASCADE, + display_order integer DEFAULT 0, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + UNIQUE(campaign_id, product_id) +); + +-- clicks +CREATE TABLE IF NOT EXISTS public.clicks ( + id SERIAL PRIMARY KEY, + product_id integer REFERENCES public.products(id) ON DELETE CASCADE, + campaign_id integer REFERENCES public.campaigns(id) ON DELETE SET NULL, + ip_address character varying(45), + user_agent text, + referer text, + clicked_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- proxies +CREATE TABLE IF NOT EXISTS public.proxies ( + id SERIAL PRIMARY KEY, + host character varying(255) NOT NULL, + port integer NOT NULL, + protocol character varying(10) DEFAULT 'http', + username character varying(255), + password character varying(255), + active boolean DEFAULT true, + is_anonymous boolean DEFAULT false, + last_tested_at timestamp without time zone, + last_test_result boolean, + response_time_ms integer, + fail_count integer DEFAULT 0, + success_count integer DEFAULT 0, + country_code character varying(2), + state character varying(50), + city character varying(100), + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + UNIQUE(host, port, protocol) +); + +-- failed_proxies +CREATE TABLE IF NOT EXISTS public.failed_proxies ( + id SERIAL PRIMARY KEY, + host character varying(255) NOT NULL, + port integer NOT NULL, + protocol character varying(10) DEFAULT 'http', + failure_reason text, + failed_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + UNIQUE(host, port, protocol) +); + +-- proxy_test_jobs +CREATE TABLE IF NOT EXISTS public.proxy_test_jobs ( + id SERIAL PRIMARY KEY, + status character varying(50) DEFAULT 'pending', + total_proxies integer DEFAULT 0, + tested_proxies integer DEFAULT 0, + successful_proxies integer DEFAULT 0, + failed_proxies integer DEFAULT 0, + started_at timestamp without time zone, + completed_at timestamp without time zone, + error_message text, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- crawl_jobs +CREATE TABLE IF NOT EXISTS public.crawl_jobs ( + id SERIAL PRIMARY KEY, + store_id integer REFERENCES public.stores(id) ON DELETE CASCADE, + status character varying(50) DEFAULT 'pending', + job_type character varying(50) DEFAULT 'full', + category_slug character varying(255), + priority integer DEFAULT 0, + scheduled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + started_at timestamp without time zone, + completed_at timestamp without time zone, + products_found integer DEFAULT 0, + products_updated integer DEFAULT 0, + products_created integer DEFAULT 0, + error_message text, + retry_count integer DEFAULT 0, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- crawler_schedule +CREATE TABLE IF NOT EXISTS public.crawler_schedule ( + id SERIAL PRIMARY KEY, + schedule_type character varying(100) NOT NULL UNIQUE, + cron_expression character varying(100) NOT NULL, + is_active boolean DEFAULT true, + last_run_at timestamp without time zone, + next_run_at timestamp without time zone, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- store_crawl_schedule +CREATE TABLE IF NOT EXISTS public.store_crawl_schedule ( + id SERIAL PRIMARY KEY, + store_id integer NOT NULL REFERENCES public.stores(id) ON DELETE CASCADE UNIQUE, + cron_expression character varying(100) NOT NULL, + is_active boolean DEFAULT true, + priority integer DEFAULT 0, + last_run_at timestamp without time zone, + next_run_at timestamp without time zone, + last_status character varying(50), + last_error text, + consecutive_failures integer DEFAULT 0, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- dispensary_crawl_schedule +CREATE TABLE IF NOT EXISTS public.dispensary_crawl_schedule ( + id SERIAL PRIMARY KEY, + dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE UNIQUE, + cron_expression character varying(100) NOT NULL, + is_active boolean DEFAULT true, + priority integer DEFAULT 0, + last_run_at timestamp without time zone, + next_run_at timestamp without time zone, + last_status character varying(50), + last_error text, + consecutive_failures integer DEFAULT 0, + metadata jsonb, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- dispensary_crawl_jobs +CREATE TABLE IF NOT EXISTS public.dispensary_crawl_jobs ( + id SERIAL PRIMARY KEY, + dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, + schedule_id integer REFERENCES public.dispensary_crawl_schedule(id) ON DELETE SET NULL, + status character varying(50) DEFAULT 'pending', + job_type character varying(50) DEFAULT 'full', + priority integer DEFAULT 0, + scheduled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + started_at timestamp without time zone, + completed_at timestamp without time zone, + products_found integer DEFAULT 0, + products_updated integer DEFAULT 0, + products_created integer DEFAULT 0, + brands_found integer DEFAULT 0, + error_message text, + metadata jsonb, + retry_count integer DEFAULT 0, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- dispensary_changes +CREATE TABLE IF NOT EXISTS public.dispensary_changes ( + id SERIAL PRIMARY KEY, + dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, + field_name character varying(100) NOT NULL, + old_value text, + new_value text, + status character varying(50) DEFAULT 'pending', + source character varying(100), + notes text, + requires_recrawl boolean DEFAULT false, + reviewed_by integer REFERENCES public.users(id), + reviewed_at timestamp without time zone, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- crawler_templates +CREATE TABLE IF NOT EXISTS public.crawler_templates ( + id SERIAL PRIMARY KEY, + provider character varying(100) NOT NULL, + name character varying(255) NOT NULL, + version integer DEFAULT 1, + description text, + config jsonb NOT NULL, + is_active boolean DEFAULT true, + is_default_for_provider boolean DEFAULT false, + success_count integer DEFAULT 0, + failure_count integer DEFAULT 0, + last_used_at timestamp without time zone, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + UNIQUE(provider, name, version) +); + +-- crawler_sandboxes +CREATE TABLE IF NOT EXISTS public.crawler_sandboxes ( + id SERIAL PRIMARY KEY, + dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, + category character varying(100) NOT NULL, + mode character varying(50) DEFAULT 'testing', + status character varying(50) DEFAULT 'pending', + suspected_menu_provider character varying(100), + template_name character varying(255), + config_overrides jsonb, + last_test_at timestamp without time zone, + last_test_result text, + test_count integer DEFAULT 0, + success_count integer DEFAULT 0, + notes text, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- sandbox_crawl_jobs +CREATE TABLE IF NOT EXISTS public.sandbox_crawl_jobs ( + id SERIAL PRIMARY KEY, + sandbox_id integer REFERENCES public.crawler_sandboxes(id) ON DELETE SET NULL, + dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, + category character varying(100) NOT NULL, + status character varying(50) DEFAULT 'pending', + priority integer DEFAULT 0, + scheduled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + started_at timestamp without time zone, + completed_at timestamp without time zone, + items_found integer DEFAULT 0, + items_saved integer DEFAULT 0, + error_message text, + result_data jsonb, + retry_count integer DEFAULT 0, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- brands +CREATE TABLE IF NOT EXISTS public.brands ( + id SERIAL PRIMARY KEY, + store_id integer NOT NULL REFERENCES public.stores(id) ON DELETE CASCADE, + dispensary_id integer REFERENCES public.dispensaries(id), + name character varying(255) NOT NULL, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + first_seen_at timestamp with time zone DEFAULT now(), + last_seen_at timestamp with time zone DEFAULT now(), + UNIQUE(store_id, name) +); + +-- brand_scrape_jobs +CREATE TABLE IF NOT EXISTS public.brand_scrape_jobs ( + id SERIAL PRIMARY KEY, + dispensary_id integer NOT NULL REFERENCES public.dispensaries(id), + brand_slug text NOT NULL, + brand_name text NOT NULL, + status text DEFAULT 'pending' NOT NULL, + worker_id text, + started_at timestamp without time zone, + completed_at timestamp without time zone, + products_found integer DEFAULT 0, + products_saved integer DEFAULT 0, + error_message text, + retry_count integer DEFAULT 0, + created_at timestamp without time zone DEFAULT now(), + updated_at timestamp without time zone DEFAULT now(), + UNIQUE(dispensary_id, brand_slug) +); + +-- brand_history +CREATE TABLE IF NOT EXISTS public.brand_history ( + id SERIAL PRIMARY KEY, + dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, + brand_name character varying(255) NOT NULL, + event_type character varying(20) NOT NULL, + event_at timestamp with time zone DEFAULT now(), + product_count integer, + metadata jsonb +); + +-- batch_history +CREATE TABLE IF NOT EXISTS public.batch_history ( + id SERIAL PRIMARY KEY, + product_id integer REFERENCES public.products(id) ON DELETE CASCADE, + thc_percentage numeric(5,2), + cbd_percentage numeric(5,2), + terpenes text[], + strain_type character varying(100), + recorded_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- price_history +CREATE TABLE IF NOT EXISTS public.price_history ( + id SERIAL PRIMARY KEY, + product_id integer REFERENCES public.products(id) ON DELETE CASCADE, + price numeric(10,2) NOT NULL, + special_price numeric(10,2), + recorded_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- product_categories +CREATE TABLE IF NOT EXISTS public.product_categories ( + id SERIAL PRIMARY KEY, + product_id integer NOT NULL REFERENCES public.products(id) ON DELETE CASCADE, + category_slug character varying(255) NOT NULL, + first_seen_at timestamp with time zone DEFAULT now(), + last_seen_at timestamp with time zone DEFAULT now(), + UNIQUE(product_id, category_slug) +); + +-- specials +CREATE TABLE IF NOT EXISTS public.specials ( + id SERIAL PRIMARY KEY, + store_id integer NOT NULL REFERENCES public.stores(id) ON DELETE CASCADE, + product_id integer REFERENCES public.products(id) ON DELETE CASCADE, + title character varying(255) NOT NULL, + description text, + discount_type character varying(50), + discount_value numeric(10,2), + min_purchase numeric(10,2), + valid_date date, + start_time time without time zone, + end_time time without time zone, + days_of_week text[], + is_recurring boolean DEFAULT false, + badge_text character varying(50), + image_url text, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- jobs (generic job queue) +CREATE TABLE IF NOT EXISTS public.jobs ( + id SERIAL PRIMARY KEY, + store_id integer REFERENCES public.stores(id) ON DELETE CASCADE, + type character varying(100) NOT NULL, + status character varying(50) DEFAULT 'pending', + payload jsonb, + result jsonb, + error_message text, + started_at timestamp without time zone, + completed_at timestamp without time zone, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +-- wp_dutchie_api_permissions (WordPress API permissions) +CREATE TABLE IF NOT EXISTS public.wp_dutchie_api_permissions ( + id SERIAL PRIMARY KEY, + user_name character varying(255) NOT NULL, + api_key character varying(255) NOT NULL UNIQUE, + allowed_ips text, + allowed_domains text, + is_active smallint DEFAULT 1, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + last_used_at timestamp without time zone, + store_id integer REFERENCES public.stores(id), + store_name character varying(255), + dispensary_id integer REFERENCES public.dispensaries(id) +); + +-- Create indexes (only if they don't exist) +CREATE INDEX IF NOT EXISTS idx_api_token_usage_created_at ON public.api_token_usage(created_at); +CREATE INDEX IF NOT EXISTS idx_api_token_usage_endpoint ON public.api_token_usage(endpoint); +CREATE INDEX IF NOT EXISTS idx_api_token_usage_token_id ON public.api_token_usage(token_id); +CREATE INDEX IF NOT EXISTS idx_api_tokens_active ON public.api_tokens(active); +CREATE INDEX IF NOT EXISTS idx_api_tokens_token ON public.api_tokens(token); +CREATE INDEX IF NOT EXISTS idx_batch_history_product ON public.batch_history(product_id, recorded_at DESC); +CREATE INDEX IF NOT EXISTS idx_batch_history_recorded ON public.batch_history(recorded_at DESC); +CREATE INDEX IF NOT EXISTS idx_brand_history_brand ON public.brand_history(brand_name, event_at DESC); +CREATE INDEX IF NOT EXISTS idx_brand_history_dispensary ON public.brand_history(dispensary_id, event_at DESC); +CREATE INDEX IF NOT EXISTS idx_brand_history_event ON public.brand_history(event_type, event_at DESC); +CREATE INDEX IF NOT EXISTS idx_brand_jobs_dispensary ON public.brand_scrape_jobs(dispensary_id); +CREATE INDEX IF NOT EXISTS idx_brand_jobs_status ON public.brand_scrape_jobs(status); +CREATE INDEX IF NOT EXISTS idx_brands_dispensary ON public.brands(dispensary_id); +CREATE INDEX IF NOT EXISTS idx_brands_last_seen ON public.brands(last_seen_at DESC); +CREATE INDEX IF NOT EXISTS idx_brands_store_id ON public.brands(store_id); +CREATE INDEX IF NOT EXISTS idx_categories_dispensary_id ON public.categories(dispensary_id); +CREATE INDEX IF NOT EXISTS idx_categories_parent_id ON public.categories(parent_id); +CREATE INDEX IF NOT EXISTS idx_categories_path ON public.categories(path); +CREATE INDEX IF NOT EXISTS idx_clicks_campaign_id ON public.clicks(campaign_id); +CREATE INDEX IF NOT EXISTS idx_clicks_clicked_at ON public.clicks(clicked_at); +CREATE INDEX IF NOT EXISTS idx_clicks_product_id ON public.clicks(product_id); +CREATE INDEX IF NOT EXISTS idx_crawl_jobs_status ON public.crawl_jobs(status); +CREATE INDEX IF NOT EXISTS idx_dispensaries_azdhs_id ON public.dispensaries(azdhs_id); +CREATE INDEX IF NOT EXISTS idx_dispensaries_city ON public.dispensaries(city); +CREATE INDEX IF NOT EXISTS idx_dispensaries_crawl_status ON public.dispensaries(crawl_status); +CREATE INDEX IF NOT EXISTS idx_dispensaries_crawler_mode ON public.dispensaries(crawler_mode); +CREATE INDEX IF NOT EXISTS idx_dispensaries_crawler_status ON public.dispensaries(crawler_status); +CREATE INDEX IF NOT EXISTS idx_dispensaries_provider ON public.dispensaries(menu_provider); +CREATE INDEX IF NOT EXISTS idx_dispensaries_provider_confidence ON public.dispensaries(menu_provider_confidence); +CREATE INDEX IF NOT EXISTS idx_dispensaries_slug ON public.dispensaries(slug); +CREATE INDEX IF NOT EXISTS idx_dispensaries_state ON public.dispensaries(state); +CREATE INDEX IF NOT EXISTS idx_dispensary_changes_created_at ON public.dispensary_changes(created_at DESC); +CREATE INDEX IF NOT EXISTS idx_dispensary_changes_dispensary_status ON public.dispensary_changes(dispensary_id, status); +CREATE INDEX IF NOT EXISTS idx_dispensary_changes_status ON public.dispensary_changes(status); +CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_dispensary ON public.dispensary_crawl_jobs(dispensary_id, created_at DESC); +CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_recent ON public.dispensary_crawl_jobs(created_at DESC); +CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_status ON public.dispensary_crawl_jobs(status); +CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_active ON public.dispensary_crawl_schedule(is_active); +CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_status ON public.dispensary_crawl_schedule(last_status); +CREATE INDEX IF NOT EXISTS idx_jobs_status ON public.jobs(status); +CREATE INDEX IF NOT EXISTS idx_jobs_store_id ON public.jobs(store_id); +CREATE INDEX IF NOT EXISTS idx_jobs_type ON public.jobs(type); +CREATE INDEX IF NOT EXISTS idx_price_history_product ON public.price_history(product_id, recorded_at DESC); +CREATE INDEX IF NOT EXISTS idx_price_history_recorded ON public.price_history(recorded_at DESC); +CREATE INDEX IF NOT EXISTS idx_product_categories_product ON public.product_categories(product_id); +CREATE INDEX IF NOT EXISTS idx_products_availability_status ON public.products(availability_status); +CREATE INDEX IF NOT EXISTS idx_products_brand_external ON public.products(brand_external_id); +CREATE INDEX IF NOT EXISTS idx_products_dispensary_id ON public.products(dispensary_id); +CREATE INDEX IF NOT EXISTS idx_products_enterprise ON public.products(enterprise_product_id); +CREATE INDEX IF NOT EXISTS idx_products_is_special ON public.products(is_on_special); +CREATE INDEX IF NOT EXISTS idx_products_sku ON public.products(sku); +CREATE INDEX IF NOT EXISTS idx_products_status ON public.products(status); +CREATE INDEX IF NOT EXISTS idx_products_stock_status ON public.products(stock_status); +CREATE INDEX IF NOT EXISTS idx_products_subcategory ON public.products(subcategory); +CREATE INDEX IF NOT EXISTS idx_proxies_location ON public.proxies(country_code, state, city); +CREATE INDEX IF NOT EXISTS idx_proxy_test_jobs_created_at ON public.proxy_test_jobs(created_at DESC); +CREATE INDEX IF NOT EXISTS idx_proxy_test_jobs_status ON public.proxy_test_jobs(status); +CREATE INDEX IF NOT EXISTS idx_sandbox_category ON public.crawler_sandboxes(category); +CREATE INDEX IF NOT EXISTS idx_sandbox_dispensary ON public.crawler_sandboxes(dispensary_id); +CREATE INDEX IF NOT EXISTS idx_sandbox_job_category ON public.sandbox_crawl_jobs(category); +CREATE INDEX IF NOT EXISTS idx_sandbox_job_dispensary ON public.sandbox_crawl_jobs(dispensary_id); +CREATE INDEX IF NOT EXISTS idx_sandbox_job_status ON public.sandbox_crawl_jobs(status); +CREATE INDEX IF NOT EXISTS idx_sandbox_mode ON public.crawler_sandboxes(mode); +CREATE INDEX IF NOT EXISTS idx_sandbox_status ON public.crawler_sandboxes(status); +CREATE INDEX IF NOT EXISTS idx_sandbox_suspected_provider ON public.crawler_sandboxes(suspected_menu_provider); +CREATE INDEX IF NOT EXISTS idx_sandbox_template ON public.crawler_sandboxes(template_name); +CREATE INDEX IF NOT EXISTS idx_specials_product_id ON public.specials(product_id); +CREATE INDEX IF NOT EXISTS idx_stores_dispensary_id ON public.stores(dispensary_id); +CREATE INDEX IF NOT EXISTS idx_template_active ON public.crawler_templates(is_active); +CREATE INDEX IF NOT EXISTS idx_template_provider ON public.crawler_templates(provider); +CREATE INDEX IF NOT EXISTS idx_wp_api_permissions_store_id ON public.wp_dutchie_api_permissions(store_id); + +-- Create triggers (drop first if exists to avoid errors) +DROP TRIGGER IF EXISTS api_tokens_updated_at ON public.api_tokens; +CREATE TRIGGER api_tokens_updated_at BEFORE UPDATE ON public.api_tokens FOR EACH ROW EXECUTE FUNCTION public.update_api_token_updated_at(); + +DROP TRIGGER IF EXISTS trigger_crawl_jobs_updated_at ON public.crawl_jobs; +CREATE TRIGGER trigger_crawl_jobs_updated_at BEFORE UPDATE ON public.crawl_jobs FOR EACH ROW EXECUTE FUNCTION public.update_schedule_updated_at(); + +DROP TRIGGER IF EXISTS trigger_crawler_schedule_updated_at ON public.crawler_schedule; +CREATE TRIGGER trigger_crawler_schedule_updated_at BEFORE UPDATE ON public.crawler_schedule FOR EACH ROW EXECUTE FUNCTION public.update_schedule_updated_at(); + +DROP TRIGGER IF EXISTS trigger_sandbox_job_updated_at ON public.sandbox_crawl_jobs; +CREATE TRIGGER trigger_sandbox_job_updated_at BEFORE UPDATE ON public.sandbox_crawl_jobs FOR EACH ROW EXECUTE FUNCTION public.update_sandbox_timestamp(); + +DROP TRIGGER IF EXISTS trigger_sandbox_updated_at ON public.crawler_sandboxes; +CREATE TRIGGER trigger_sandbox_updated_at BEFORE UPDATE ON public.crawler_sandboxes FOR EACH ROW EXECUTE FUNCTION public.update_sandbox_timestamp(); + +DROP TRIGGER IF EXISTS trigger_set_requires_recrawl ON public.dispensary_changes; +CREATE TRIGGER trigger_set_requires_recrawl BEFORE INSERT ON public.dispensary_changes FOR EACH ROW EXECUTE FUNCTION public.set_requires_recrawl(); + +DROP TRIGGER IF EXISTS trigger_store_crawl_schedule_updated_at ON public.store_crawl_schedule; +CREATE TRIGGER trigger_store_crawl_schedule_updated_at BEFORE UPDATE ON public.store_crawl_schedule FOR EACH ROW EXECUTE FUNCTION public.update_schedule_updated_at(); + +DROP TRIGGER IF EXISTS trigger_template_updated_at ON public.crawler_templates; +CREATE TRIGGER trigger_template_updated_at BEFORE UPDATE ON public.crawler_templates FOR EACH ROW EXECUTE FUNCTION public.update_sandbox_timestamp(); + +DROP TRIGGER IF EXISTS trigger_update_brand_scrape_jobs_timestamp ON public.brand_scrape_jobs; +CREATE TRIGGER trigger_update_brand_scrape_jobs_timestamp BEFORE UPDATE ON public.brand_scrape_jobs FOR EACH ROW EXECUTE FUNCTION public.update_brand_scrape_jobs_updated_at(); + +-- ============================================ +-- Scheduler tables (job_schedules, job_run_logs) +-- ============================================ + +-- Function for updating job_schedules.updated_at +CREATE OR REPLACE FUNCTION public.update_job_schedule_updated_at() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + NEW.updated_at = NOW(); + RETURN NEW; +END; +$$; + +-- job_schedules - tracks scheduled job definitions +CREATE TABLE IF NOT EXISTS public.job_schedules ( + id SERIAL PRIMARY KEY, + job_name character varying(100) NOT NULL UNIQUE, + description text, + enabled boolean DEFAULT true, + base_interval_minutes integer DEFAULT 240, + jitter_minutes integer DEFAULT 30, + last_run_at timestamp without time zone, + last_status character varying(20), + last_error_message text, + last_duration_ms integer, + next_run_at timestamp without time zone, + job_config jsonb DEFAULT '{}'::jsonb, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +CREATE INDEX IF NOT EXISTS idx_job_schedules_enabled ON public.job_schedules(enabled); +CREATE INDEX IF NOT EXISTS idx_job_schedules_next_run ON public.job_schedules(next_run_at) WHERE enabled = true; + +DROP TRIGGER IF EXISTS trigger_job_schedule_updated_at ON public.job_schedules; +CREATE TRIGGER trigger_job_schedule_updated_at BEFORE UPDATE ON public.job_schedules FOR EACH ROW EXECUTE FUNCTION public.update_job_schedule_updated_at(); + +-- job_run_logs - tracks individual job execution history +CREATE TABLE IF NOT EXISTS public.job_run_logs ( + id SERIAL PRIMARY KEY, + schedule_id integer REFERENCES public.job_schedules(id) ON DELETE SET NULL, + job_name character varying(100) NOT NULL, + status character varying(20) NOT NULL, + started_at timestamp without time zone NOT NULL, + completed_at timestamp without time zone, + duration_ms integer, + error_message text, + items_processed integer DEFAULT 0, + items_succeeded integer DEFAULT 0, + items_failed integer DEFAULT 0, + metadata jsonb DEFAULT '{}'::jsonb, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP +); + +CREATE INDEX IF NOT EXISTS idx_job_run_logs_schedule ON public.job_run_logs(schedule_id); +CREATE INDEX IF NOT EXISTS idx_job_run_logs_job_name ON public.job_run_logs(job_name); +CREATE INDEX IF NOT EXISTS idx_job_run_logs_started_at ON public.job_run_logs(started_at DESC); +CREATE INDEX IF NOT EXISTS idx_job_run_logs_status ON public.job_run_logs(status); + +-- Insert default admin user if not exists +INSERT INTO public.users (email, password_hash, role) +SELECT 'admin@example.com', '$2b$10$K8/KqJyNqJKqJyNqJKqJyOqJKqJyNqJKqJyNqJKqJyNqJKqJyNqJK', 'admin' +WHERE NOT EXISTS (SELECT 1 FROM public.users WHERE email = 'admin@example.com'); + +-- Done! +SELECT 'Migration 031 completed successfully' as status;