-- Migration 031: Consolidate Schema for crawlsy DB -- This migration creates all tables from the legacy schema for the consolidated database -- Run with: psql $CRAWLSY_DATABASE_URL -f migrations/031_consolidate_schema.sql -- Functions (needed for triggers) CREATE OR REPLACE FUNCTION public.set_requires_recrawl() RETURNS trigger LANGUAGE plpgsql AS $$ BEGIN IF NEW.field_name IN ('website', 'menu_url') THEN NEW.requires_recrawl := TRUE; END IF; RETURN NEW; END; $$; CREATE OR REPLACE FUNCTION public.update_api_token_updated_at() RETURNS trigger LANGUAGE plpgsql AS $$ BEGIN NEW.updated_at = CURRENT_TIMESTAMP; RETURN NEW; END; $$; CREATE OR REPLACE FUNCTION public.update_brand_scrape_jobs_updated_at() RETURNS trigger LANGUAGE plpgsql AS $$ BEGIN NEW.updated_at = NOW(); RETURN NEW; END; $$; CREATE OR REPLACE FUNCTION public.update_sandbox_timestamp() RETURNS trigger LANGUAGE plpgsql AS $$ BEGIN NEW.updated_at = NOW(); RETURN NEW; END; $$; CREATE OR REPLACE FUNCTION public.update_schedule_updated_at() RETURNS trigger LANGUAGE plpgsql AS $$ BEGIN NEW.updated_at = NOW(); RETURN NEW; END; $$; -- Core tables (in dependency order) -- users CREATE TABLE IF NOT EXISTS public.users ( id SERIAL PRIMARY KEY, email character varying(255) NOT NULL UNIQUE, password_hash character varying(255) NOT NULL, role character varying(50) DEFAULT 'user', created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- settings CREATE TABLE IF NOT EXISTS public.settings ( key character varying(255) PRIMARY KEY, value text, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- azdhs_list CREATE TABLE IF NOT EXISTS public.azdhs_list ( id SERIAL PRIMARY KEY, name character varying(255) NOT NULL, company_name character varying(255), slug character varying(255), address character varying(500), city character varying(100), state character varying(2) DEFAULT 'AZ', zip character varying(10), phone character varying(20), email character varying(255), status_line text, azdhs_url text, latitude numeric(10,8), longitude numeric(11,8), created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, website text, dba_name character varying(255), google_rating numeric(2,1), google_review_count integer ); -- dispensaries CREATE TABLE IF NOT EXISTS public.dispensaries ( id SERIAL PRIMARY KEY, azdhs_id integer UNIQUE REFERENCES public.azdhs_list(id), name character varying(255) NOT NULL, company_name character varying(255), dba_name character varying(255), slug character varying(255) UNIQUE, address character varying(500), city character varying(100), state character varying(2) DEFAULT 'AZ', zip character varying(10), phone character varying(20), email character varying(255), website text, menu_url text, latitude numeric(10,8), longitude numeric(11,8), hours_of_operation jsonb, social_media jsonb, license_number character varying(100), license_type character varying(50), license_status character varying(50), license_expiry date, google_place_id character varying(255), google_rating numeric(2,1), google_review_count integer, yelp_id character varying(255), yelp_rating numeric(2,1), yelp_review_count integer, last_enriched_at timestamp without time zone, menu_provider character varying(100), menu_provider_confidence integer, provider_type character varying(100), provider_detected_at timestamp without time zone, detection_method character varying(100), detection_metadata jsonb, menu_scrape_status character varying(50), menu_scrape_error text, menu_last_scraped_at timestamp without time zone, menu_product_count integer DEFAULT 0, crawl_status character varying(50) DEFAULT 'idle', last_crawl_at timestamp without time zone, next_crawl_at timestamp without time zone, crawl_frequency_hours integer DEFAULT 24, consecutive_failures integer DEFAULT 0, scrape_enabled boolean DEFAULT true, scrape_priority integer DEFAULT 0, crawler_mode character varying(50) DEFAULT 'sandbox', crawler_status character varying(50) DEFAULT 'idle', product_crawler_mode character varying(50) DEFAULT 'sandbox', product_provider character varying(100), specials_crawler_mode character varying(50) DEFAULT 'disabled', specials_provider character varying(100), brand_crawler_mode character varying(50) DEFAULT 'disabled', brand_provider character varying(100), metadata_crawler_mode character varying(50) DEFAULT 'disabled', metadata_provider character varying(100), notes text, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, platform_dispensary_id character varying(100), dutchie_url text, dutchie_detected_at timestamp without time zone, dutchie_retailer_id character varying(100), external_id character varying(255) ); -- stores CREATE TABLE IF NOT EXISTS public.stores ( id SERIAL PRIMARY KEY, name character varying(255) NOT NULL, slug character varying(255) UNIQUE, menu_url text, website_url text, image_url text, description text, dutchie_plus boolean DEFAULT false, city character varying(100), state character varying(50), address text, phone character varying(20), hours jsonb, logo_url text, logo_public_id character varying(255), logo_width integer, logo_height integer, scrape_enabled boolean DEFAULT true, last_scraped_at timestamp without time zone, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, status character varying(50) DEFAULT 'active', dispensary_id integer REFERENCES public.dispensaries(id) ON DELETE SET NULL ); -- api_tokens CREATE TABLE IF NOT EXISTS public.api_tokens ( id SERIAL PRIMARY KEY, name character varying(255) NOT NULL, token character varying(255) NOT NULL UNIQUE, description text, user_id integer REFERENCES public.users(id) ON DELETE CASCADE, active boolean DEFAULT true, rate_limit integer DEFAULT 100, allowed_endpoints text[], expires_at timestamp without time zone, last_used_at timestamp without time zone, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- api_token_usage CREATE TABLE IF NOT EXISTS public.api_token_usage ( id SERIAL PRIMARY KEY, token_id integer REFERENCES public.api_tokens(id) ON DELETE CASCADE, endpoint character varying(255) NOT NULL, method character varying(10) NOT NULL, status_code integer, response_time_ms integer, request_size integer, response_size integer, ip_address inet, user_agent text, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- categories CREATE TABLE IF NOT EXISTS public.categories ( id SERIAL PRIMARY KEY, store_id integer REFERENCES public.stores(id) ON DELETE CASCADE, dispensary_id integer REFERENCES public.dispensaries(id) ON DELETE CASCADE, name character varying(255) NOT NULL, slug character varying(255) NOT NULL, dutchie_url text NOT NULL, scrape_enabled boolean DEFAULT true, last_scraped_at timestamp without time zone, product_count integer DEFAULT 0, parent_id integer REFERENCES public.categories(id) ON DELETE CASCADE, path character varying(500), created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, UNIQUE(store_id, slug) ); -- products CREATE TABLE IF NOT EXISTS public.products ( id SERIAL PRIMARY KEY, store_id integer REFERENCES public.stores(id) ON DELETE CASCADE, dispensary_id integer REFERENCES public.dispensaries(id) ON DELETE CASCADE, category_id integer REFERENCES public.categories(id) ON DELETE SET NULL, name character varying(255) NOT NULL, slug character varying(255) NOT NULL, brand character varying(255), brand_slug character varying(255), brand_external_id character varying(100), enterprise_product_id character varying(100), category character varying(100), subcategory character varying(100), strain_type character varying(50), description text, effects text[], price numeric(10,2), special_price numeric(10,2), is_on_special boolean DEFAULT false, weight character varying(50), unit character varying(50), thc_percentage numeric(5,2), cbd_percentage numeric(5,2), terpenes text[], image_url text, image_public_id character varying(255), image_width integer, image_height integer, dutchie_id character varying(100), dutchie_url text, sku character varying(100), in_stock boolean DEFAULT true, stock_status character varying(50) DEFAULT 'in_stock', stock_quantity integer, availability_status character varying(50) DEFAULT 'available', status character varying(50) DEFAULT 'active', created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, UNIQUE(store_id, slug) ); -- dutchie_products (AZ pipeline products) CREATE TABLE IF NOT EXISTS public.dutchie_products ( id SERIAL PRIMARY KEY, dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, external_product_id character varying(100) NOT NULL, name character varying(500) NOT NULL, brand_name character varying(255), category character varying(100), subcategory character varying(100), strain_type character varying(50), description text, effects text[], thc_content character varying(50), cbd_content character varying(50), primary_image_url text, additional_images text[], stock_status character varying(50) DEFAULT 'in_stock', first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, UNIQUE(dispensary_id, external_product_id) ); -- dutchie_product_snapshots CREATE TABLE IF NOT EXISTS public.dutchie_product_snapshots ( id SERIAL PRIMARY KEY, dutchie_product_id integer NOT NULL REFERENCES public.dutchie_products(id) ON DELETE CASCADE, dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, options jsonb, raw_product_data jsonb, crawled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- campaigns CREATE TABLE IF NOT EXISTS public.campaigns ( id SERIAL PRIMARY KEY, name character varying(255) NOT NULL, slug character varying(255) NOT NULL UNIQUE, description text, display_style character varying(50) DEFAULT 'grid', active boolean DEFAULT true, start_date timestamp without time zone, end_date timestamp without time zone, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- campaign_products CREATE TABLE IF NOT EXISTS public.campaign_products ( id SERIAL PRIMARY KEY, campaign_id integer REFERENCES public.campaigns(id) ON DELETE CASCADE, product_id integer REFERENCES public.products(id) ON DELETE CASCADE, display_order integer DEFAULT 0, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, UNIQUE(campaign_id, product_id) ); -- clicks CREATE TABLE IF NOT EXISTS public.clicks ( id SERIAL PRIMARY KEY, product_id integer REFERENCES public.products(id) ON DELETE CASCADE, campaign_id integer REFERENCES public.campaigns(id) ON DELETE SET NULL, ip_address character varying(45), user_agent text, referer text, clicked_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- proxies CREATE TABLE IF NOT EXISTS public.proxies ( id SERIAL PRIMARY KEY, host character varying(255) NOT NULL, port integer NOT NULL, protocol character varying(10) DEFAULT 'http', username character varying(255), password character varying(255), active boolean DEFAULT true, is_anonymous boolean DEFAULT false, last_tested_at timestamp without time zone, last_test_result boolean, response_time_ms integer, fail_count integer DEFAULT 0, success_count integer DEFAULT 0, country_code character varying(2), state character varying(50), city character varying(100), created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, UNIQUE(host, port, protocol) ); -- failed_proxies CREATE TABLE IF NOT EXISTS public.failed_proxies ( id SERIAL PRIMARY KEY, host character varying(255) NOT NULL, port integer NOT NULL, protocol character varying(10) DEFAULT 'http', failure_reason text, failed_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, UNIQUE(host, port, protocol) ); -- proxy_test_jobs CREATE TABLE IF NOT EXISTS public.proxy_test_jobs ( id SERIAL PRIMARY KEY, status character varying(50) DEFAULT 'pending', total_proxies integer DEFAULT 0, tested_proxies integer DEFAULT 0, successful_proxies integer DEFAULT 0, failed_proxies integer DEFAULT 0, started_at timestamp without time zone, completed_at timestamp without time zone, error_message text, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- crawl_jobs CREATE TABLE IF NOT EXISTS public.crawl_jobs ( id SERIAL PRIMARY KEY, store_id integer REFERENCES public.stores(id) ON DELETE CASCADE, status character varying(50) DEFAULT 'pending', job_type character varying(50) DEFAULT 'full', category_slug character varying(255), priority integer DEFAULT 0, scheduled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, started_at timestamp without time zone, completed_at timestamp without time zone, products_found integer DEFAULT 0, products_updated integer DEFAULT 0, products_created integer DEFAULT 0, error_message text, retry_count integer DEFAULT 0, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- crawler_schedule CREATE TABLE IF NOT EXISTS public.crawler_schedule ( id SERIAL PRIMARY KEY, schedule_type character varying(100) NOT NULL UNIQUE, cron_expression character varying(100) NOT NULL, is_active boolean DEFAULT true, last_run_at timestamp without time zone, next_run_at timestamp without time zone, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- store_crawl_schedule CREATE TABLE IF NOT EXISTS public.store_crawl_schedule ( id SERIAL PRIMARY KEY, store_id integer NOT NULL REFERENCES public.stores(id) ON DELETE CASCADE UNIQUE, cron_expression character varying(100) NOT NULL, is_active boolean DEFAULT true, priority integer DEFAULT 0, last_run_at timestamp without time zone, next_run_at timestamp without time zone, last_status character varying(50), last_error text, consecutive_failures integer DEFAULT 0, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- dispensary_crawl_schedule CREATE TABLE IF NOT EXISTS public.dispensary_crawl_schedule ( id SERIAL PRIMARY KEY, dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE UNIQUE, cron_expression character varying(100) NOT NULL, is_active boolean DEFAULT true, priority integer DEFAULT 0, last_run_at timestamp without time zone, next_run_at timestamp without time zone, last_status character varying(50), last_error text, consecutive_failures integer DEFAULT 0, metadata jsonb, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- dispensary_crawl_jobs CREATE TABLE IF NOT EXISTS public.dispensary_crawl_jobs ( id SERIAL PRIMARY KEY, dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, schedule_id integer REFERENCES public.dispensary_crawl_schedule(id) ON DELETE SET NULL, status character varying(50) DEFAULT 'pending', job_type character varying(50) DEFAULT 'full', priority integer DEFAULT 0, scheduled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, started_at timestamp without time zone, completed_at timestamp without time zone, products_found integer DEFAULT 0, products_updated integer DEFAULT 0, products_created integer DEFAULT 0, brands_found integer DEFAULT 0, error_message text, metadata jsonb, retry_count integer DEFAULT 0, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- dispensary_changes CREATE TABLE IF NOT EXISTS public.dispensary_changes ( id SERIAL PRIMARY KEY, dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, field_name character varying(100) NOT NULL, old_value text, new_value text, status character varying(50) DEFAULT 'pending', source character varying(100), notes text, requires_recrawl boolean DEFAULT false, reviewed_by integer REFERENCES public.users(id), reviewed_at timestamp without time zone, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- crawler_templates CREATE TABLE IF NOT EXISTS public.crawler_templates ( id SERIAL PRIMARY KEY, provider character varying(100) NOT NULL, name character varying(255) NOT NULL, version integer DEFAULT 1, description text, config jsonb NOT NULL, is_active boolean DEFAULT true, is_default_for_provider boolean DEFAULT false, success_count integer DEFAULT 0, failure_count integer DEFAULT 0, last_used_at timestamp without time zone, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, UNIQUE(provider, name, version) ); -- crawler_sandboxes CREATE TABLE IF NOT EXISTS public.crawler_sandboxes ( id SERIAL PRIMARY KEY, dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, category character varying(100) NOT NULL, mode character varying(50) DEFAULT 'testing', status character varying(50) DEFAULT 'pending', suspected_menu_provider character varying(100), template_name character varying(255), config_overrides jsonb, last_test_at timestamp without time zone, last_test_result text, test_count integer DEFAULT 0, success_count integer DEFAULT 0, notes text, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- sandbox_crawl_jobs CREATE TABLE IF NOT EXISTS public.sandbox_crawl_jobs ( id SERIAL PRIMARY KEY, sandbox_id integer REFERENCES public.crawler_sandboxes(id) ON DELETE SET NULL, dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, category character varying(100) NOT NULL, status character varying(50) DEFAULT 'pending', priority integer DEFAULT 0, scheduled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, started_at timestamp without time zone, completed_at timestamp without time zone, items_found integer DEFAULT 0, items_saved integer DEFAULT 0, error_message text, result_data jsonb, retry_count integer DEFAULT 0, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- brands CREATE TABLE IF NOT EXISTS public.brands ( id SERIAL PRIMARY KEY, store_id integer NOT NULL REFERENCES public.stores(id) ON DELETE CASCADE, dispensary_id integer REFERENCES public.dispensaries(id), name character varying(255) NOT NULL, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, first_seen_at timestamp with time zone DEFAULT now(), last_seen_at timestamp with time zone DEFAULT now(), UNIQUE(store_id, name) ); -- brand_scrape_jobs CREATE TABLE IF NOT EXISTS public.brand_scrape_jobs ( id SERIAL PRIMARY KEY, dispensary_id integer NOT NULL REFERENCES public.dispensaries(id), brand_slug text NOT NULL, brand_name text NOT NULL, status text DEFAULT 'pending' NOT NULL, worker_id text, started_at timestamp without time zone, completed_at timestamp without time zone, products_found integer DEFAULT 0, products_saved integer DEFAULT 0, error_message text, retry_count integer DEFAULT 0, created_at timestamp without time zone DEFAULT now(), updated_at timestamp without time zone DEFAULT now(), UNIQUE(dispensary_id, brand_slug) ); -- brand_history CREATE TABLE IF NOT EXISTS public.brand_history ( id SERIAL PRIMARY KEY, dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE, brand_name character varying(255) NOT NULL, event_type character varying(20) NOT NULL, event_at timestamp with time zone DEFAULT now(), product_count integer, metadata jsonb ); -- batch_history CREATE TABLE IF NOT EXISTS public.batch_history ( id SERIAL PRIMARY KEY, product_id integer REFERENCES public.products(id) ON DELETE CASCADE, thc_percentage numeric(5,2), cbd_percentage numeric(5,2), terpenes text[], strain_type character varying(100), recorded_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- price_history CREATE TABLE IF NOT EXISTS public.price_history ( id SERIAL PRIMARY KEY, product_id integer REFERENCES public.products(id) ON DELETE CASCADE, price numeric(10,2) NOT NULL, special_price numeric(10,2), recorded_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- product_categories CREATE TABLE IF NOT EXISTS public.product_categories ( id SERIAL PRIMARY KEY, product_id integer NOT NULL REFERENCES public.products(id) ON DELETE CASCADE, category_slug character varying(255) NOT NULL, first_seen_at timestamp with time zone DEFAULT now(), last_seen_at timestamp with time zone DEFAULT now(), UNIQUE(product_id, category_slug) ); -- specials CREATE TABLE IF NOT EXISTS public.specials ( id SERIAL PRIMARY KEY, store_id integer NOT NULL REFERENCES public.stores(id) ON DELETE CASCADE, product_id integer REFERENCES public.products(id) ON DELETE CASCADE, title character varying(255) NOT NULL, description text, discount_type character varying(50), discount_value numeric(10,2), min_purchase numeric(10,2), valid_date date, start_time time without time zone, end_time time without time zone, days_of_week text[], is_recurring boolean DEFAULT false, badge_text character varying(50), image_url text, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- jobs (generic job queue) CREATE TABLE IF NOT EXISTS public.jobs ( id SERIAL PRIMARY KEY, store_id integer REFERENCES public.stores(id) ON DELETE CASCADE, type character varying(100) NOT NULL, status character varying(50) DEFAULT 'pending', payload jsonb, result jsonb, error_message text, started_at timestamp without time zone, completed_at timestamp without time zone, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); -- wp_dutchie_api_permissions (WordPress API permissions) CREATE TABLE IF NOT EXISTS public.wp_dutchie_api_permissions ( id SERIAL PRIMARY KEY, user_name character varying(255) NOT NULL, api_key character varying(255) NOT NULL UNIQUE, allowed_ips text, allowed_domains text, is_active smallint DEFAULT 1, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, last_used_at timestamp without time zone, store_id integer REFERENCES public.stores(id), store_name character varying(255), dispensary_id integer REFERENCES public.dispensaries(id) ); -- Create indexes (only if they don't exist) CREATE INDEX IF NOT EXISTS idx_api_token_usage_created_at ON public.api_token_usage(created_at); CREATE INDEX IF NOT EXISTS idx_api_token_usage_endpoint ON public.api_token_usage(endpoint); CREATE INDEX IF NOT EXISTS idx_api_token_usage_token_id ON public.api_token_usage(token_id); CREATE INDEX IF NOT EXISTS idx_api_tokens_active ON public.api_tokens(active); CREATE INDEX IF NOT EXISTS idx_api_tokens_token ON public.api_tokens(token); CREATE INDEX IF NOT EXISTS idx_batch_history_product ON public.batch_history(product_id, recorded_at DESC); CREATE INDEX IF NOT EXISTS idx_batch_history_recorded ON public.batch_history(recorded_at DESC); CREATE INDEX IF NOT EXISTS idx_brand_history_brand ON public.brand_history(brand_name, event_at DESC); CREATE INDEX IF NOT EXISTS idx_brand_history_dispensary ON public.brand_history(dispensary_id, event_at DESC); CREATE INDEX IF NOT EXISTS idx_brand_history_event ON public.brand_history(event_type, event_at DESC); CREATE INDEX IF NOT EXISTS idx_brand_jobs_dispensary ON public.brand_scrape_jobs(dispensary_id); CREATE INDEX IF NOT EXISTS idx_brand_jobs_status ON public.brand_scrape_jobs(status); CREATE INDEX IF NOT EXISTS idx_brands_dispensary ON public.brands(dispensary_id); CREATE INDEX IF NOT EXISTS idx_brands_last_seen ON public.brands(last_seen_at DESC); CREATE INDEX IF NOT EXISTS idx_brands_store_id ON public.brands(store_id); CREATE INDEX IF NOT EXISTS idx_categories_dispensary_id ON public.categories(dispensary_id); CREATE INDEX IF NOT EXISTS idx_categories_parent_id ON public.categories(parent_id); CREATE INDEX IF NOT EXISTS idx_categories_path ON public.categories(path); CREATE INDEX IF NOT EXISTS idx_clicks_campaign_id ON public.clicks(campaign_id); CREATE INDEX IF NOT EXISTS idx_clicks_clicked_at ON public.clicks(clicked_at); CREATE INDEX IF NOT EXISTS idx_clicks_product_id ON public.clicks(product_id); CREATE INDEX IF NOT EXISTS idx_crawl_jobs_status ON public.crawl_jobs(status); CREATE INDEX IF NOT EXISTS idx_dispensaries_azdhs_id ON public.dispensaries(azdhs_id); CREATE INDEX IF NOT EXISTS idx_dispensaries_city ON public.dispensaries(city); CREATE INDEX IF NOT EXISTS idx_dispensaries_crawl_status ON public.dispensaries(crawl_status); CREATE INDEX IF NOT EXISTS idx_dispensaries_crawler_mode ON public.dispensaries(crawler_mode); CREATE INDEX IF NOT EXISTS idx_dispensaries_crawler_status ON public.dispensaries(crawler_status); CREATE INDEX IF NOT EXISTS idx_dispensaries_provider ON public.dispensaries(menu_provider); CREATE INDEX IF NOT EXISTS idx_dispensaries_provider_confidence ON public.dispensaries(menu_provider_confidence); CREATE INDEX IF NOT EXISTS idx_dispensaries_slug ON public.dispensaries(slug); CREATE INDEX IF NOT EXISTS idx_dispensaries_state ON public.dispensaries(state); CREATE INDEX IF NOT EXISTS idx_dispensary_changes_created_at ON public.dispensary_changes(created_at DESC); CREATE INDEX IF NOT EXISTS idx_dispensary_changes_dispensary_status ON public.dispensary_changes(dispensary_id, status); CREATE INDEX IF NOT EXISTS idx_dispensary_changes_status ON public.dispensary_changes(status); CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_dispensary ON public.dispensary_crawl_jobs(dispensary_id, created_at DESC); CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_recent ON public.dispensary_crawl_jobs(created_at DESC); CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_status ON public.dispensary_crawl_jobs(status); CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_active ON public.dispensary_crawl_schedule(is_active); CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_status ON public.dispensary_crawl_schedule(last_status); CREATE INDEX IF NOT EXISTS idx_jobs_status ON public.jobs(status); CREATE INDEX IF NOT EXISTS idx_jobs_store_id ON public.jobs(store_id); CREATE INDEX IF NOT EXISTS idx_jobs_type ON public.jobs(type); CREATE INDEX IF NOT EXISTS idx_price_history_product ON public.price_history(product_id, recorded_at DESC); CREATE INDEX IF NOT EXISTS idx_price_history_recorded ON public.price_history(recorded_at DESC); CREATE INDEX IF NOT EXISTS idx_product_categories_product ON public.product_categories(product_id); CREATE INDEX IF NOT EXISTS idx_products_availability_status ON public.products(availability_status); CREATE INDEX IF NOT EXISTS idx_products_brand_external ON public.products(brand_external_id); CREATE INDEX IF NOT EXISTS idx_products_dispensary_id ON public.products(dispensary_id); CREATE INDEX IF NOT EXISTS idx_products_enterprise ON public.products(enterprise_product_id); CREATE INDEX IF NOT EXISTS idx_products_is_special ON public.products(is_on_special); CREATE INDEX IF NOT EXISTS idx_products_sku ON public.products(sku); CREATE INDEX IF NOT EXISTS idx_products_status ON public.products(status); CREATE INDEX IF NOT EXISTS idx_products_stock_status ON public.products(stock_status); CREATE INDEX IF NOT EXISTS idx_products_subcategory ON public.products(subcategory); CREATE INDEX IF NOT EXISTS idx_proxies_location ON public.proxies(country_code, state, city); CREATE INDEX IF NOT EXISTS idx_proxy_test_jobs_created_at ON public.proxy_test_jobs(created_at DESC); CREATE INDEX IF NOT EXISTS idx_proxy_test_jobs_status ON public.proxy_test_jobs(status); CREATE INDEX IF NOT EXISTS idx_sandbox_category ON public.crawler_sandboxes(category); CREATE INDEX IF NOT EXISTS idx_sandbox_dispensary ON public.crawler_sandboxes(dispensary_id); CREATE INDEX IF NOT EXISTS idx_sandbox_job_category ON public.sandbox_crawl_jobs(category); CREATE INDEX IF NOT EXISTS idx_sandbox_job_dispensary ON public.sandbox_crawl_jobs(dispensary_id); CREATE INDEX IF NOT EXISTS idx_sandbox_job_status ON public.sandbox_crawl_jobs(status); CREATE INDEX IF NOT EXISTS idx_sandbox_mode ON public.crawler_sandboxes(mode); CREATE INDEX IF NOT EXISTS idx_sandbox_status ON public.crawler_sandboxes(status); CREATE INDEX IF NOT EXISTS idx_sandbox_suspected_provider ON public.crawler_sandboxes(suspected_menu_provider); CREATE INDEX IF NOT EXISTS idx_sandbox_template ON public.crawler_sandboxes(template_name); CREATE INDEX IF NOT EXISTS idx_specials_product_id ON public.specials(product_id); CREATE INDEX IF NOT EXISTS idx_stores_dispensary_id ON public.stores(dispensary_id); CREATE INDEX IF NOT EXISTS idx_template_active ON public.crawler_templates(is_active); CREATE INDEX IF NOT EXISTS idx_template_provider ON public.crawler_templates(provider); CREATE INDEX IF NOT EXISTS idx_wp_api_permissions_store_id ON public.wp_dutchie_api_permissions(store_id); -- Create triggers (drop first if exists to avoid errors) DROP TRIGGER IF EXISTS api_tokens_updated_at ON public.api_tokens; CREATE TRIGGER api_tokens_updated_at BEFORE UPDATE ON public.api_tokens FOR EACH ROW EXECUTE FUNCTION public.update_api_token_updated_at(); DROP TRIGGER IF EXISTS trigger_crawl_jobs_updated_at ON public.crawl_jobs; CREATE TRIGGER trigger_crawl_jobs_updated_at BEFORE UPDATE ON public.crawl_jobs FOR EACH ROW EXECUTE FUNCTION public.update_schedule_updated_at(); DROP TRIGGER IF EXISTS trigger_crawler_schedule_updated_at ON public.crawler_schedule; CREATE TRIGGER trigger_crawler_schedule_updated_at BEFORE UPDATE ON public.crawler_schedule FOR EACH ROW EXECUTE FUNCTION public.update_schedule_updated_at(); DROP TRIGGER IF EXISTS trigger_sandbox_job_updated_at ON public.sandbox_crawl_jobs; CREATE TRIGGER trigger_sandbox_job_updated_at BEFORE UPDATE ON public.sandbox_crawl_jobs FOR EACH ROW EXECUTE FUNCTION public.update_sandbox_timestamp(); DROP TRIGGER IF EXISTS trigger_sandbox_updated_at ON public.crawler_sandboxes; CREATE TRIGGER trigger_sandbox_updated_at BEFORE UPDATE ON public.crawler_sandboxes FOR EACH ROW EXECUTE FUNCTION public.update_sandbox_timestamp(); DROP TRIGGER IF EXISTS trigger_set_requires_recrawl ON public.dispensary_changes; CREATE TRIGGER trigger_set_requires_recrawl BEFORE INSERT ON public.dispensary_changes FOR EACH ROW EXECUTE FUNCTION public.set_requires_recrawl(); DROP TRIGGER IF EXISTS trigger_store_crawl_schedule_updated_at ON public.store_crawl_schedule; CREATE TRIGGER trigger_store_crawl_schedule_updated_at BEFORE UPDATE ON public.store_crawl_schedule FOR EACH ROW EXECUTE FUNCTION public.update_schedule_updated_at(); DROP TRIGGER IF EXISTS trigger_template_updated_at ON public.crawler_templates; CREATE TRIGGER trigger_template_updated_at BEFORE UPDATE ON public.crawler_templates FOR EACH ROW EXECUTE FUNCTION public.update_sandbox_timestamp(); DROP TRIGGER IF EXISTS trigger_update_brand_scrape_jobs_timestamp ON public.brand_scrape_jobs; CREATE TRIGGER trigger_update_brand_scrape_jobs_timestamp BEFORE UPDATE ON public.brand_scrape_jobs FOR EACH ROW EXECUTE FUNCTION public.update_brand_scrape_jobs_updated_at(); -- ============================================ -- Scheduler tables (job_schedules, job_run_logs) -- ============================================ -- Function for updating job_schedules.updated_at CREATE OR REPLACE FUNCTION public.update_job_schedule_updated_at() RETURNS trigger LANGUAGE plpgsql AS $$ BEGIN NEW.updated_at = NOW(); RETURN NEW; END; $$; -- job_schedules - tracks scheduled job definitions CREATE TABLE IF NOT EXISTS public.job_schedules ( id SERIAL PRIMARY KEY, job_name character varying(100) NOT NULL UNIQUE, description text, enabled boolean DEFAULT true, base_interval_minutes integer DEFAULT 240, jitter_minutes integer DEFAULT 30, last_run_at timestamp without time zone, last_status character varying(20), last_error_message text, last_duration_ms integer, next_run_at timestamp without time zone, job_config jsonb DEFAULT '{}'::jsonb, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_job_schedules_enabled ON public.job_schedules(enabled); CREATE INDEX IF NOT EXISTS idx_job_schedules_next_run ON public.job_schedules(next_run_at) WHERE enabled = true; DROP TRIGGER IF EXISTS trigger_job_schedule_updated_at ON public.job_schedules; CREATE TRIGGER trigger_job_schedule_updated_at BEFORE UPDATE ON public.job_schedules FOR EACH ROW EXECUTE FUNCTION public.update_job_schedule_updated_at(); -- job_run_logs - tracks individual job execution history CREATE TABLE IF NOT EXISTS public.job_run_logs ( id SERIAL PRIMARY KEY, schedule_id integer REFERENCES public.job_schedules(id) ON DELETE SET NULL, job_name character varying(100) NOT NULL, status character varying(20) NOT NULL, started_at timestamp without time zone NOT NULL, completed_at timestamp without time zone, duration_ms integer, error_message text, items_processed integer DEFAULT 0, items_succeeded integer DEFAULT 0, items_failed integer DEFAULT 0, metadata jsonb DEFAULT '{}'::jsonb, created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_job_run_logs_schedule ON public.job_run_logs(schedule_id); CREATE INDEX IF NOT EXISTS idx_job_run_logs_job_name ON public.job_run_logs(job_name); CREATE INDEX IF NOT EXISTS idx_job_run_logs_started_at ON public.job_run_logs(started_at DESC); CREATE INDEX IF NOT EXISTS idx_job_run_logs_status ON public.job_run_logs(status); -- Insert default admin user if not exists INSERT INTO public.users (email, password_hash, role) SELECT 'admin@example.com', '$2b$10$K8/KqJyNqJKqJyNqJKqJyOqJKqJyNqJKqJyNqJKqJyNqJKqJyNqJK', 'admin' WHERE NOT EXISTS (SELECT 1 FROM public.users WHERE email = 'admin@example.com'); -- ============================================ -- Dashboard views (consolidated schema) -- ============================================ DROP VIEW IF EXISTS public.v_dashboard_stats CASCADE; DROP VIEW IF EXISTS public.v_latest_snapshots CASCADE; DROP VIEW IF EXISTS public.v_brands CASCADE; DROP VIEW IF EXISTS public.v_categories CASCADE; DROP VIEW IF EXISTS public.v_product_brands CASCADE; DROP VIEW IF EXISTS public.v_product_categories CASCADE; -- Latest snapshot per product (most recent crawl data) CREATE OR REPLACE VIEW public.v_latest_snapshots AS SELECT DISTINCT ON (dutchie_product_id) s.* FROM public.dutchie_product_snapshots s ORDER BY dutchie_product_id, crawled_at DESC; -- Dashboard stats CREATE OR REPLACE VIEW public.v_dashboard_stats AS SELECT (SELECT COUNT(*) FROM public.dispensaries WHERE state = 'AZ') as dispensary_count, (SELECT COUNT(*) FROM public.dutchie_products) as product_count, (SELECT COUNT(*) FROM public.dutchie_product_snapshots WHERE crawled_at > NOW() - INTERVAL '24 hours') as snapshots_24h, (SELECT MAX(crawled_at) FROM public.dutchie_product_snapshots) as last_crawl_time, (SELECT COUNT(*) FROM public.crawl_jobs WHERE status = 'failed' AND created_at > NOW() - INTERVAL '24 hours') as failed_jobs_24h, (SELECT COUNT(DISTINCT brand_name) FROM public.dutchie_products WHERE brand_name IS NOT NULL) as brand_count, (SELECT COUNT(DISTINCT (type, subcategory)) FROM public.dutchie_products WHERE type IS NOT NULL) as category_count; -- Brands derived from products CREATE OR REPLACE VIEW public.v_brands AS SELECT brand_name, brand_id, MAX(brand_logo_url) as brand_logo_url, COUNT(*) as product_count, COUNT(DISTINCT dispensary_id) as dispensary_count, ARRAY_AGG(DISTINCT type) FILTER (WHERE type IS NOT NULL) as product_types FROM public.dutchie_products WHERE brand_name IS NOT NULL GROUP BY brand_name, brand_id ORDER BY product_count DESC; -- Categories derived from products CREATE OR REPLACE VIEW public.v_categories AS SELECT type, subcategory, COUNT(*) as product_count, COUNT(DISTINCT dispensary_id) as dispensary_count, COUNT(DISTINCT brand_name) as brand_count, AVG(thc) as avg_thc, MIN(thc) as min_thc, MAX(thc) as max_thc FROM public.dutchie_products WHERE type IS NOT NULL GROUP BY type, subcategory ORDER BY type, subcategory; -- Dispensary crawl status view (consolidated schema) DROP VIEW IF EXISTS public.dispensary_crawl_status CASCADE; CREATE OR REPLACE VIEW public.dispensary_crawl_status AS SELECT d.id AS dispensary_id, COALESCE(d.dba_name, d.name) AS dispensary_name, d.slug AS dispensary_slug, d.city, d.state, d.menu_url, d.menu_type, d.scrape_enabled, d.last_crawl_at, d.crawl_status, d.crawl_error, cs.cron_expression, cs.is_active, cs.priority, cs.last_run_at, cs.next_run_at, cs.last_status AS schedule_last_status, cs.last_error AS schedule_last_error, cs.consecutive_failures, j.id AS latest_job_id, j.status AS latest_job_status, j.job_type AS latest_job_type, j.started_at AS latest_job_started, j.completed_at AS latest_job_completed, j.products_found AS latest_products_found, j.products_created AS latest_products_created, j.products_updated AS latest_products_updated, j.error_message AS latest_job_error FROM public.dispensaries d LEFT JOIN public.dispensary_crawl_schedule cs ON cs.dispensary_id = d.id LEFT JOIN LATERAL ( SELECT * FROM public.dispensary_crawl_jobs dj WHERE dj.dispensary_id = d.id ORDER BY dj.created_at DESC LIMIT 1 ) j ON true WHERE d.state = 'AZ'; -- Done! SELECT 'Migration 031 completed successfully' as status;