Files
cannaiq/backend/migrations/031_consolidate_schema.sql

952 lines
39 KiB
PL/PgSQL

-- Migration 031: Consolidate Schema for crawlsy DB
-- This migration creates all tables from the legacy schema for the consolidated database
-- Run with: psql $CRAWLSY_DATABASE_URL -f migrations/031_consolidate_schema.sql
-- Functions (needed for triggers)
CREATE OR REPLACE FUNCTION public.set_requires_recrawl() RETURNS trigger
LANGUAGE plpgsql
AS $$
BEGIN
IF NEW.field_name IN ('website', 'menu_url') THEN
NEW.requires_recrawl := TRUE;
END IF;
RETURN NEW;
END;
$$;
CREATE OR REPLACE FUNCTION public.update_api_token_updated_at() RETURNS trigger
LANGUAGE plpgsql
AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$;
CREATE OR REPLACE FUNCTION public.update_brand_scrape_jobs_updated_at() RETURNS trigger
LANGUAGE plpgsql
AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$;
CREATE OR REPLACE FUNCTION public.update_sandbox_timestamp() RETURNS trigger
LANGUAGE plpgsql
AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$;
CREATE OR REPLACE FUNCTION public.update_schedule_updated_at() RETURNS trigger
LANGUAGE plpgsql
AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$;
-- Core tables (in dependency order)
-- users
CREATE TABLE IF NOT EXISTS public.users (
id SERIAL PRIMARY KEY,
email character varying(255) NOT NULL UNIQUE,
password_hash character varying(255) NOT NULL,
role character varying(50) DEFAULT 'user',
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- settings
CREATE TABLE IF NOT EXISTS public.settings (
key character varying(255) PRIMARY KEY,
value text,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- azdhs_list
CREATE TABLE IF NOT EXISTS public.azdhs_list (
id SERIAL PRIMARY KEY,
name character varying(255) NOT NULL,
company_name character varying(255),
slug character varying(255),
address character varying(500),
city character varying(100),
state character varying(2) DEFAULT 'AZ',
zip character varying(10),
phone character varying(20),
email character varying(255),
status_line text,
azdhs_url text,
latitude numeric(10,8),
longitude numeric(11,8),
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
website text,
dba_name character varying(255),
google_rating numeric(2,1),
google_review_count integer
);
-- dispensaries
CREATE TABLE IF NOT EXISTS public.dispensaries (
id SERIAL PRIMARY KEY,
azdhs_id integer UNIQUE REFERENCES public.azdhs_list(id),
name character varying(255) NOT NULL,
company_name character varying(255),
dba_name character varying(255),
slug character varying(255) UNIQUE,
address character varying(500),
city character varying(100),
state character varying(2) DEFAULT 'AZ',
zip character varying(10),
phone character varying(20),
email character varying(255),
website text,
menu_url text,
latitude numeric(10,8),
longitude numeric(11,8),
hours_of_operation jsonb,
social_media jsonb,
license_number character varying(100),
license_type character varying(50),
license_status character varying(50),
license_expiry date,
google_place_id character varying(255),
google_rating numeric(2,1),
google_review_count integer,
yelp_id character varying(255),
yelp_rating numeric(2,1),
yelp_review_count integer,
last_enriched_at timestamp without time zone,
menu_provider character varying(100),
menu_provider_confidence integer,
provider_type character varying(100),
provider_detected_at timestamp without time zone,
detection_method character varying(100),
detection_metadata jsonb,
menu_scrape_status character varying(50),
menu_scrape_error text,
menu_last_scraped_at timestamp without time zone,
menu_product_count integer DEFAULT 0,
crawl_status character varying(50) DEFAULT 'idle',
last_crawl_at timestamp without time zone,
next_crawl_at timestamp without time zone,
crawl_frequency_hours integer DEFAULT 24,
consecutive_failures integer DEFAULT 0,
scrape_enabled boolean DEFAULT true,
scrape_priority integer DEFAULT 0,
crawler_mode character varying(50) DEFAULT 'sandbox',
crawler_status character varying(50) DEFAULT 'idle',
product_crawler_mode character varying(50) DEFAULT 'sandbox',
product_provider character varying(100),
specials_crawler_mode character varying(50) DEFAULT 'disabled',
specials_provider character varying(100),
brand_crawler_mode character varying(50) DEFAULT 'disabled',
brand_provider character varying(100),
metadata_crawler_mode character varying(50) DEFAULT 'disabled',
metadata_provider character varying(100),
notes text,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
platform_dispensary_id character varying(100),
dutchie_url text,
dutchie_detected_at timestamp without time zone,
dutchie_retailer_id character varying(100),
external_id character varying(255)
);
-- stores
CREATE TABLE IF NOT EXISTS public.stores (
id SERIAL PRIMARY KEY,
name character varying(255) NOT NULL,
slug character varying(255) UNIQUE,
menu_url text,
website_url text,
image_url text,
description text,
dutchie_plus boolean DEFAULT false,
city character varying(100),
state character varying(50),
address text,
phone character varying(20),
hours jsonb,
logo_url text,
logo_public_id character varying(255),
logo_width integer,
logo_height integer,
scrape_enabled boolean DEFAULT true,
last_scraped_at timestamp without time zone,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
status character varying(50) DEFAULT 'active',
dispensary_id integer REFERENCES public.dispensaries(id) ON DELETE SET NULL
);
-- api_tokens
CREATE TABLE IF NOT EXISTS public.api_tokens (
id SERIAL PRIMARY KEY,
name character varying(255) NOT NULL,
token character varying(255) NOT NULL UNIQUE,
description text,
user_id integer REFERENCES public.users(id) ON DELETE CASCADE,
active boolean DEFAULT true,
rate_limit integer DEFAULT 100,
allowed_endpoints text[],
expires_at timestamp without time zone,
last_used_at timestamp without time zone,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- api_token_usage
CREATE TABLE IF NOT EXISTS public.api_token_usage (
id SERIAL PRIMARY KEY,
token_id integer REFERENCES public.api_tokens(id) ON DELETE CASCADE,
endpoint character varying(255) NOT NULL,
method character varying(10) NOT NULL,
status_code integer,
response_time_ms integer,
request_size integer,
response_size integer,
ip_address inet,
user_agent text,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- categories
CREATE TABLE IF NOT EXISTS public.categories (
id SERIAL PRIMARY KEY,
store_id integer REFERENCES public.stores(id) ON DELETE CASCADE,
dispensary_id integer REFERENCES public.dispensaries(id) ON DELETE CASCADE,
name character varying(255) NOT NULL,
slug character varying(255) NOT NULL,
dutchie_url text NOT NULL,
scrape_enabled boolean DEFAULT true,
last_scraped_at timestamp without time zone,
product_count integer DEFAULT 0,
parent_id integer REFERENCES public.categories(id) ON DELETE CASCADE,
path character varying(500),
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
UNIQUE(store_id, slug)
);
-- products
CREATE TABLE IF NOT EXISTS public.products (
id SERIAL PRIMARY KEY,
store_id integer REFERENCES public.stores(id) ON DELETE CASCADE,
dispensary_id integer REFERENCES public.dispensaries(id) ON DELETE CASCADE,
category_id integer REFERENCES public.categories(id) ON DELETE SET NULL,
name character varying(255) NOT NULL,
slug character varying(255) NOT NULL,
brand character varying(255),
brand_slug character varying(255),
brand_external_id character varying(100),
enterprise_product_id character varying(100),
category character varying(100),
subcategory character varying(100),
strain_type character varying(50),
description text,
effects text[],
price numeric(10,2),
special_price numeric(10,2),
is_on_special boolean DEFAULT false,
weight character varying(50),
unit character varying(50),
thc_percentage numeric(5,2),
cbd_percentage numeric(5,2),
terpenes text[],
image_url text,
image_public_id character varying(255),
image_width integer,
image_height integer,
dutchie_id character varying(100),
dutchie_url text,
sku character varying(100),
in_stock boolean DEFAULT true,
stock_status character varying(50) DEFAULT 'in_stock',
stock_quantity integer,
availability_status character varying(50) DEFAULT 'available',
status character varying(50) DEFAULT 'active',
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
UNIQUE(store_id, slug)
);
-- dutchie_products (AZ pipeline products)
CREATE TABLE IF NOT EXISTS public.dutchie_products (
id SERIAL PRIMARY KEY,
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
external_product_id character varying(100) NOT NULL,
name character varying(500) NOT NULL,
brand_name character varying(255),
category character varying(100),
subcategory character varying(100),
strain_type character varying(50),
description text,
effects text[],
thc_content character varying(50),
cbd_content character varying(50),
primary_image_url text,
additional_images text[],
stock_status character varying(50) DEFAULT 'in_stock',
first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
UNIQUE(dispensary_id, external_product_id)
);
-- dutchie_product_snapshots
CREATE TABLE IF NOT EXISTS public.dutchie_product_snapshots (
id SERIAL PRIMARY KEY,
dutchie_product_id integer NOT NULL REFERENCES public.dutchie_products(id) ON DELETE CASCADE,
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
options jsonb,
raw_product_data jsonb,
crawled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- campaigns
CREATE TABLE IF NOT EXISTS public.campaigns (
id SERIAL PRIMARY KEY,
name character varying(255) NOT NULL,
slug character varying(255) NOT NULL UNIQUE,
description text,
display_style character varying(50) DEFAULT 'grid',
active boolean DEFAULT true,
start_date timestamp without time zone,
end_date timestamp without time zone,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- campaign_products
CREATE TABLE IF NOT EXISTS public.campaign_products (
id SERIAL PRIMARY KEY,
campaign_id integer REFERENCES public.campaigns(id) ON DELETE CASCADE,
product_id integer REFERENCES public.products(id) ON DELETE CASCADE,
display_order integer DEFAULT 0,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
UNIQUE(campaign_id, product_id)
);
-- clicks
CREATE TABLE IF NOT EXISTS public.clicks (
id SERIAL PRIMARY KEY,
product_id integer REFERENCES public.products(id) ON DELETE CASCADE,
campaign_id integer REFERENCES public.campaigns(id) ON DELETE SET NULL,
ip_address character varying(45),
user_agent text,
referer text,
clicked_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- proxies
CREATE TABLE IF NOT EXISTS public.proxies (
id SERIAL PRIMARY KEY,
host character varying(255) NOT NULL,
port integer NOT NULL,
protocol character varying(10) DEFAULT 'http',
username character varying(255),
password character varying(255),
active boolean DEFAULT true,
is_anonymous boolean DEFAULT false,
last_tested_at timestamp without time zone,
last_test_result boolean,
response_time_ms integer,
fail_count integer DEFAULT 0,
success_count integer DEFAULT 0,
country_code character varying(2),
state character varying(50),
city character varying(100),
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
UNIQUE(host, port, protocol)
);
-- failed_proxies
CREATE TABLE IF NOT EXISTS public.failed_proxies (
id SERIAL PRIMARY KEY,
host character varying(255) NOT NULL,
port integer NOT NULL,
protocol character varying(10) DEFAULT 'http',
failure_reason text,
failed_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
UNIQUE(host, port, protocol)
);
-- proxy_test_jobs
CREATE TABLE IF NOT EXISTS public.proxy_test_jobs (
id SERIAL PRIMARY KEY,
status character varying(50) DEFAULT 'pending',
total_proxies integer DEFAULT 0,
tested_proxies integer DEFAULT 0,
successful_proxies integer DEFAULT 0,
failed_proxies integer DEFAULT 0,
started_at timestamp without time zone,
completed_at timestamp without time zone,
error_message text,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- crawl_jobs
CREATE TABLE IF NOT EXISTS public.crawl_jobs (
id SERIAL PRIMARY KEY,
store_id integer REFERENCES public.stores(id) ON DELETE CASCADE,
status character varying(50) DEFAULT 'pending',
job_type character varying(50) DEFAULT 'full',
category_slug character varying(255),
priority integer DEFAULT 0,
scheduled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
started_at timestamp without time zone,
completed_at timestamp without time zone,
products_found integer DEFAULT 0,
products_updated integer DEFAULT 0,
products_created integer DEFAULT 0,
error_message text,
retry_count integer DEFAULT 0,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- crawler_schedule
CREATE TABLE IF NOT EXISTS public.crawler_schedule (
id SERIAL PRIMARY KEY,
schedule_type character varying(100) NOT NULL UNIQUE,
cron_expression character varying(100) NOT NULL,
is_active boolean DEFAULT true,
last_run_at timestamp without time zone,
next_run_at timestamp without time zone,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- store_crawl_schedule
CREATE TABLE IF NOT EXISTS public.store_crawl_schedule (
id SERIAL PRIMARY KEY,
store_id integer NOT NULL REFERENCES public.stores(id) ON DELETE CASCADE UNIQUE,
cron_expression character varying(100) NOT NULL,
is_active boolean DEFAULT true,
priority integer DEFAULT 0,
last_run_at timestamp without time zone,
next_run_at timestamp without time zone,
last_status character varying(50),
last_error text,
consecutive_failures integer DEFAULT 0,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- dispensary_crawl_schedule
CREATE TABLE IF NOT EXISTS public.dispensary_crawl_schedule (
id SERIAL PRIMARY KEY,
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE UNIQUE,
cron_expression character varying(100) NOT NULL,
is_active boolean DEFAULT true,
priority integer DEFAULT 0,
last_run_at timestamp without time zone,
next_run_at timestamp without time zone,
last_status character varying(50),
last_error text,
consecutive_failures integer DEFAULT 0,
metadata jsonb,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- dispensary_crawl_jobs
CREATE TABLE IF NOT EXISTS public.dispensary_crawl_jobs (
id SERIAL PRIMARY KEY,
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
schedule_id integer REFERENCES public.dispensary_crawl_schedule(id) ON DELETE SET NULL,
status character varying(50) DEFAULT 'pending',
job_type character varying(50) DEFAULT 'full',
priority integer DEFAULT 0,
scheduled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
started_at timestamp without time zone,
completed_at timestamp without time zone,
products_found integer DEFAULT 0,
products_updated integer DEFAULT 0,
products_created integer DEFAULT 0,
brands_found integer DEFAULT 0,
error_message text,
metadata jsonb,
retry_count integer DEFAULT 0,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- dispensary_changes
CREATE TABLE IF NOT EXISTS public.dispensary_changes (
id SERIAL PRIMARY KEY,
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
field_name character varying(100) NOT NULL,
old_value text,
new_value text,
status character varying(50) DEFAULT 'pending',
source character varying(100),
notes text,
requires_recrawl boolean DEFAULT false,
reviewed_by integer REFERENCES public.users(id),
reviewed_at timestamp without time zone,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- crawler_templates
CREATE TABLE IF NOT EXISTS public.crawler_templates (
id SERIAL PRIMARY KEY,
provider character varying(100) NOT NULL,
name character varying(255) NOT NULL,
version integer DEFAULT 1,
description text,
config jsonb NOT NULL,
is_active boolean DEFAULT true,
is_default_for_provider boolean DEFAULT false,
success_count integer DEFAULT 0,
failure_count integer DEFAULT 0,
last_used_at timestamp without time zone,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
UNIQUE(provider, name, version)
);
-- crawler_sandboxes
CREATE TABLE IF NOT EXISTS public.crawler_sandboxes (
id SERIAL PRIMARY KEY,
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
category character varying(100) NOT NULL,
mode character varying(50) DEFAULT 'testing',
status character varying(50) DEFAULT 'pending',
suspected_menu_provider character varying(100),
template_name character varying(255),
config_overrides jsonb,
last_test_at timestamp without time zone,
last_test_result text,
test_count integer DEFAULT 0,
success_count integer DEFAULT 0,
notes text,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- sandbox_crawl_jobs
CREATE TABLE IF NOT EXISTS public.sandbox_crawl_jobs (
id SERIAL PRIMARY KEY,
sandbox_id integer REFERENCES public.crawler_sandboxes(id) ON DELETE SET NULL,
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
category character varying(100) NOT NULL,
status character varying(50) DEFAULT 'pending',
priority integer DEFAULT 0,
scheduled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
started_at timestamp without time zone,
completed_at timestamp without time zone,
items_found integer DEFAULT 0,
items_saved integer DEFAULT 0,
error_message text,
result_data jsonb,
retry_count integer DEFAULT 0,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- brands
CREATE TABLE IF NOT EXISTS public.brands (
id SERIAL PRIMARY KEY,
store_id integer NOT NULL REFERENCES public.stores(id) ON DELETE CASCADE,
dispensary_id integer REFERENCES public.dispensaries(id),
name character varying(255) NOT NULL,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
first_seen_at timestamp with time zone DEFAULT now(),
last_seen_at timestamp with time zone DEFAULT now(),
UNIQUE(store_id, name)
);
-- brand_scrape_jobs
CREATE TABLE IF NOT EXISTS public.brand_scrape_jobs (
id SERIAL PRIMARY KEY,
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id),
brand_slug text NOT NULL,
brand_name text NOT NULL,
status text DEFAULT 'pending' NOT NULL,
worker_id text,
started_at timestamp without time zone,
completed_at timestamp without time zone,
products_found integer DEFAULT 0,
products_saved integer DEFAULT 0,
error_message text,
retry_count integer DEFAULT 0,
created_at timestamp without time zone DEFAULT now(),
updated_at timestamp without time zone DEFAULT now(),
UNIQUE(dispensary_id, brand_slug)
);
-- brand_history
CREATE TABLE IF NOT EXISTS public.brand_history (
id SERIAL PRIMARY KEY,
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
brand_name character varying(255) NOT NULL,
event_type character varying(20) NOT NULL,
event_at timestamp with time zone DEFAULT now(),
product_count integer,
metadata jsonb
);
-- batch_history
CREATE TABLE IF NOT EXISTS public.batch_history (
id SERIAL PRIMARY KEY,
product_id integer REFERENCES public.products(id) ON DELETE CASCADE,
thc_percentage numeric(5,2),
cbd_percentage numeric(5,2),
terpenes text[],
strain_type character varying(100),
recorded_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- price_history
CREATE TABLE IF NOT EXISTS public.price_history (
id SERIAL PRIMARY KEY,
product_id integer REFERENCES public.products(id) ON DELETE CASCADE,
price numeric(10,2) NOT NULL,
special_price numeric(10,2),
recorded_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- product_categories
CREATE TABLE IF NOT EXISTS public.product_categories (
id SERIAL PRIMARY KEY,
product_id integer NOT NULL REFERENCES public.products(id) ON DELETE CASCADE,
category_slug character varying(255) NOT NULL,
first_seen_at timestamp with time zone DEFAULT now(),
last_seen_at timestamp with time zone DEFAULT now(),
UNIQUE(product_id, category_slug)
);
-- specials
CREATE TABLE IF NOT EXISTS public.specials (
id SERIAL PRIMARY KEY,
store_id integer NOT NULL REFERENCES public.stores(id) ON DELETE CASCADE,
product_id integer REFERENCES public.products(id) ON DELETE CASCADE,
title character varying(255) NOT NULL,
description text,
discount_type character varying(50),
discount_value numeric(10,2),
min_purchase numeric(10,2),
valid_date date,
start_time time without time zone,
end_time time without time zone,
days_of_week text[],
is_recurring boolean DEFAULT false,
badge_text character varying(50),
image_url text,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- jobs (generic job queue)
CREATE TABLE IF NOT EXISTS public.jobs (
id SERIAL PRIMARY KEY,
store_id integer REFERENCES public.stores(id) ON DELETE CASCADE,
type character varying(100) NOT NULL,
status character varying(50) DEFAULT 'pending',
payload jsonb,
result jsonb,
error_message text,
started_at timestamp without time zone,
completed_at timestamp without time zone,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
-- wp_dutchie_api_permissions (WordPress API permissions)
CREATE TABLE IF NOT EXISTS public.wp_dutchie_api_permissions (
id SERIAL PRIMARY KEY,
user_name character varying(255) NOT NULL,
api_key character varying(255) NOT NULL UNIQUE,
allowed_ips text,
allowed_domains text,
is_active smallint DEFAULT 1,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
last_used_at timestamp without time zone,
store_id integer REFERENCES public.stores(id),
store_name character varying(255),
dispensary_id integer REFERENCES public.dispensaries(id)
);
-- Create indexes (only if they don't exist)
CREATE INDEX IF NOT EXISTS idx_api_token_usage_created_at ON public.api_token_usage(created_at);
CREATE INDEX IF NOT EXISTS idx_api_token_usage_endpoint ON public.api_token_usage(endpoint);
CREATE INDEX IF NOT EXISTS idx_api_token_usage_token_id ON public.api_token_usage(token_id);
CREATE INDEX IF NOT EXISTS idx_api_tokens_active ON public.api_tokens(active);
CREATE INDEX IF NOT EXISTS idx_api_tokens_token ON public.api_tokens(token);
CREATE INDEX IF NOT EXISTS idx_batch_history_product ON public.batch_history(product_id, recorded_at DESC);
CREATE INDEX IF NOT EXISTS idx_batch_history_recorded ON public.batch_history(recorded_at DESC);
CREATE INDEX IF NOT EXISTS idx_brand_history_brand ON public.brand_history(brand_name, event_at DESC);
CREATE INDEX IF NOT EXISTS idx_brand_history_dispensary ON public.brand_history(dispensary_id, event_at DESC);
CREATE INDEX IF NOT EXISTS idx_brand_history_event ON public.brand_history(event_type, event_at DESC);
CREATE INDEX IF NOT EXISTS idx_brand_jobs_dispensary ON public.brand_scrape_jobs(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_brand_jobs_status ON public.brand_scrape_jobs(status);
CREATE INDEX IF NOT EXISTS idx_brands_dispensary ON public.brands(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_brands_last_seen ON public.brands(last_seen_at DESC);
CREATE INDEX IF NOT EXISTS idx_brands_store_id ON public.brands(store_id);
CREATE INDEX IF NOT EXISTS idx_categories_dispensary_id ON public.categories(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_categories_parent_id ON public.categories(parent_id);
CREATE INDEX IF NOT EXISTS idx_categories_path ON public.categories(path);
CREATE INDEX IF NOT EXISTS idx_clicks_campaign_id ON public.clicks(campaign_id);
CREATE INDEX IF NOT EXISTS idx_clicks_clicked_at ON public.clicks(clicked_at);
CREATE INDEX IF NOT EXISTS idx_clicks_product_id ON public.clicks(product_id);
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_status ON public.crawl_jobs(status);
CREATE INDEX IF NOT EXISTS idx_dispensaries_azdhs_id ON public.dispensaries(azdhs_id);
CREATE INDEX IF NOT EXISTS idx_dispensaries_city ON public.dispensaries(city);
CREATE INDEX IF NOT EXISTS idx_dispensaries_crawl_status ON public.dispensaries(crawl_status);
CREATE INDEX IF NOT EXISTS idx_dispensaries_crawler_mode ON public.dispensaries(crawler_mode);
CREATE INDEX IF NOT EXISTS idx_dispensaries_crawler_status ON public.dispensaries(crawler_status);
CREATE INDEX IF NOT EXISTS idx_dispensaries_provider ON public.dispensaries(menu_provider);
CREATE INDEX IF NOT EXISTS idx_dispensaries_provider_confidence ON public.dispensaries(menu_provider_confidence);
CREATE INDEX IF NOT EXISTS idx_dispensaries_slug ON public.dispensaries(slug);
CREATE INDEX IF NOT EXISTS idx_dispensaries_state ON public.dispensaries(state);
CREATE INDEX IF NOT EXISTS idx_dispensary_changes_created_at ON public.dispensary_changes(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_dispensary_changes_dispensary_status ON public.dispensary_changes(dispensary_id, status);
CREATE INDEX IF NOT EXISTS idx_dispensary_changes_status ON public.dispensary_changes(status);
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_dispensary ON public.dispensary_crawl_jobs(dispensary_id, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_recent ON public.dispensary_crawl_jobs(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_status ON public.dispensary_crawl_jobs(status);
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_active ON public.dispensary_crawl_schedule(is_active);
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_status ON public.dispensary_crawl_schedule(last_status);
CREATE INDEX IF NOT EXISTS idx_jobs_status ON public.jobs(status);
CREATE INDEX IF NOT EXISTS idx_jobs_store_id ON public.jobs(store_id);
CREATE INDEX IF NOT EXISTS idx_jobs_type ON public.jobs(type);
CREATE INDEX IF NOT EXISTS idx_price_history_product ON public.price_history(product_id, recorded_at DESC);
CREATE INDEX IF NOT EXISTS idx_price_history_recorded ON public.price_history(recorded_at DESC);
CREATE INDEX IF NOT EXISTS idx_product_categories_product ON public.product_categories(product_id);
CREATE INDEX IF NOT EXISTS idx_products_availability_status ON public.products(availability_status);
CREATE INDEX IF NOT EXISTS idx_products_brand_external ON public.products(brand_external_id);
CREATE INDEX IF NOT EXISTS idx_products_dispensary_id ON public.products(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_products_enterprise ON public.products(enterprise_product_id);
CREATE INDEX IF NOT EXISTS idx_products_is_special ON public.products(is_on_special);
CREATE INDEX IF NOT EXISTS idx_products_sku ON public.products(sku);
CREATE INDEX IF NOT EXISTS idx_products_status ON public.products(status);
CREATE INDEX IF NOT EXISTS idx_products_stock_status ON public.products(stock_status);
CREATE INDEX IF NOT EXISTS idx_products_subcategory ON public.products(subcategory);
CREATE INDEX IF NOT EXISTS idx_proxies_location ON public.proxies(country_code, state, city);
CREATE INDEX IF NOT EXISTS idx_proxy_test_jobs_created_at ON public.proxy_test_jobs(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_proxy_test_jobs_status ON public.proxy_test_jobs(status);
CREATE INDEX IF NOT EXISTS idx_sandbox_category ON public.crawler_sandboxes(category);
CREATE INDEX IF NOT EXISTS idx_sandbox_dispensary ON public.crawler_sandboxes(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_sandbox_job_category ON public.sandbox_crawl_jobs(category);
CREATE INDEX IF NOT EXISTS idx_sandbox_job_dispensary ON public.sandbox_crawl_jobs(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_sandbox_job_status ON public.sandbox_crawl_jobs(status);
CREATE INDEX IF NOT EXISTS idx_sandbox_mode ON public.crawler_sandboxes(mode);
CREATE INDEX IF NOT EXISTS idx_sandbox_status ON public.crawler_sandboxes(status);
CREATE INDEX IF NOT EXISTS idx_sandbox_suspected_provider ON public.crawler_sandboxes(suspected_menu_provider);
CREATE INDEX IF NOT EXISTS idx_sandbox_template ON public.crawler_sandboxes(template_name);
CREATE INDEX IF NOT EXISTS idx_specials_product_id ON public.specials(product_id);
CREATE INDEX IF NOT EXISTS idx_stores_dispensary_id ON public.stores(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_template_active ON public.crawler_templates(is_active);
CREATE INDEX IF NOT EXISTS idx_template_provider ON public.crawler_templates(provider);
CREATE INDEX IF NOT EXISTS idx_wp_api_permissions_store_id ON public.wp_dutchie_api_permissions(store_id);
-- Create triggers (drop first if exists to avoid errors)
DROP TRIGGER IF EXISTS api_tokens_updated_at ON public.api_tokens;
CREATE TRIGGER api_tokens_updated_at BEFORE UPDATE ON public.api_tokens FOR EACH ROW EXECUTE FUNCTION public.update_api_token_updated_at();
DROP TRIGGER IF EXISTS trigger_crawl_jobs_updated_at ON public.crawl_jobs;
CREATE TRIGGER trigger_crawl_jobs_updated_at BEFORE UPDATE ON public.crawl_jobs FOR EACH ROW EXECUTE FUNCTION public.update_schedule_updated_at();
DROP TRIGGER IF EXISTS trigger_crawler_schedule_updated_at ON public.crawler_schedule;
CREATE TRIGGER trigger_crawler_schedule_updated_at BEFORE UPDATE ON public.crawler_schedule FOR EACH ROW EXECUTE FUNCTION public.update_schedule_updated_at();
DROP TRIGGER IF EXISTS trigger_sandbox_job_updated_at ON public.sandbox_crawl_jobs;
CREATE TRIGGER trigger_sandbox_job_updated_at BEFORE UPDATE ON public.sandbox_crawl_jobs FOR EACH ROW EXECUTE FUNCTION public.update_sandbox_timestamp();
DROP TRIGGER IF EXISTS trigger_sandbox_updated_at ON public.crawler_sandboxes;
CREATE TRIGGER trigger_sandbox_updated_at BEFORE UPDATE ON public.crawler_sandboxes FOR EACH ROW EXECUTE FUNCTION public.update_sandbox_timestamp();
DROP TRIGGER IF EXISTS trigger_set_requires_recrawl ON public.dispensary_changes;
CREATE TRIGGER trigger_set_requires_recrawl BEFORE INSERT ON public.dispensary_changes FOR EACH ROW EXECUTE FUNCTION public.set_requires_recrawl();
DROP TRIGGER IF EXISTS trigger_store_crawl_schedule_updated_at ON public.store_crawl_schedule;
CREATE TRIGGER trigger_store_crawl_schedule_updated_at BEFORE UPDATE ON public.store_crawl_schedule FOR EACH ROW EXECUTE FUNCTION public.update_schedule_updated_at();
DROP TRIGGER IF EXISTS trigger_template_updated_at ON public.crawler_templates;
CREATE TRIGGER trigger_template_updated_at BEFORE UPDATE ON public.crawler_templates FOR EACH ROW EXECUTE FUNCTION public.update_sandbox_timestamp();
DROP TRIGGER IF EXISTS trigger_update_brand_scrape_jobs_timestamp ON public.brand_scrape_jobs;
CREATE TRIGGER trigger_update_brand_scrape_jobs_timestamp BEFORE UPDATE ON public.brand_scrape_jobs FOR EACH ROW EXECUTE FUNCTION public.update_brand_scrape_jobs_updated_at();
-- ============================================
-- Scheduler tables (job_schedules, job_run_logs)
-- ============================================
-- Function for updating job_schedules.updated_at
CREATE OR REPLACE FUNCTION public.update_job_schedule_updated_at() RETURNS trigger
LANGUAGE plpgsql
AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$;
-- job_schedules - tracks scheduled job definitions
CREATE TABLE IF NOT EXISTS public.job_schedules (
id SERIAL PRIMARY KEY,
job_name character varying(100) NOT NULL UNIQUE,
description text,
enabled boolean DEFAULT true,
base_interval_minutes integer DEFAULT 240,
jitter_minutes integer DEFAULT 30,
last_run_at timestamp without time zone,
last_status character varying(20),
last_error_message text,
last_duration_ms integer,
next_run_at timestamp without time zone,
job_config jsonb DEFAULT '{}'::jsonb,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_job_schedules_enabled ON public.job_schedules(enabled);
CREATE INDEX IF NOT EXISTS idx_job_schedules_next_run ON public.job_schedules(next_run_at) WHERE enabled = true;
DROP TRIGGER IF EXISTS trigger_job_schedule_updated_at ON public.job_schedules;
CREATE TRIGGER trigger_job_schedule_updated_at BEFORE UPDATE ON public.job_schedules FOR EACH ROW EXECUTE FUNCTION public.update_job_schedule_updated_at();
-- job_run_logs - tracks individual job execution history
CREATE TABLE IF NOT EXISTS public.job_run_logs (
id SERIAL PRIMARY KEY,
schedule_id integer REFERENCES public.job_schedules(id) ON DELETE SET NULL,
job_name character varying(100) NOT NULL,
status character varying(20) NOT NULL,
started_at timestamp without time zone NOT NULL,
completed_at timestamp without time zone,
duration_ms integer,
error_message text,
items_processed integer DEFAULT 0,
items_succeeded integer DEFAULT 0,
items_failed integer DEFAULT 0,
metadata jsonb DEFAULT '{}'::jsonb,
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_job_run_logs_schedule ON public.job_run_logs(schedule_id);
CREATE INDEX IF NOT EXISTS idx_job_run_logs_job_name ON public.job_run_logs(job_name);
CREATE INDEX IF NOT EXISTS idx_job_run_logs_started_at ON public.job_run_logs(started_at DESC);
CREATE INDEX IF NOT EXISTS idx_job_run_logs_status ON public.job_run_logs(status);
-- Insert default admin user if not exists
INSERT INTO public.users (email, password_hash, role)
SELECT 'admin@example.com', '$2b$10$K8/KqJyNqJKqJyNqJKqJyOqJKqJyNqJKqJyNqJKqJyNqJKqJyNqJK', 'admin'
WHERE NOT EXISTS (SELECT 1 FROM public.users WHERE email = 'admin@example.com');
-- ============================================
-- Dashboard views (consolidated schema)
-- ============================================
DROP VIEW IF EXISTS public.v_dashboard_stats CASCADE;
DROP VIEW IF EXISTS public.v_latest_snapshots CASCADE;
DROP VIEW IF EXISTS public.v_brands CASCADE;
DROP VIEW IF EXISTS public.v_categories CASCADE;
DROP VIEW IF EXISTS public.v_product_brands CASCADE;
DROP VIEW IF EXISTS public.v_product_categories CASCADE;
-- Latest snapshot per product (most recent crawl data)
CREATE OR REPLACE VIEW public.v_latest_snapshots AS
SELECT DISTINCT ON (dutchie_product_id)
s.*
FROM public.dutchie_product_snapshots s
ORDER BY dutchie_product_id, crawled_at DESC;
-- Dashboard stats
CREATE OR REPLACE VIEW public.v_dashboard_stats AS
SELECT
(SELECT COUNT(*) FROM public.dispensaries WHERE state = 'AZ') as dispensary_count,
(SELECT COUNT(*) FROM public.dutchie_products) as product_count,
(SELECT COUNT(*) FROM public.dutchie_product_snapshots WHERE crawled_at > NOW() - INTERVAL '24 hours') as snapshots_24h,
(SELECT MAX(crawled_at) FROM public.dutchie_product_snapshots) as last_crawl_time,
(SELECT COUNT(*) FROM public.crawl_jobs WHERE status = 'failed' AND created_at > NOW() - INTERVAL '24 hours') as failed_jobs_24h,
(SELECT COUNT(DISTINCT brand_name) FROM public.dutchie_products WHERE brand_name IS NOT NULL) as brand_count,
(SELECT COUNT(DISTINCT (type, subcategory)) FROM public.dutchie_products WHERE type IS NOT NULL) as category_count;
-- Brands derived from products
CREATE OR REPLACE VIEW public.v_brands AS
SELECT
brand_name,
brand_id,
MAX(brand_logo_url) as brand_logo_url,
COUNT(*) as product_count,
COUNT(DISTINCT dispensary_id) as dispensary_count,
ARRAY_AGG(DISTINCT type) FILTER (WHERE type IS NOT NULL) as product_types
FROM public.dutchie_products
WHERE brand_name IS NOT NULL
GROUP BY brand_name, brand_id
ORDER BY product_count DESC;
-- Categories derived from products
CREATE OR REPLACE VIEW public.v_categories AS
SELECT
type,
subcategory,
COUNT(*) as product_count,
COUNT(DISTINCT dispensary_id) as dispensary_count,
AVG(thc) as avg_thc,
MIN(thc) as min_thc,
MAX(thc) as max_thc
FROM public.dutchie_products
WHERE type IS NOT NULL
GROUP BY type, subcategory
ORDER BY type, subcategory;
-- Dispensary crawl status view (consolidated schema)
DROP VIEW IF EXISTS public.dispensary_crawl_status CASCADE;
CREATE OR REPLACE VIEW public.dispensary_crawl_status AS
SELECT
d.id AS dispensary_id,
COALESCE(d.dba_name, d.name) AS dispensary_name,
d.slug AS dispensary_slug,
d.city,
d.state,
d.menu_url,
d.menu_type,
d.scrape_enabled,
d.last_crawl_at,
d.crawl_status,
d.crawl_error,
cs.cron_expression,
cs.is_active,
cs.priority,
cs.last_run_at,
cs.next_run_at,
cs.last_status AS schedule_last_status,
cs.last_error AS schedule_last_error,
cs.consecutive_failures,
j.id AS latest_job_id,
j.status AS latest_job_status,
j.job_type AS latest_job_type,
j.started_at AS latest_job_started,
j.completed_at AS latest_job_completed,
j.products_found AS latest_products_found,
j.products_created AS latest_products_created,
j.products_updated AS latest_products_updated,
j.error_message AS latest_job_error
FROM public.dispensaries d
LEFT JOIN public.dispensary_crawl_schedule cs ON cs.dispensary_id = d.id
LEFT JOIN LATERAL (
SELECT *
FROM public.dispensary_crawl_jobs dj
WHERE dj.dispensary_id = d.id
ORDER BY dj.created_at DESC
LIMIT 1
) j ON true
WHERE d.state = 'AZ';
-- Done!
SELECT 'Migration 031 completed successfully' as status;