952 lines
39 KiB
PL/PgSQL
952 lines
39 KiB
PL/PgSQL
-- Migration 031: Consolidate Schema for crawlsy DB
|
|
-- This migration creates all tables from the legacy schema for the consolidated database
|
|
-- Run with: psql $CRAWLSY_DATABASE_URL -f migrations/031_consolidate_schema.sql
|
|
|
|
-- Functions (needed for triggers)
|
|
CREATE OR REPLACE FUNCTION public.set_requires_recrawl() RETURNS trigger
|
|
LANGUAGE plpgsql
|
|
AS $$
|
|
BEGIN
|
|
IF NEW.field_name IN ('website', 'menu_url') THEN
|
|
NEW.requires_recrawl := TRUE;
|
|
END IF;
|
|
RETURN NEW;
|
|
END;
|
|
$$;
|
|
|
|
CREATE OR REPLACE FUNCTION public.update_api_token_updated_at() RETURNS trigger
|
|
LANGUAGE plpgsql
|
|
AS $$
|
|
BEGIN
|
|
NEW.updated_at = CURRENT_TIMESTAMP;
|
|
RETURN NEW;
|
|
END;
|
|
$$;
|
|
|
|
CREATE OR REPLACE FUNCTION public.update_brand_scrape_jobs_updated_at() RETURNS trigger
|
|
LANGUAGE plpgsql
|
|
AS $$
|
|
BEGIN
|
|
NEW.updated_at = NOW();
|
|
RETURN NEW;
|
|
END;
|
|
$$;
|
|
|
|
CREATE OR REPLACE FUNCTION public.update_sandbox_timestamp() RETURNS trigger
|
|
LANGUAGE plpgsql
|
|
AS $$
|
|
BEGIN
|
|
NEW.updated_at = NOW();
|
|
RETURN NEW;
|
|
END;
|
|
$$;
|
|
|
|
CREATE OR REPLACE FUNCTION public.update_schedule_updated_at() RETURNS trigger
|
|
LANGUAGE plpgsql
|
|
AS $$
|
|
BEGIN
|
|
NEW.updated_at = NOW();
|
|
RETURN NEW;
|
|
END;
|
|
$$;
|
|
|
|
-- Core tables (in dependency order)
|
|
|
|
-- users
|
|
CREATE TABLE IF NOT EXISTS public.users (
|
|
id SERIAL PRIMARY KEY,
|
|
email character varying(255) NOT NULL UNIQUE,
|
|
password_hash character varying(255) NOT NULL,
|
|
role character varying(50) DEFAULT 'user',
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- settings
|
|
CREATE TABLE IF NOT EXISTS public.settings (
|
|
key character varying(255) PRIMARY KEY,
|
|
value text,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- azdhs_list
|
|
CREATE TABLE IF NOT EXISTS public.azdhs_list (
|
|
id SERIAL PRIMARY KEY,
|
|
name character varying(255) NOT NULL,
|
|
company_name character varying(255),
|
|
slug character varying(255),
|
|
address character varying(500),
|
|
city character varying(100),
|
|
state character varying(2) DEFAULT 'AZ',
|
|
zip character varying(10),
|
|
phone character varying(20),
|
|
email character varying(255),
|
|
status_line text,
|
|
azdhs_url text,
|
|
latitude numeric(10,8),
|
|
longitude numeric(11,8),
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
website text,
|
|
dba_name character varying(255),
|
|
google_rating numeric(2,1),
|
|
google_review_count integer
|
|
);
|
|
|
|
-- dispensaries
|
|
CREATE TABLE IF NOT EXISTS public.dispensaries (
|
|
id SERIAL PRIMARY KEY,
|
|
azdhs_id integer UNIQUE REFERENCES public.azdhs_list(id),
|
|
name character varying(255) NOT NULL,
|
|
company_name character varying(255),
|
|
dba_name character varying(255),
|
|
slug character varying(255) UNIQUE,
|
|
address character varying(500),
|
|
city character varying(100),
|
|
state character varying(2) DEFAULT 'AZ',
|
|
zip character varying(10),
|
|
phone character varying(20),
|
|
email character varying(255),
|
|
website text,
|
|
menu_url text,
|
|
latitude numeric(10,8),
|
|
longitude numeric(11,8),
|
|
hours_of_operation jsonb,
|
|
social_media jsonb,
|
|
license_number character varying(100),
|
|
license_type character varying(50),
|
|
license_status character varying(50),
|
|
license_expiry date,
|
|
google_place_id character varying(255),
|
|
google_rating numeric(2,1),
|
|
google_review_count integer,
|
|
yelp_id character varying(255),
|
|
yelp_rating numeric(2,1),
|
|
yelp_review_count integer,
|
|
last_enriched_at timestamp without time zone,
|
|
menu_provider character varying(100),
|
|
menu_provider_confidence integer,
|
|
provider_type character varying(100),
|
|
provider_detected_at timestamp without time zone,
|
|
detection_method character varying(100),
|
|
detection_metadata jsonb,
|
|
menu_scrape_status character varying(50),
|
|
menu_scrape_error text,
|
|
menu_last_scraped_at timestamp without time zone,
|
|
menu_product_count integer DEFAULT 0,
|
|
crawl_status character varying(50) DEFAULT 'idle',
|
|
last_crawl_at timestamp without time zone,
|
|
next_crawl_at timestamp without time zone,
|
|
crawl_frequency_hours integer DEFAULT 24,
|
|
consecutive_failures integer DEFAULT 0,
|
|
scrape_enabled boolean DEFAULT true,
|
|
scrape_priority integer DEFAULT 0,
|
|
crawler_mode character varying(50) DEFAULT 'sandbox',
|
|
crawler_status character varying(50) DEFAULT 'idle',
|
|
product_crawler_mode character varying(50) DEFAULT 'sandbox',
|
|
product_provider character varying(100),
|
|
specials_crawler_mode character varying(50) DEFAULT 'disabled',
|
|
specials_provider character varying(100),
|
|
brand_crawler_mode character varying(50) DEFAULT 'disabled',
|
|
brand_provider character varying(100),
|
|
metadata_crawler_mode character varying(50) DEFAULT 'disabled',
|
|
metadata_provider character varying(100),
|
|
notes text,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
platform_dispensary_id character varying(100),
|
|
dutchie_url text,
|
|
dutchie_detected_at timestamp without time zone,
|
|
dutchie_retailer_id character varying(100),
|
|
external_id character varying(255)
|
|
);
|
|
|
|
-- stores
|
|
CREATE TABLE IF NOT EXISTS public.stores (
|
|
id SERIAL PRIMARY KEY,
|
|
name character varying(255) NOT NULL,
|
|
slug character varying(255) UNIQUE,
|
|
menu_url text,
|
|
website_url text,
|
|
image_url text,
|
|
description text,
|
|
dutchie_plus boolean DEFAULT false,
|
|
city character varying(100),
|
|
state character varying(50),
|
|
address text,
|
|
phone character varying(20),
|
|
hours jsonb,
|
|
logo_url text,
|
|
logo_public_id character varying(255),
|
|
logo_width integer,
|
|
logo_height integer,
|
|
scrape_enabled boolean DEFAULT true,
|
|
last_scraped_at timestamp without time zone,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
status character varying(50) DEFAULT 'active',
|
|
dispensary_id integer REFERENCES public.dispensaries(id) ON DELETE SET NULL
|
|
);
|
|
|
|
-- api_tokens
|
|
CREATE TABLE IF NOT EXISTS public.api_tokens (
|
|
id SERIAL PRIMARY KEY,
|
|
name character varying(255) NOT NULL,
|
|
token character varying(255) NOT NULL UNIQUE,
|
|
description text,
|
|
user_id integer REFERENCES public.users(id) ON DELETE CASCADE,
|
|
active boolean DEFAULT true,
|
|
rate_limit integer DEFAULT 100,
|
|
allowed_endpoints text[],
|
|
expires_at timestamp without time zone,
|
|
last_used_at timestamp without time zone,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- api_token_usage
|
|
CREATE TABLE IF NOT EXISTS public.api_token_usage (
|
|
id SERIAL PRIMARY KEY,
|
|
token_id integer REFERENCES public.api_tokens(id) ON DELETE CASCADE,
|
|
endpoint character varying(255) NOT NULL,
|
|
method character varying(10) NOT NULL,
|
|
status_code integer,
|
|
response_time_ms integer,
|
|
request_size integer,
|
|
response_size integer,
|
|
ip_address inet,
|
|
user_agent text,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- categories
|
|
CREATE TABLE IF NOT EXISTS public.categories (
|
|
id SERIAL PRIMARY KEY,
|
|
store_id integer REFERENCES public.stores(id) ON DELETE CASCADE,
|
|
dispensary_id integer REFERENCES public.dispensaries(id) ON DELETE CASCADE,
|
|
name character varying(255) NOT NULL,
|
|
slug character varying(255) NOT NULL,
|
|
dutchie_url text NOT NULL,
|
|
scrape_enabled boolean DEFAULT true,
|
|
last_scraped_at timestamp without time zone,
|
|
product_count integer DEFAULT 0,
|
|
parent_id integer REFERENCES public.categories(id) ON DELETE CASCADE,
|
|
path character varying(500),
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
UNIQUE(store_id, slug)
|
|
);
|
|
|
|
-- products
|
|
CREATE TABLE IF NOT EXISTS public.products (
|
|
id SERIAL PRIMARY KEY,
|
|
store_id integer REFERENCES public.stores(id) ON DELETE CASCADE,
|
|
dispensary_id integer REFERENCES public.dispensaries(id) ON DELETE CASCADE,
|
|
category_id integer REFERENCES public.categories(id) ON DELETE SET NULL,
|
|
name character varying(255) NOT NULL,
|
|
slug character varying(255) NOT NULL,
|
|
brand character varying(255),
|
|
brand_slug character varying(255),
|
|
brand_external_id character varying(100),
|
|
enterprise_product_id character varying(100),
|
|
category character varying(100),
|
|
subcategory character varying(100),
|
|
strain_type character varying(50),
|
|
description text,
|
|
effects text[],
|
|
price numeric(10,2),
|
|
special_price numeric(10,2),
|
|
is_on_special boolean DEFAULT false,
|
|
weight character varying(50),
|
|
unit character varying(50),
|
|
thc_percentage numeric(5,2),
|
|
cbd_percentage numeric(5,2),
|
|
terpenes text[],
|
|
image_url text,
|
|
image_public_id character varying(255),
|
|
image_width integer,
|
|
image_height integer,
|
|
dutchie_id character varying(100),
|
|
dutchie_url text,
|
|
sku character varying(100),
|
|
in_stock boolean DEFAULT true,
|
|
stock_status character varying(50) DEFAULT 'in_stock',
|
|
stock_quantity integer,
|
|
availability_status character varying(50) DEFAULT 'available',
|
|
status character varying(50) DEFAULT 'active',
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
UNIQUE(store_id, slug)
|
|
);
|
|
|
|
-- dutchie_products (AZ pipeline products)
|
|
CREATE TABLE IF NOT EXISTS public.dutchie_products (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
|
|
external_product_id character varying(100) NOT NULL,
|
|
name character varying(500) NOT NULL,
|
|
brand_name character varying(255),
|
|
category character varying(100),
|
|
subcategory character varying(100),
|
|
strain_type character varying(50),
|
|
description text,
|
|
effects text[],
|
|
thc_content character varying(50),
|
|
cbd_content character varying(50),
|
|
primary_image_url text,
|
|
additional_images text[],
|
|
stock_status character varying(50) DEFAULT 'in_stock',
|
|
first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
UNIQUE(dispensary_id, external_product_id)
|
|
);
|
|
|
|
-- dutchie_product_snapshots
|
|
CREATE TABLE IF NOT EXISTS public.dutchie_product_snapshots (
|
|
id SERIAL PRIMARY KEY,
|
|
dutchie_product_id integer NOT NULL REFERENCES public.dutchie_products(id) ON DELETE CASCADE,
|
|
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
|
|
options jsonb,
|
|
raw_product_data jsonb,
|
|
crawled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- campaigns
|
|
CREATE TABLE IF NOT EXISTS public.campaigns (
|
|
id SERIAL PRIMARY KEY,
|
|
name character varying(255) NOT NULL,
|
|
slug character varying(255) NOT NULL UNIQUE,
|
|
description text,
|
|
display_style character varying(50) DEFAULT 'grid',
|
|
active boolean DEFAULT true,
|
|
start_date timestamp without time zone,
|
|
end_date timestamp without time zone,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- campaign_products
|
|
CREATE TABLE IF NOT EXISTS public.campaign_products (
|
|
id SERIAL PRIMARY KEY,
|
|
campaign_id integer REFERENCES public.campaigns(id) ON DELETE CASCADE,
|
|
product_id integer REFERENCES public.products(id) ON DELETE CASCADE,
|
|
display_order integer DEFAULT 0,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
UNIQUE(campaign_id, product_id)
|
|
);
|
|
|
|
-- clicks
|
|
CREATE TABLE IF NOT EXISTS public.clicks (
|
|
id SERIAL PRIMARY KEY,
|
|
product_id integer REFERENCES public.products(id) ON DELETE CASCADE,
|
|
campaign_id integer REFERENCES public.campaigns(id) ON DELETE SET NULL,
|
|
ip_address character varying(45),
|
|
user_agent text,
|
|
referer text,
|
|
clicked_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- proxies
|
|
CREATE TABLE IF NOT EXISTS public.proxies (
|
|
id SERIAL PRIMARY KEY,
|
|
host character varying(255) NOT NULL,
|
|
port integer NOT NULL,
|
|
protocol character varying(10) DEFAULT 'http',
|
|
username character varying(255),
|
|
password character varying(255),
|
|
active boolean DEFAULT true,
|
|
is_anonymous boolean DEFAULT false,
|
|
last_tested_at timestamp without time zone,
|
|
last_test_result boolean,
|
|
response_time_ms integer,
|
|
fail_count integer DEFAULT 0,
|
|
success_count integer DEFAULT 0,
|
|
country_code character varying(2),
|
|
state character varying(50),
|
|
city character varying(100),
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
UNIQUE(host, port, protocol)
|
|
);
|
|
|
|
-- failed_proxies
|
|
CREATE TABLE IF NOT EXISTS public.failed_proxies (
|
|
id SERIAL PRIMARY KEY,
|
|
host character varying(255) NOT NULL,
|
|
port integer NOT NULL,
|
|
protocol character varying(10) DEFAULT 'http',
|
|
failure_reason text,
|
|
failed_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
UNIQUE(host, port, protocol)
|
|
);
|
|
|
|
-- proxy_test_jobs
|
|
CREATE TABLE IF NOT EXISTS public.proxy_test_jobs (
|
|
id SERIAL PRIMARY KEY,
|
|
status character varying(50) DEFAULT 'pending',
|
|
total_proxies integer DEFAULT 0,
|
|
tested_proxies integer DEFAULT 0,
|
|
successful_proxies integer DEFAULT 0,
|
|
failed_proxies integer DEFAULT 0,
|
|
started_at timestamp without time zone,
|
|
completed_at timestamp without time zone,
|
|
error_message text,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- crawl_jobs
|
|
CREATE TABLE IF NOT EXISTS public.crawl_jobs (
|
|
id SERIAL PRIMARY KEY,
|
|
store_id integer REFERENCES public.stores(id) ON DELETE CASCADE,
|
|
status character varying(50) DEFAULT 'pending',
|
|
job_type character varying(50) DEFAULT 'full',
|
|
category_slug character varying(255),
|
|
priority integer DEFAULT 0,
|
|
scheduled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
started_at timestamp without time zone,
|
|
completed_at timestamp without time zone,
|
|
products_found integer DEFAULT 0,
|
|
products_updated integer DEFAULT 0,
|
|
products_created integer DEFAULT 0,
|
|
error_message text,
|
|
retry_count integer DEFAULT 0,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- crawler_schedule
|
|
CREATE TABLE IF NOT EXISTS public.crawler_schedule (
|
|
id SERIAL PRIMARY KEY,
|
|
schedule_type character varying(100) NOT NULL UNIQUE,
|
|
cron_expression character varying(100) NOT NULL,
|
|
is_active boolean DEFAULT true,
|
|
last_run_at timestamp without time zone,
|
|
next_run_at timestamp without time zone,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- store_crawl_schedule
|
|
CREATE TABLE IF NOT EXISTS public.store_crawl_schedule (
|
|
id SERIAL PRIMARY KEY,
|
|
store_id integer NOT NULL REFERENCES public.stores(id) ON DELETE CASCADE UNIQUE,
|
|
cron_expression character varying(100) NOT NULL,
|
|
is_active boolean DEFAULT true,
|
|
priority integer DEFAULT 0,
|
|
last_run_at timestamp without time zone,
|
|
next_run_at timestamp without time zone,
|
|
last_status character varying(50),
|
|
last_error text,
|
|
consecutive_failures integer DEFAULT 0,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- dispensary_crawl_schedule
|
|
CREATE TABLE IF NOT EXISTS public.dispensary_crawl_schedule (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE UNIQUE,
|
|
cron_expression character varying(100) NOT NULL,
|
|
is_active boolean DEFAULT true,
|
|
priority integer DEFAULT 0,
|
|
last_run_at timestamp without time zone,
|
|
next_run_at timestamp without time zone,
|
|
last_status character varying(50),
|
|
last_error text,
|
|
consecutive_failures integer DEFAULT 0,
|
|
metadata jsonb,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- dispensary_crawl_jobs
|
|
CREATE TABLE IF NOT EXISTS public.dispensary_crawl_jobs (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
|
|
schedule_id integer REFERENCES public.dispensary_crawl_schedule(id) ON DELETE SET NULL,
|
|
status character varying(50) DEFAULT 'pending',
|
|
job_type character varying(50) DEFAULT 'full',
|
|
priority integer DEFAULT 0,
|
|
scheduled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
started_at timestamp without time zone,
|
|
completed_at timestamp without time zone,
|
|
products_found integer DEFAULT 0,
|
|
products_updated integer DEFAULT 0,
|
|
products_created integer DEFAULT 0,
|
|
brands_found integer DEFAULT 0,
|
|
error_message text,
|
|
metadata jsonb,
|
|
retry_count integer DEFAULT 0,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- dispensary_changes
|
|
CREATE TABLE IF NOT EXISTS public.dispensary_changes (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
|
|
field_name character varying(100) NOT NULL,
|
|
old_value text,
|
|
new_value text,
|
|
status character varying(50) DEFAULT 'pending',
|
|
source character varying(100),
|
|
notes text,
|
|
requires_recrawl boolean DEFAULT false,
|
|
reviewed_by integer REFERENCES public.users(id),
|
|
reviewed_at timestamp without time zone,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- crawler_templates
|
|
CREATE TABLE IF NOT EXISTS public.crawler_templates (
|
|
id SERIAL PRIMARY KEY,
|
|
provider character varying(100) NOT NULL,
|
|
name character varying(255) NOT NULL,
|
|
version integer DEFAULT 1,
|
|
description text,
|
|
config jsonb NOT NULL,
|
|
is_active boolean DEFAULT true,
|
|
is_default_for_provider boolean DEFAULT false,
|
|
success_count integer DEFAULT 0,
|
|
failure_count integer DEFAULT 0,
|
|
last_used_at timestamp without time zone,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
UNIQUE(provider, name, version)
|
|
);
|
|
|
|
-- crawler_sandboxes
|
|
CREATE TABLE IF NOT EXISTS public.crawler_sandboxes (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
|
|
category character varying(100) NOT NULL,
|
|
mode character varying(50) DEFAULT 'testing',
|
|
status character varying(50) DEFAULT 'pending',
|
|
suspected_menu_provider character varying(100),
|
|
template_name character varying(255),
|
|
config_overrides jsonb,
|
|
last_test_at timestamp without time zone,
|
|
last_test_result text,
|
|
test_count integer DEFAULT 0,
|
|
success_count integer DEFAULT 0,
|
|
notes text,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- sandbox_crawl_jobs
|
|
CREATE TABLE IF NOT EXISTS public.sandbox_crawl_jobs (
|
|
id SERIAL PRIMARY KEY,
|
|
sandbox_id integer REFERENCES public.crawler_sandboxes(id) ON DELETE SET NULL,
|
|
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
|
|
category character varying(100) NOT NULL,
|
|
status character varying(50) DEFAULT 'pending',
|
|
priority integer DEFAULT 0,
|
|
scheduled_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
started_at timestamp without time zone,
|
|
completed_at timestamp without time zone,
|
|
items_found integer DEFAULT 0,
|
|
items_saved integer DEFAULT 0,
|
|
error_message text,
|
|
result_data jsonb,
|
|
retry_count integer DEFAULT 0,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- brands
|
|
CREATE TABLE IF NOT EXISTS public.brands (
|
|
id SERIAL PRIMARY KEY,
|
|
store_id integer NOT NULL REFERENCES public.stores(id) ON DELETE CASCADE,
|
|
dispensary_id integer REFERENCES public.dispensaries(id),
|
|
name character varying(255) NOT NULL,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
first_seen_at timestamp with time zone DEFAULT now(),
|
|
last_seen_at timestamp with time zone DEFAULT now(),
|
|
UNIQUE(store_id, name)
|
|
);
|
|
|
|
-- brand_scrape_jobs
|
|
CREATE TABLE IF NOT EXISTS public.brand_scrape_jobs (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id),
|
|
brand_slug text NOT NULL,
|
|
brand_name text NOT NULL,
|
|
status text DEFAULT 'pending' NOT NULL,
|
|
worker_id text,
|
|
started_at timestamp without time zone,
|
|
completed_at timestamp without time zone,
|
|
products_found integer DEFAULT 0,
|
|
products_saved integer DEFAULT 0,
|
|
error_message text,
|
|
retry_count integer DEFAULT 0,
|
|
created_at timestamp without time zone DEFAULT now(),
|
|
updated_at timestamp without time zone DEFAULT now(),
|
|
UNIQUE(dispensary_id, brand_slug)
|
|
);
|
|
|
|
-- brand_history
|
|
CREATE TABLE IF NOT EXISTS public.brand_history (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id integer NOT NULL REFERENCES public.dispensaries(id) ON DELETE CASCADE,
|
|
brand_name character varying(255) NOT NULL,
|
|
event_type character varying(20) NOT NULL,
|
|
event_at timestamp with time zone DEFAULT now(),
|
|
product_count integer,
|
|
metadata jsonb
|
|
);
|
|
|
|
-- batch_history
|
|
CREATE TABLE IF NOT EXISTS public.batch_history (
|
|
id SERIAL PRIMARY KEY,
|
|
product_id integer REFERENCES public.products(id) ON DELETE CASCADE,
|
|
thc_percentage numeric(5,2),
|
|
cbd_percentage numeric(5,2),
|
|
terpenes text[],
|
|
strain_type character varying(100),
|
|
recorded_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- price_history
|
|
CREATE TABLE IF NOT EXISTS public.price_history (
|
|
id SERIAL PRIMARY KEY,
|
|
product_id integer REFERENCES public.products(id) ON DELETE CASCADE,
|
|
price numeric(10,2) NOT NULL,
|
|
special_price numeric(10,2),
|
|
recorded_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- product_categories
|
|
CREATE TABLE IF NOT EXISTS public.product_categories (
|
|
id SERIAL PRIMARY KEY,
|
|
product_id integer NOT NULL REFERENCES public.products(id) ON DELETE CASCADE,
|
|
category_slug character varying(255) NOT NULL,
|
|
first_seen_at timestamp with time zone DEFAULT now(),
|
|
last_seen_at timestamp with time zone DEFAULT now(),
|
|
UNIQUE(product_id, category_slug)
|
|
);
|
|
|
|
-- specials
|
|
CREATE TABLE IF NOT EXISTS public.specials (
|
|
id SERIAL PRIMARY KEY,
|
|
store_id integer NOT NULL REFERENCES public.stores(id) ON DELETE CASCADE,
|
|
product_id integer REFERENCES public.products(id) ON DELETE CASCADE,
|
|
title character varying(255) NOT NULL,
|
|
description text,
|
|
discount_type character varying(50),
|
|
discount_value numeric(10,2),
|
|
min_purchase numeric(10,2),
|
|
valid_date date,
|
|
start_time time without time zone,
|
|
end_time time without time zone,
|
|
days_of_week text[],
|
|
is_recurring boolean DEFAULT false,
|
|
badge_text character varying(50),
|
|
image_url text,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- jobs (generic job queue)
|
|
CREATE TABLE IF NOT EXISTS public.jobs (
|
|
id SERIAL PRIMARY KEY,
|
|
store_id integer REFERENCES public.stores(id) ON DELETE CASCADE,
|
|
type character varying(100) NOT NULL,
|
|
status character varying(50) DEFAULT 'pending',
|
|
payload jsonb,
|
|
result jsonb,
|
|
error_message text,
|
|
started_at timestamp without time zone,
|
|
completed_at timestamp without time zone,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- wp_dutchie_api_permissions (WordPress API permissions)
|
|
CREATE TABLE IF NOT EXISTS public.wp_dutchie_api_permissions (
|
|
id SERIAL PRIMARY KEY,
|
|
user_name character varying(255) NOT NULL,
|
|
api_key character varying(255) NOT NULL UNIQUE,
|
|
allowed_ips text,
|
|
allowed_domains text,
|
|
is_active smallint DEFAULT 1,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
last_used_at timestamp without time zone,
|
|
store_id integer REFERENCES public.stores(id),
|
|
store_name character varying(255),
|
|
dispensary_id integer REFERENCES public.dispensaries(id)
|
|
);
|
|
|
|
-- Create indexes (only if they don't exist)
|
|
CREATE INDEX IF NOT EXISTS idx_api_token_usage_created_at ON public.api_token_usage(created_at);
|
|
CREATE INDEX IF NOT EXISTS idx_api_token_usage_endpoint ON public.api_token_usage(endpoint);
|
|
CREATE INDEX IF NOT EXISTS idx_api_token_usage_token_id ON public.api_token_usage(token_id);
|
|
CREATE INDEX IF NOT EXISTS idx_api_tokens_active ON public.api_tokens(active);
|
|
CREATE INDEX IF NOT EXISTS idx_api_tokens_token ON public.api_tokens(token);
|
|
CREATE INDEX IF NOT EXISTS idx_batch_history_product ON public.batch_history(product_id, recorded_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_batch_history_recorded ON public.batch_history(recorded_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_brand_history_brand ON public.brand_history(brand_name, event_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_brand_history_dispensary ON public.brand_history(dispensary_id, event_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_brand_history_event ON public.brand_history(event_type, event_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_brand_jobs_dispensary ON public.brand_scrape_jobs(dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_brand_jobs_status ON public.brand_scrape_jobs(status);
|
|
CREATE INDEX IF NOT EXISTS idx_brands_dispensary ON public.brands(dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_brands_last_seen ON public.brands(last_seen_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_brands_store_id ON public.brands(store_id);
|
|
CREATE INDEX IF NOT EXISTS idx_categories_dispensary_id ON public.categories(dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_categories_parent_id ON public.categories(parent_id);
|
|
CREATE INDEX IF NOT EXISTS idx_categories_path ON public.categories(path);
|
|
CREATE INDEX IF NOT EXISTS idx_clicks_campaign_id ON public.clicks(campaign_id);
|
|
CREATE INDEX IF NOT EXISTS idx_clicks_clicked_at ON public.clicks(clicked_at);
|
|
CREATE INDEX IF NOT EXISTS idx_clicks_product_id ON public.clicks(product_id);
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_status ON public.crawl_jobs(status);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_azdhs_id ON public.dispensaries(azdhs_id);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_city ON public.dispensaries(city);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_crawl_status ON public.dispensaries(crawl_status);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_crawler_mode ON public.dispensaries(crawler_mode);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_crawler_status ON public.dispensaries(crawler_status);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_provider ON public.dispensaries(menu_provider);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_provider_confidence ON public.dispensaries(menu_provider_confidence);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_slug ON public.dispensaries(slug);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_state ON public.dispensaries(state);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_changes_created_at ON public.dispensary_changes(created_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_changes_dispensary_status ON public.dispensary_changes(dispensary_id, status);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_changes_status ON public.dispensary_changes(status);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_dispensary ON public.dispensary_crawl_jobs(dispensary_id, created_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_recent ON public.dispensary_crawl_jobs(created_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_jobs_status ON public.dispensary_crawl_jobs(status);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_active ON public.dispensary_crawl_schedule(is_active);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensary_crawl_schedule_status ON public.dispensary_crawl_schedule(last_status);
|
|
CREATE INDEX IF NOT EXISTS idx_jobs_status ON public.jobs(status);
|
|
CREATE INDEX IF NOT EXISTS idx_jobs_store_id ON public.jobs(store_id);
|
|
CREATE INDEX IF NOT EXISTS idx_jobs_type ON public.jobs(type);
|
|
CREATE INDEX IF NOT EXISTS idx_price_history_product ON public.price_history(product_id, recorded_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_price_history_recorded ON public.price_history(recorded_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_product_categories_product ON public.product_categories(product_id);
|
|
CREATE INDEX IF NOT EXISTS idx_products_availability_status ON public.products(availability_status);
|
|
CREATE INDEX IF NOT EXISTS idx_products_brand_external ON public.products(brand_external_id);
|
|
CREATE INDEX IF NOT EXISTS idx_products_dispensary_id ON public.products(dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_products_enterprise ON public.products(enterprise_product_id);
|
|
CREATE INDEX IF NOT EXISTS idx_products_is_special ON public.products(is_on_special);
|
|
CREATE INDEX IF NOT EXISTS idx_products_sku ON public.products(sku);
|
|
CREATE INDEX IF NOT EXISTS idx_products_status ON public.products(status);
|
|
CREATE INDEX IF NOT EXISTS idx_products_stock_status ON public.products(stock_status);
|
|
CREATE INDEX IF NOT EXISTS idx_products_subcategory ON public.products(subcategory);
|
|
CREATE INDEX IF NOT EXISTS idx_proxies_location ON public.proxies(country_code, state, city);
|
|
CREATE INDEX IF NOT EXISTS idx_proxy_test_jobs_created_at ON public.proxy_test_jobs(created_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_proxy_test_jobs_status ON public.proxy_test_jobs(status);
|
|
CREATE INDEX IF NOT EXISTS idx_sandbox_category ON public.crawler_sandboxes(category);
|
|
CREATE INDEX IF NOT EXISTS idx_sandbox_dispensary ON public.crawler_sandboxes(dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_sandbox_job_category ON public.sandbox_crawl_jobs(category);
|
|
CREATE INDEX IF NOT EXISTS idx_sandbox_job_dispensary ON public.sandbox_crawl_jobs(dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_sandbox_job_status ON public.sandbox_crawl_jobs(status);
|
|
CREATE INDEX IF NOT EXISTS idx_sandbox_mode ON public.crawler_sandboxes(mode);
|
|
CREATE INDEX IF NOT EXISTS idx_sandbox_status ON public.crawler_sandboxes(status);
|
|
CREATE INDEX IF NOT EXISTS idx_sandbox_suspected_provider ON public.crawler_sandboxes(suspected_menu_provider);
|
|
CREATE INDEX IF NOT EXISTS idx_sandbox_template ON public.crawler_sandboxes(template_name);
|
|
CREATE INDEX IF NOT EXISTS idx_specials_product_id ON public.specials(product_id);
|
|
CREATE INDEX IF NOT EXISTS idx_stores_dispensary_id ON public.stores(dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_template_active ON public.crawler_templates(is_active);
|
|
CREATE INDEX IF NOT EXISTS idx_template_provider ON public.crawler_templates(provider);
|
|
CREATE INDEX IF NOT EXISTS idx_wp_api_permissions_store_id ON public.wp_dutchie_api_permissions(store_id);
|
|
|
|
-- Create triggers (drop first if exists to avoid errors)
|
|
DROP TRIGGER IF EXISTS api_tokens_updated_at ON public.api_tokens;
|
|
CREATE TRIGGER api_tokens_updated_at BEFORE UPDATE ON public.api_tokens FOR EACH ROW EXECUTE FUNCTION public.update_api_token_updated_at();
|
|
|
|
DROP TRIGGER IF EXISTS trigger_crawl_jobs_updated_at ON public.crawl_jobs;
|
|
CREATE TRIGGER trigger_crawl_jobs_updated_at BEFORE UPDATE ON public.crawl_jobs FOR EACH ROW EXECUTE FUNCTION public.update_schedule_updated_at();
|
|
|
|
DROP TRIGGER IF EXISTS trigger_crawler_schedule_updated_at ON public.crawler_schedule;
|
|
CREATE TRIGGER trigger_crawler_schedule_updated_at BEFORE UPDATE ON public.crawler_schedule FOR EACH ROW EXECUTE FUNCTION public.update_schedule_updated_at();
|
|
|
|
DROP TRIGGER IF EXISTS trigger_sandbox_job_updated_at ON public.sandbox_crawl_jobs;
|
|
CREATE TRIGGER trigger_sandbox_job_updated_at BEFORE UPDATE ON public.sandbox_crawl_jobs FOR EACH ROW EXECUTE FUNCTION public.update_sandbox_timestamp();
|
|
|
|
DROP TRIGGER IF EXISTS trigger_sandbox_updated_at ON public.crawler_sandboxes;
|
|
CREATE TRIGGER trigger_sandbox_updated_at BEFORE UPDATE ON public.crawler_sandboxes FOR EACH ROW EXECUTE FUNCTION public.update_sandbox_timestamp();
|
|
|
|
DROP TRIGGER IF EXISTS trigger_set_requires_recrawl ON public.dispensary_changes;
|
|
CREATE TRIGGER trigger_set_requires_recrawl BEFORE INSERT ON public.dispensary_changes FOR EACH ROW EXECUTE FUNCTION public.set_requires_recrawl();
|
|
|
|
DROP TRIGGER IF EXISTS trigger_store_crawl_schedule_updated_at ON public.store_crawl_schedule;
|
|
CREATE TRIGGER trigger_store_crawl_schedule_updated_at BEFORE UPDATE ON public.store_crawl_schedule FOR EACH ROW EXECUTE FUNCTION public.update_schedule_updated_at();
|
|
|
|
DROP TRIGGER IF EXISTS trigger_template_updated_at ON public.crawler_templates;
|
|
CREATE TRIGGER trigger_template_updated_at BEFORE UPDATE ON public.crawler_templates FOR EACH ROW EXECUTE FUNCTION public.update_sandbox_timestamp();
|
|
|
|
DROP TRIGGER IF EXISTS trigger_update_brand_scrape_jobs_timestamp ON public.brand_scrape_jobs;
|
|
CREATE TRIGGER trigger_update_brand_scrape_jobs_timestamp BEFORE UPDATE ON public.brand_scrape_jobs FOR EACH ROW EXECUTE FUNCTION public.update_brand_scrape_jobs_updated_at();
|
|
|
|
-- ============================================
|
|
-- Scheduler tables (job_schedules, job_run_logs)
|
|
-- ============================================
|
|
|
|
-- Function for updating job_schedules.updated_at
|
|
CREATE OR REPLACE FUNCTION public.update_job_schedule_updated_at() RETURNS trigger
|
|
LANGUAGE plpgsql
|
|
AS $$
|
|
BEGIN
|
|
NEW.updated_at = NOW();
|
|
RETURN NEW;
|
|
END;
|
|
$$;
|
|
|
|
-- job_schedules - tracks scheduled job definitions
|
|
CREATE TABLE IF NOT EXISTS public.job_schedules (
|
|
id SERIAL PRIMARY KEY,
|
|
job_name character varying(100) NOT NULL UNIQUE,
|
|
description text,
|
|
enabled boolean DEFAULT true,
|
|
base_interval_minutes integer DEFAULT 240,
|
|
jitter_minutes integer DEFAULT 30,
|
|
last_run_at timestamp without time zone,
|
|
last_status character varying(20),
|
|
last_error_message text,
|
|
last_duration_ms integer,
|
|
next_run_at timestamp without time zone,
|
|
job_config jsonb DEFAULT '{}'::jsonb,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_job_schedules_enabled ON public.job_schedules(enabled);
|
|
CREATE INDEX IF NOT EXISTS idx_job_schedules_next_run ON public.job_schedules(next_run_at) WHERE enabled = true;
|
|
|
|
DROP TRIGGER IF EXISTS trigger_job_schedule_updated_at ON public.job_schedules;
|
|
CREATE TRIGGER trigger_job_schedule_updated_at BEFORE UPDATE ON public.job_schedules FOR EACH ROW EXECUTE FUNCTION public.update_job_schedule_updated_at();
|
|
|
|
-- job_run_logs - tracks individual job execution history
|
|
CREATE TABLE IF NOT EXISTS public.job_run_logs (
|
|
id SERIAL PRIMARY KEY,
|
|
schedule_id integer REFERENCES public.job_schedules(id) ON DELETE SET NULL,
|
|
job_name character varying(100) NOT NULL,
|
|
status character varying(20) NOT NULL,
|
|
started_at timestamp without time zone NOT NULL,
|
|
completed_at timestamp without time zone,
|
|
duration_ms integer,
|
|
error_message text,
|
|
items_processed integer DEFAULT 0,
|
|
items_succeeded integer DEFAULT 0,
|
|
items_failed integer DEFAULT 0,
|
|
metadata jsonb DEFAULT '{}'::jsonb,
|
|
created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_job_run_logs_schedule ON public.job_run_logs(schedule_id);
|
|
CREATE INDEX IF NOT EXISTS idx_job_run_logs_job_name ON public.job_run_logs(job_name);
|
|
CREATE INDEX IF NOT EXISTS idx_job_run_logs_started_at ON public.job_run_logs(started_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_job_run_logs_status ON public.job_run_logs(status);
|
|
|
|
-- Insert default admin user if not exists
|
|
INSERT INTO public.users (email, password_hash, role)
|
|
SELECT 'admin@example.com', '$2b$10$K8/KqJyNqJKqJyNqJKqJyOqJKqJyNqJKqJyNqJKqJyNqJKqJyNqJK', 'admin'
|
|
WHERE NOT EXISTS (SELECT 1 FROM public.users WHERE email = 'admin@example.com');
|
|
|
|
-- ============================================
|
|
-- Dashboard views (consolidated schema)
|
|
-- ============================================
|
|
|
|
DROP VIEW IF EXISTS public.v_dashboard_stats CASCADE;
|
|
DROP VIEW IF EXISTS public.v_latest_snapshots CASCADE;
|
|
DROP VIEW IF EXISTS public.v_brands CASCADE;
|
|
DROP VIEW IF EXISTS public.v_categories CASCADE;
|
|
DROP VIEW IF EXISTS public.v_product_brands CASCADE;
|
|
DROP VIEW IF EXISTS public.v_product_categories CASCADE;
|
|
|
|
-- Latest snapshot per product (most recent crawl data)
|
|
CREATE OR REPLACE VIEW public.v_latest_snapshots AS
|
|
SELECT DISTINCT ON (dutchie_product_id)
|
|
s.*
|
|
FROM public.dutchie_product_snapshots s
|
|
ORDER BY dutchie_product_id, crawled_at DESC;
|
|
|
|
-- Dashboard stats
|
|
CREATE OR REPLACE VIEW public.v_dashboard_stats AS
|
|
SELECT
|
|
(SELECT COUNT(*) FROM public.dispensaries WHERE state = 'AZ') as dispensary_count,
|
|
(SELECT COUNT(*) FROM public.dutchie_products) as product_count,
|
|
(SELECT COUNT(*) FROM public.dutchie_product_snapshots WHERE crawled_at > NOW() - INTERVAL '24 hours') as snapshots_24h,
|
|
(SELECT MAX(crawled_at) FROM public.dutchie_product_snapshots) as last_crawl_time,
|
|
(SELECT COUNT(*) FROM public.crawl_jobs WHERE status = 'failed' AND created_at > NOW() - INTERVAL '24 hours') as failed_jobs_24h,
|
|
(SELECT COUNT(DISTINCT brand_name) FROM public.dutchie_products WHERE brand_name IS NOT NULL) as brand_count,
|
|
(SELECT COUNT(DISTINCT (type, subcategory)) FROM public.dutchie_products WHERE type IS NOT NULL) as category_count;
|
|
|
|
-- Brands derived from products
|
|
CREATE OR REPLACE VIEW public.v_brands AS
|
|
SELECT
|
|
brand_name,
|
|
brand_id,
|
|
MAX(brand_logo_url) as brand_logo_url,
|
|
COUNT(*) as product_count,
|
|
COUNT(DISTINCT dispensary_id) as dispensary_count,
|
|
ARRAY_AGG(DISTINCT type) FILTER (WHERE type IS NOT NULL) as product_types
|
|
FROM public.dutchie_products
|
|
WHERE brand_name IS NOT NULL
|
|
GROUP BY brand_name, brand_id
|
|
ORDER BY product_count DESC;
|
|
|
|
-- Categories derived from products
|
|
CREATE OR REPLACE VIEW public.v_categories AS
|
|
SELECT
|
|
type,
|
|
subcategory,
|
|
COUNT(*) as product_count,
|
|
COUNT(DISTINCT dispensary_id) as dispensary_count,
|
|
AVG(thc) as avg_thc,
|
|
MIN(thc) as min_thc,
|
|
MAX(thc) as max_thc
|
|
FROM public.dutchie_products
|
|
WHERE type IS NOT NULL
|
|
GROUP BY type, subcategory
|
|
ORDER BY type, subcategory;
|
|
|
|
-- Dispensary crawl status view (consolidated schema)
|
|
DROP VIEW IF EXISTS public.dispensary_crawl_status CASCADE;
|
|
CREATE OR REPLACE VIEW public.dispensary_crawl_status AS
|
|
SELECT
|
|
d.id AS dispensary_id,
|
|
COALESCE(d.dba_name, d.name) AS dispensary_name,
|
|
d.slug AS dispensary_slug,
|
|
d.city,
|
|
d.state,
|
|
d.menu_url,
|
|
d.menu_type,
|
|
d.scrape_enabled,
|
|
d.last_crawl_at,
|
|
d.crawl_status,
|
|
d.crawl_error,
|
|
cs.cron_expression,
|
|
cs.is_active,
|
|
cs.priority,
|
|
cs.last_run_at,
|
|
cs.next_run_at,
|
|
cs.last_status AS schedule_last_status,
|
|
cs.last_error AS schedule_last_error,
|
|
cs.consecutive_failures,
|
|
j.id AS latest_job_id,
|
|
j.status AS latest_job_status,
|
|
j.job_type AS latest_job_type,
|
|
j.started_at AS latest_job_started,
|
|
j.completed_at AS latest_job_completed,
|
|
j.products_found AS latest_products_found,
|
|
j.products_created AS latest_products_created,
|
|
j.products_updated AS latest_products_updated,
|
|
j.error_message AS latest_job_error
|
|
FROM public.dispensaries d
|
|
LEFT JOIN public.dispensary_crawl_schedule cs ON cs.dispensary_id = d.id
|
|
LEFT JOIN LATERAL (
|
|
SELECT *
|
|
FROM public.dispensary_crawl_jobs dj
|
|
WHERE dj.dispensary_id = d.id
|
|
ORDER BY dj.created_at DESC
|
|
LIMIT 1
|
|
) j ON true
|
|
WHERE d.state = 'AZ';
|
|
|
|
-- Done!
|
|
SELECT 'Migration 031 completed successfully' as status;
|