"use strict"; /** * Dutchie AZ Database Schema * * Creates all tables for the isolated Dutchie Arizona data pipeline. * Run this to initialize the dutchie_az database. */ Object.defineProperty(exports, "__esModule", { value: true }); exports.createSchema = createSchema; exports.dropSchema = dropSchema; exports.schemaExists = schemaExists; exports.ensureSchema = ensureSchema; const connection_1 = require("./connection"); /** * SQL statements to create all tables */ const SCHEMA_SQL = ` -- ============================================================ -- DISPENSARIES TABLE -- Stores discovered Dutchie dispensaries in Arizona -- ============================================================ CREATE TABLE IF NOT EXISTS dispensaries ( id SERIAL PRIMARY KEY, platform VARCHAR(20) NOT NULL DEFAULT 'dutchie', name VARCHAR(255) NOT NULL, slug VARCHAR(255) NOT NULL, city VARCHAR(100) NOT NULL, state VARCHAR(10) NOT NULL DEFAULT 'AZ', postal_code VARCHAR(20), address TEXT, latitude DECIMAL(10, 7), longitude DECIMAL(10, 7), platform_dispensary_id VARCHAR(100), is_delivery BOOLEAN DEFAULT false, is_pickup BOOLEAN DEFAULT true, raw_metadata JSONB, last_crawled_at TIMESTAMPTZ, product_count INTEGER DEFAULT 0, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW(), CONSTRAINT uk_dispensaries_platform_slug UNIQUE (platform, slug, city, state) ); CREATE INDEX IF NOT EXISTS idx_dispensaries_platform ON dispensaries(platform); CREATE INDEX IF NOT EXISTS idx_dispensaries_platform_id ON dispensaries(platform_dispensary_id); CREATE INDEX IF NOT EXISTS idx_dispensaries_state ON dispensaries(state); CREATE INDEX IF NOT EXISTS idx_dispensaries_city ON dispensaries(city); -- ============================================================ -- DUTCHIE_PRODUCTS TABLE -- Canonical product identity per store -- ============================================================ CREATE TABLE IF NOT EXISTS dutchie_products ( id SERIAL PRIMARY KEY, dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE, platform VARCHAR(20) NOT NULL DEFAULT 'dutchie', external_product_id VARCHAR(100) NOT NULL, platform_dispensary_id VARCHAR(100) NOT NULL, c_name VARCHAR(500), name VARCHAR(500) NOT NULL, -- Brand brand_name VARCHAR(255), brand_id VARCHAR(100), brand_logo_url TEXT, -- Classification type VARCHAR(100), subcategory VARCHAR(100), strain_type VARCHAR(50), provider VARCHAR(100), -- Potency thc DECIMAL(10, 4), thc_content DECIMAL(10, 4), cbd DECIMAL(10, 4), cbd_content DECIMAL(10, 4), cannabinoids_v2 JSONB, effects JSONB, -- Status / flags status VARCHAR(50), medical_only BOOLEAN DEFAULT false, rec_only BOOLEAN DEFAULT false, featured BOOLEAN DEFAULT false, coming_soon BOOLEAN DEFAULT false, certificate_of_analysis_enabled BOOLEAN DEFAULT false, is_below_threshold BOOLEAN DEFAULT false, is_below_kiosk_threshold BOOLEAN DEFAULT false, options_below_threshold BOOLEAN DEFAULT false, options_below_kiosk_threshold BOOLEAN DEFAULT false, -- Derived stock status: 'in_stock', 'out_of_stock', 'unknown' stock_status VARCHAR(20) DEFAULT 'unknown', total_quantity_available INTEGER DEFAULT 0, -- Images primary_image_url TEXT, images JSONB, -- Misc measurements JSONB, weight VARCHAR(50), past_c_names TEXT[], created_at_dutchie TIMESTAMPTZ, updated_at_dutchie TIMESTAMPTZ, latest_raw_payload JSONB, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW(), CONSTRAINT uk_dutchie_products UNIQUE (dispensary_id, external_product_id) ); CREATE INDEX IF NOT EXISTS idx_dutchie_products_dispensary ON dutchie_products(dispensary_id); CREATE INDEX IF NOT EXISTS idx_dutchie_products_external_id ON dutchie_products(external_product_id); CREATE INDEX IF NOT EXISTS idx_dutchie_products_platform_disp ON dutchie_products(platform_dispensary_id); CREATE INDEX IF NOT EXISTS idx_dutchie_products_brand ON dutchie_products(brand_name); CREATE INDEX IF NOT EXISTS idx_dutchie_products_type ON dutchie_products(type); CREATE INDEX IF NOT EXISTS idx_dutchie_products_subcategory ON dutchie_products(subcategory); CREATE INDEX IF NOT EXISTS idx_dutchie_products_status ON dutchie_products(status); CREATE INDEX IF NOT EXISTS idx_dutchie_products_strain ON dutchie_products(strain_type); CREATE INDEX IF NOT EXISTS idx_dutchie_products_stock_status ON dutchie_products(stock_status); -- ============================================================ -- DUTCHIE_PRODUCT_SNAPSHOTS TABLE -- Historical state per crawl, includes options[] -- ============================================================ CREATE TABLE IF NOT EXISTS dutchie_product_snapshots ( id SERIAL PRIMARY KEY, dutchie_product_id INTEGER NOT NULL REFERENCES dutchie_products(id) ON DELETE CASCADE, dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE, platform_dispensary_id VARCHAR(100) NOT NULL, external_product_id VARCHAR(100) NOT NULL, pricing_type VARCHAR(20) DEFAULT 'unknown', crawl_mode VARCHAR(20) DEFAULT 'mode_a', -- 'mode_a' (UI parity) or 'mode_b' (max coverage) status VARCHAR(50), featured BOOLEAN DEFAULT false, special BOOLEAN DEFAULT false, medical_only BOOLEAN DEFAULT false, rec_only BOOLEAN DEFAULT false, -- Flag indicating if product was present in feed (false = missing_from_feed snapshot) is_present_in_feed BOOLEAN DEFAULT true, -- Derived stock status stock_status VARCHAR(20) DEFAULT 'unknown', -- Price summary (in cents) rec_min_price_cents INTEGER, rec_max_price_cents INTEGER, rec_min_special_price_cents INTEGER, med_min_price_cents INTEGER, med_max_price_cents INTEGER, med_min_special_price_cents INTEGER, wholesale_min_price_cents INTEGER, -- Inventory summary total_quantity_available INTEGER, total_kiosk_quantity_available INTEGER, manual_inventory BOOLEAN DEFAULT false, is_below_threshold BOOLEAN DEFAULT false, is_below_kiosk_threshold BOOLEAN DEFAULT false, -- Option-level data (from POSMetaData.children) options JSONB, -- Full raw product node raw_payload JSONB NOT NULL, crawled_at TIMESTAMPTZ NOT NULL, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_snapshots_product ON dutchie_product_snapshots(dutchie_product_id); CREATE INDEX IF NOT EXISTS idx_snapshots_dispensary ON dutchie_product_snapshots(dispensary_id); CREATE INDEX IF NOT EXISTS idx_snapshots_crawled_at ON dutchie_product_snapshots(crawled_at); CREATE INDEX IF NOT EXISTS idx_snapshots_platform_disp ON dutchie_product_snapshots(platform_dispensary_id); CREATE INDEX IF NOT EXISTS idx_snapshots_external_id ON dutchie_product_snapshots(external_product_id); CREATE INDEX IF NOT EXISTS idx_snapshots_special ON dutchie_product_snapshots(special) WHERE special = true; CREATE INDEX IF NOT EXISTS idx_snapshots_stock_status ON dutchie_product_snapshots(stock_status); CREATE INDEX IF NOT EXISTS idx_snapshots_crawl_mode ON dutchie_product_snapshots(crawl_mode); -- ============================================================ -- CRAWL_JOBS TABLE -- Tracks crawl execution status -- ============================================================ CREATE TABLE IF NOT EXISTS crawl_jobs ( id SERIAL PRIMARY KEY, job_type VARCHAR(50) NOT NULL, dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE SET NULL, status VARCHAR(20) NOT NULL DEFAULT 'pending', started_at TIMESTAMPTZ, completed_at TIMESTAMPTZ, error_message TEXT, products_found INTEGER, snapshots_created INTEGER, metadata JSONB, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_crawl_jobs_type ON crawl_jobs(job_type); CREATE INDEX IF NOT EXISTS idx_crawl_jobs_status ON crawl_jobs(status); CREATE INDEX IF NOT EXISTS idx_crawl_jobs_dispensary ON crawl_jobs(dispensary_id); CREATE INDEX IF NOT EXISTS idx_crawl_jobs_created ON crawl_jobs(created_at); -- ============================================================ -- JOB_SCHEDULES TABLE -- Stores schedule configuration for recurring jobs with jitter support -- Each job has independent timing that "wanders" over time -- ============================================================ CREATE TABLE IF NOT EXISTS job_schedules ( id SERIAL PRIMARY KEY, job_name VARCHAR(100) NOT NULL UNIQUE, description TEXT, enabled BOOLEAN DEFAULT true, -- Timing configuration (jitter makes times "wander") base_interval_minutes INTEGER NOT NULL DEFAULT 240, -- e.g., 4 hours jitter_minutes INTEGER NOT NULL DEFAULT 30, -- e.g., ±30 min -- Last run tracking last_run_at TIMESTAMPTZ, last_status VARCHAR(20), -- 'success', 'error', 'partial', 'running' last_error_message TEXT, last_duration_ms INTEGER, -- Next run (calculated with jitter after each run) next_run_at TIMESTAMPTZ, -- Additional config job_config JSONB, -- e.g., { pricingType: 'rec', useBothModes: true } created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_job_schedules_enabled ON job_schedules(enabled); CREATE INDEX IF NOT EXISTS idx_job_schedules_next_run ON job_schedules(next_run_at); -- ============================================================ -- JOB_RUN_LOGS TABLE -- Stores history of job runs for monitoring -- ============================================================ CREATE TABLE IF NOT EXISTS job_run_logs ( id SERIAL PRIMARY KEY, schedule_id INTEGER NOT NULL REFERENCES job_schedules(id) ON DELETE CASCADE, job_name VARCHAR(100) NOT NULL, status VARCHAR(20) NOT NULL, -- 'pending', 'running', 'success', 'error', 'partial' started_at TIMESTAMPTZ, completed_at TIMESTAMPTZ, duration_ms INTEGER, error_message TEXT, -- Results summary items_processed INTEGER, items_succeeded INTEGER, items_failed INTEGER, metadata JSONB, -- Additional run details created_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_job_run_logs_schedule ON job_run_logs(schedule_id); CREATE INDEX IF NOT EXISTS idx_job_run_logs_job_name ON job_run_logs(job_name); CREATE INDEX IF NOT EXISTS idx_job_run_logs_status ON job_run_logs(status); CREATE INDEX IF NOT EXISTS idx_job_run_logs_created ON job_run_logs(created_at); -- ============================================================ -- VIEWS FOR EASY QUERYING -- ============================================================ -- Categories derived from products CREATE OR REPLACE VIEW v_categories AS SELECT type, subcategory, COUNT(DISTINCT id) as product_count, COUNT(DISTINCT dispensary_id) as dispensary_count, AVG(thc) as avg_thc, MIN(thc) as min_thc, MAX(thc) as max_thc FROM dutchie_products WHERE type IS NOT NULL GROUP BY type, subcategory ORDER BY type, subcategory; -- Brands derived from products CREATE OR REPLACE VIEW v_brands AS SELECT brand_name, brand_id, MAX(brand_logo_url) as brand_logo_url, COUNT(DISTINCT id) as product_count, COUNT(DISTINCT dispensary_id) as dispensary_count, ARRAY_AGG(DISTINCT type) FILTER (WHERE type IS NOT NULL) as product_types FROM dutchie_products WHERE brand_name IS NOT NULL GROUP BY brand_name, brand_id ORDER BY product_count DESC; -- Latest snapshot per product (most recent crawl data) CREATE OR REPLACE VIEW v_latest_snapshots AS SELECT DISTINCT ON (dutchie_product_id) s.* FROM dutchie_product_snapshots s ORDER BY dutchie_product_id, crawled_at DESC; -- Dashboard stats CREATE OR REPLACE VIEW v_dashboard_stats AS SELECT (SELECT COUNT(*) FROM dispensaries WHERE state = 'AZ') as dispensary_count, (SELECT COUNT(*) FROM dutchie_products) as product_count, (SELECT COUNT(*) FROM dutchie_product_snapshots WHERE crawled_at > NOW() - INTERVAL '24 hours') as snapshots_24h, (SELECT MAX(crawled_at) FROM dutchie_product_snapshots) as last_crawl_time, (SELECT COUNT(*) FROM crawl_jobs WHERE status = 'failed' AND created_at > NOW() - INTERVAL '24 hours') as failed_jobs_24h, (SELECT COUNT(DISTINCT brand_name) FROM dutchie_products WHERE brand_name IS NOT NULL) as brand_count, (SELECT COUNT(DISTINCT (type, subcategory)) FROM dutchie_products WHERE type IS NOT NULL) as category_count; `; /** * Run the schema migration */ async function createSchema() { console.log('[DutchieAZ Schema] Creating database schema...'); const client = await (0, connection_1.getClient)(); try { await client.query('BEGIN'); // Split into individual statements and execute const statements = SCHEMA_SQL .split(';') .map(s => s.trim()) .filter(s => s.length > 0 && !s.startsWith('--')); for (const statement of statements) { if (statement.trim()) { await client.query(statement + ';'); } } await client.query('COMMIT'); console.log('[DutchieAZ Schema] Schema created successfully'); } catch (error) { await client.query('ROLLBACK'); console.error('[DutchieAZ Schema] Failed to create schema:', error); throw error; } finally { client.release(); } } /** * Drop all tables (for development/testing) */ async function dropSchema() { console.log('[DutchieAZ Schema] Dropping all tables...'); await (0, connection_1.query)(` DROP VIEW IF EXISTS v_dashboard_stats CASCADE; DROP VIEW IF EXISTS v_latest_snapshots CASCADE; DROP VIEW IF EXISTS v_brands CASCADE; DROP VIEW IF EXISTS v_categories CASCADE; DROP TABLE IF EXISTS crawl_schedule CASCADE; DROP TABLE IF EXISTS crawl_jobs CASCADE; DROP TABLE IF EXISTS dutchie_product_snapshots CASCADE; DROP TABLE IF EXISTS dutchie_products CASCADE; DROP TABLE IF EXISTS dispensaries CASCADE; `); console.log('[DutchieAZ Schema] All tables dropped'); } /** * Check if schema exists */ async function schemaExists() { try { const result = await (0, connection_1.query)(` SELECT EXISTS ( SELECT FROM information_schema.tables WHERE table_name = 'dispensaries' ) as exists `); return result.rows[0]?.exists === true; } catch (error) { return false; } } /** * Initialize schema if it doesn't exist */ async function ensureSchema() { const exists = await schemaExists(); if (!exists) { await createSchema(); } else { console.log('[DutchieAZ Schema] Schema already exists'); } }