The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
406 lines
14 KiB
JavaScript
406 lines
14 KiB
JavaScript
"use strict";
|
|
/**
|
|
* Dutchie AZ Database Schema
|
|
*
|
|
* Creates all tables for the isolated Dutchie Arizona data pipeline.
|
|
* Run this to initialize the dutchie_az database.
|
|
*/
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.createSchema = createSchema;
|
|
exports.dropSchema = dropSchema;
|
|
exports.schemaExists = schemaExists;
|
|
exports.ensureSchema = ensureSchema;
|
|
const connection_1 = require("./connection");
|
|
/**
|
|
* SQL statements to create all tables
|
|
*/
|
|
const SCHEMA_SQL = `
|
|
-- ============================================================
|
|
-- DISPENSARIES TABLE
|
|
-- Stores discovered Dutchie dispensaries in Arizona
|
|
-- ============================================================
|
|
CREATE TABLE IF NOT EXISTS dispensaries (
|
|
id SERIAL PRIMARY KEY,
|
|
platform VARCHAR(20) NOT NULL DEFAULT 'dutchie',
|
|
name VARCHAR(255) NOT NULL,
|
|
slug VARCHAR(255) NOT NULL,
|
|
city VARCHAR(100) NOT NULL,
|
|
state VARCHAR(10) NOT NULL DEFAULT 'AZ',
|
|
postal_code VARCHAR(20),
|
|
address TEXT,
|
|
latitude DECIMAL(10, 7),
|
|
longitude DECIMAL(10, 7),
|
|
platform_dispensary_id VARCHAR(100),
|
|
is_delivery BOOLEAN DEFAULT false,
|
|
is_pickup BOOLEAN DEFAULT true,
|
|
raw_metadata JSONB,
|
|
last_crawled_at TIMESTAMPTZ,
|
|
product_count INTEGER DEFAULT 0,
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
|
|
|
CONSTRAINT uk_dispensaries_platform_slug UNIQUE (platform, slug, city, state)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_platform ON dispensaries(platform);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_platform_id ON dispensaries(platform_dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_state ON dispensaries(state);
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_city ON dispensaries(city);
|
|
|
|
-- ============================================================
|
|
-- DUTCHIE_PRODUCTS TABLE
|
|
-- Canonical product identity per store
|
|
-- ============================================================
|
|
CREATE TABLE IF NOT EXISTS dutchie_products (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
|
platform VARCHAR(20) NOT NULL DEFAULT 'dutchie',
|
|
|
|
external_product_id VARCHAR(100) NOT NULL,
|
|
platform_dispensary_id VARCHAR(100) NOT NULL,
|
|
c_name VARCHAR(500),
|
|
name VARCHAR(500) NOT NULL,
|
|
|
|
-- Brand
|
|
brand_name VARCHAR(255),
|
|
brand_id VARCHAR(100),
|
|
brand_logo_url TEXT,
|
|
|
|
-- Classification
|
|
type VARCHAR(100),
|
|
subcategory VARCHAR(100),
|
|
strain_type VARCHAR(50),
|
|
provider VARCHAR(100),
|
|
|
|
-- Potency
|
|
thc DECIMAL(10, 4),
|
|
thc_content DECIMAL(10, 4),
|
|
cbd DECIMAL(10, 4),
|
|
cbd_content DECIMAL(10, 4),
|
|
cannabinoids_v2 JSONB,
|
|
effects JSONB,
|
|
|
|
-- Status / flags
|
|
status VARCHAR(50),
|
|
medical_only BOOLEAN DEFAULT false,
|
|
rec_only BOOLEAN DEFAULT false,
|
|
featured BOOLEAN DEFAULT false,
|
|
coming_soon BOOLEAN DEFAULT false,
|
|
certificate_of_analysis_enabled BOOLEAN DEFAULT false,
|
|
|
|
is_below_threshold BOOLEAN DEFAULT false,
|
|
is_below_kiosk_threshold BOOLEAN DEFAULT false,
|
|
options_below_threshold BOOLEAN DEFAULT false,
|
|
options_below_kiosk_threshold BOOLEAN DEFAULT false,
|
|
|
|
-- Derived stock status: 'in_stock', 'out_of_stock', 'unknown'
|
|
stock_status VARCHAR(20) DEFAULT 'unknown',
|
|
total_quantity_available INTEGER DEFAULT 0,
|
|
|
|
-- Images
|
|
primary_image_url TEXT,
|
|
images JSONB,
|
|
|
|
-- Misc
|
|
measurements JSONB,
|
|
weight VARCHAR(50),
|
|
past_c_names TEXT[],
|
|
|
|
created_at_dutchie TIMESTAMPTZ,
|
|
updated_at_dutchie TIMESTAMPTZ,
|
|
|
|
latest_raw_payload JSONB,
|
|
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
|
|
|
CONSTRAINT uk_dutchie_products UNIQUE (dispensary_id, external_product_id)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_dutchie_products_dispensary ON dutchie_products(dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_dutchie_products_external_id ON dutchie_products(external_product_id);
|
|
CREATE INDEX IF NOT EXISTS idx_dutchie_products_platform_disp ON dutchie_products(platform_dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_dutchie_products_brand ON dutchie_products(brand_name);
|
|
CREATE INDEX IF NOT EXISTS idx_dutchie_products_type ON dutchie_products(type);
|
|
CREATE INDEX IF NOT EXISTS idx_dutchie_products_subcategory ON dutchie_products(subcategory);
|
|
CREATE INDEX IF NOT EXISTS idx_dutchie_products_status ON dutchie_products(status);
|
|
CREATE INDEX IF NOT EXISTS idx_dutchie_products_strain ON dutchie_products(strain_type);
|
|
CREATE INDEX IF NOT EXISTS idx_dutchie_products_stock_status ON dutchie_products(stock_status);
|
|
|
|
-- ============================================================
|
|
-- DUTCHIE_PRODUCT_SNAPSHOTS TABLE
|
|
-- Historical state per crawl, includes options[]
|
|
-- ============================================================
|
|
CREATE TABLE IF NOT EXISTS dutchie_product_snapshots (
|
|
id SERIAL PRIMARY KEY,
|
|
dutchie_product_id INTEGER NOT NULL REFERENCES dutchie_products(id) ON DELETE CASCADE,
|
|
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
|
platform_dispensary_id VARCHAR(100) NOT NULL,
|
|
external_product_id VARCHAR(100) NOT NULL,
|
|
pricing_type VARCHAR(20) DEFAULT 'unknown',
|
|
crawl_mode VARCHAR(20) DEFAULT 'mode_a', -- 'mode_a' (UI parity) or 'mode_b' (max coverage)
|
|
|
|
status VARCHAR(50),
|
|
featured BOOLEAN DEFAULT false,
|
|
special BOOLEAN DEFAULT false,
|
|
medical_only BOOLEAN DEFAULT false,
|
|
rec_only BOOLEAN DEFAULT false,
|
|
|
|
-- Flag indicating if product was present in feed (false = missing_from_feed snapshot)
|
|
is_present_in_feed BOOLEAN DEFAULT true,
|
|
|
|
-- Derived stock status
|
|
stock_status VARCHAR(20) DEFAULT 'unknown',
|
|
|
|
-- Price summary (in cents)
|
|
rec_min_price_cents INTEGER,
|
|
rec_max_price_cents INTEGER,
|
|
rec_min_special_price_cents INTEGER,
|
|
med_min_price_cents INTEGER,
|
|
med_max_price_cents INTEGER,
|
|
med_min_special_price_cents INTEGER,
|
|
wholesale_min_price_cents INTEGER,
|
|
|
|
-- Inventory summary
|
|
total_quantity_available INTEGER,
|
|
total_kiosk_quantity_available INTEGER,
|
|
manual_inventory BOOLEAN DEFAULT false,
|
|
is_below_threshold BOOLEAN DEFAULT false,
|
|
is_below_kiosk_threshold BOOLEAN DEFAULT false,
|
|
|
|
-- Option-level data (from POSMetaData.children)
|
|
options JSONB,
|
|
|
|
-- Full raw product node
|
|
raw_payload JSONB NOT NULL,
|
|
|
|
crawled_at TIMESTAMPTZ NOT NULL,
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_product ON dutchie_product_snapshots(dutchie_product_id);
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_dispensary ON dutchie_product_snapshots(dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_crawled_at ON dutchie_product_snapshots(crawled_at);
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_platform_disp ON dutchie_product_snapshots(platform_dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_external_id ON dutchie_product_snapshots(external_product_id);
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_special ON dutchie_product_snapshots(special) WHERE special = true;
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_stock_status ON dutchie_product_snapshots(stock_status);
|
|
CREATE INDEX IF NOT EXISTS idx_snapshots_crawl_mode ON dutchie_product_snapshots(crawl_mode);
|
|
|
|
-- ============================================================
|
|
-- CRAWL_JOBS TABLE
|
|
-- Tracks crawl execution status
|
|
-- ============================================================
|
|
CREATE TABLE IF NOT EXISTS crawl_jobs (
|
|
id SERIAL PRIMARY KEY,
|
|
job_type VARCHAR(50) NOT NULL,
|
|
dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE SET NULL,
|
|
status VARCHAR(20) NOT NULL DEFAULT 'pending',
|
|
started_at TIMESTAMPTZ,
|
|
completed_at TIMESTAMPTZ,
|
|
error_message TEXT,
|
|
products_found INTEGER,
|
|
snapshots_created INTEGER,
|
|
metadata JSONB,
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_type ON crawl_jobs(job_type);
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_status ON crawl_jobs(status);
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_dispensary ON crawl_jobs(dispensary_id);
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_created ON crawl_jobs(created_at);
|
|
|
|
-- ============================================================
|
|
-- JOB_SCHEDULES TABLE
|
|
-- Stores schedule configuration for recurring jobs with jitter support
|
|
-- Each job has independent timing that "wanders" over time
|
|
-- ============================================================
|
|
CREATE TABLE IF NOT EXISTS job_schedules (
|
|
id SERIAL PRIMARY KEY,
|
|
job_name VARCHAR(100) NOT NULL UNIQUE,
|
|
description TEXT,
|
|
enabled BOOLEAN DEFAULT true,
|
|
|
|
-- Timing configuration (jitter makes times "wander")
|
|
base_interval_minutes INTEGER NOT NULL DEFAULT 240, -- e.g., 4 hours
|
|
jitter_minutes INTEGER NOT NULL DEFAULT 30, -- e.g., ±30 min
|
|
|
|
-- Last run tracking
|
|
last_run_at TIMESTAMPTZ,
|
|
last_status VARCHAR(20), -- 'success', 'error', 'partial', 'running'
|
|
last_error_message TEXT,
|
|
last_duration_ms INTEGER,
|
|
|
|
-- Next run (calculated with jitter after each run)
|
|
next_run_at TIMESTAMPTZ,
|
|
|
|
-- Additional config
|
|
job_config JSONB, -- e.g., { pricingType: 'rec', useBothModes: true }
|
|
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_job_schedules_enabled ON job_schedules(enabled);
|
|
CREATE INDEX IF NOT EXISTS idx_job_schedules_next_run ON job_schedules(next_run_at);
|
|
|
|
-- ============================================================
|
|
-- JOB_RUN_LOGS TABLE
|
|
-- Stores history of job runs for monitoring
|
|
-- ============================================================
|
|
CREATE TABLE IF NOT EXISTS job_run_logs (
|
|
id SERIAL PRIMARY KEY,
|
|
schedule_id INTEGER NOT NULL REFERENCES job_schedules(id) ON DELETE CASCADE,
|
|
job_name VARCHAR(100) NOT NULL,
|
|
status VARCHAR(20) NOT NULL, -- 'pending', 'running', 'success', 'error', 'partial'
|
|
started_at TIMESTAMPTZ,
|
|
completed_at TIMESTAMPTZ,
|
|
duration_ms INTEGER,
|
|
error_message TEXT,
|
|
|
|
-- Results summary
|
|
items_processed INTEGER,
|
|
items_succeeded INTEGER,
|
|
items_failed INTEGER,
|
|
|
|
metadata JSONB, -- Additional run details
|
|
|
|
created_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_job_run_logs_schedule ON job_run_logs(schedule_id);
|
|
CREATE INDEX IF NOT EXISTS idx_job_run_logs_job_name ON job_run_logs(job_name);
|
|
CREATE INDEX IF NOT EXISTS idx_job_run_logs_status ON job_run_logs(status);
|
|
CREATE INDEX IF NOT EXISTS idx_job_run_logs_created ON job_run_logs(created_at);
|
|
|
|
-- ============================================================
|
|
-- VIEWS FOR EASY QUERYING
|
|
-- ============================================================
|
|
|
|
-- Categories derived from products
|
|
CREATE OR REPLACE VIEW v_categories AS
|
|
SELECT
|
|
type,
|
|
subcategory,
|
|
COUNT(DISTINCT id) as product_count,
|
|
COUNT(DISTINCT dispensary_id) as dispensary_count,
|
|
AVG(thc) as avg_thc,
|
|
MIN(thc) as min_thc,
|
|
MAX(thc) as max_thc
|
|
FROM dutchie_products
|
|
WHERE type IS NOT NULL
|
|
GROUP BY type, subcategory
|
|
ORDER BY type, subcategory;
|
|
|
|
-- Brands derived from products
|
|
CREATE OR REPLACE VIEW v_brands AS
|
|
SELECT
|
|
brand_name,
|
|
brand_id,
|
|
MAX(brand_logo_url) as brand_logo_url,
|
|
COUNT(DISTINCT id) as product_count,
|
|
COUNT(DISTINCT dispensary_id) as dispensary_count,
|
|
ARRAY_AGG(DISTINCT type) FILTER (WHERE type IS NOT NULL) as product_types
|
|
FROM dutchie_products
|
|
WHERE brand_name IS NOT NULL
|
|
GROUP BY brand_name, brand_id
|
|
ORDER BY product_count DESC;
|
|
|
|
-- Latest snapshot per product (most recent crawl data)
|
|
CREATE OR REPLACE VIEW v_latest_snapshots AS
|
|
SELECT DISTINCT ON (dutchie_product_id)
|
|
s.*
|
|
FROM dutchie_product_snapshots s
|
|
ORDER BY dutchie_product_id, crawled_at DESC;
|
|
|
|
-- Dashboard stats
|
|
CREATE OR REPLACE VIEW v_dashboard_stats AS
|
|
SELECT
|
|
(SELECT COUNT(*) FROM dispensaries WHERE state = 'AZ') as dispensary_count,
|
|
(SELECT COUNT(*) FROM dutchie_products) as product_count,
|
|
(SELECT COUNT(*) FROM dutchie_product_snapshots WHERE crawled_at > NOW() - INTERVAL '24 hours') as snapshots_24h,
|
|
(SELECT MAX(crawled_at) FROM dutchie_product_snapshots) as last_crawl_time,
|
|
(SELECT COUNT(*) FROM crawl_jobs WHERE status = 'failed' AND created_at > NOW() - INTERVAL '24 hours') as failed_jobs_24h,
|
|
(SELECT COUNT(DISTINCT brand_name) FROM dutchie_products WHERE brand_name IS NOT NULL) as brand_count,
|
|
(SELECT COUNT(DISTINCT (type, subcategory)) FROM dutchie_products WHERE type IS NOT NULL) as category_count;
|
|
`;
|
|
/**
|
|
* Run the schema migration
|
|
*/
|
|
async function createSchema() {
|
|
console.log('[DutchieAZ Schema] Creating database schema...');
|
|
const client = await (0, connection_1.getClient)();
|
|
try {
|
|
await client.query('BEGIN');
|
|
// Split into individual statements and execute
|
|
const statements = SCHEMA_SQL
|
|
.split(';')
|
|
.map(s => s.trim())
|
|
.filter(s => s.length > 0 && !s.startsWith('--'));
|
|
for (const statement of statements) {
|
|
if (statement.trim()) {
|
|
await client.query(statement + ';');
|
|
}
|
|
}
|
|
await client.query('COMMIT');
|
|
console.log('[DutchieAZ Schema] Schema created successfully');
|
|
}
|
|
catch (error) {
|
|
await client.query('ROLLBACK');
|
|
console.error('[DutchieAZ Schema] Failed to create schema:', error);
|
|
throw error;
|
|
}
|
|
finally {
|
|
client.release();
|
|
}
|
|
}
|
|
/**
|
|
* Drop all tables (for development/testing)
|
|
*/
|
|
async function dropSchema() {
|
|
console.log('[DutchieAZ Schema] Dropping all tables...');
|
|
await (0, connection_1.query)(`
|
|
DROP VIEW IF EXISTS v_dashboard_stats CASCADE;
|
|
DROP VIEW IF EXISTS v_latest_snapshots CASCADE;
|
|
DROP VIEW IF EXISTS v_brands CASCADE;
|
|
DROP VIEW IF EXISTS v_categories CASCADE;
|
|
DROP TABLE IF EXISTS crawl_schedule CASCADE;
|
|
DROP TABLE IF EXISTS crawl_jobs CASCADE;
|
|
DROP TABLE IF EXISTS dutchie_product_snapshots CASCADE;
|
|
DROP TABLE IF EXISTS dutchie_products CASCADE;
|
|
DROP TABLE IF EXISTS dispensaries CASCADE;
|
|
`);
|
|
console.log('[DutchieAZ Schema] All tables dropped');
|
|
}
|
|
/**
|
|
* Check if schema exists
|
|
*/
|
|
async function schemaExists() {
|
|
try {
|
|
const result = await (0, connection_1.query)(`
|
|
SELECT EXISTS (
|
|
SELECT FROM information_schema.tables
|
|
WHERE table_name = 'dispensaries'
|
|
) as exists
|
|
`);
|
|
return result.rows[0]?.exists === true;
|
|
}
|
|
catch (error) {
|
|
return false;
|
|
}
|
|
}
|
|
/**
|
|
* Initialize schema if it doesn't exist
|
|
*/
|
|
async function ensureSchema() {
|
|
const exists = await schemaExists();
|
|
if (!exists) {
|
|
await createSchema();
|
|
}
|
|
else {
|
|
console.log('[DutchieAZ Schema] Schema already exists');
|
|
}
|
|
}
|