fix(monitor): remove non-existent worker columns from job_run_logs query

The job_run_logs table tracks scheduled job orchestration, not individual
worker jobs. Worker info (worker_id, worker_hostname) belongs on
dispensary_crawl_jobs, not job_run_logs.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-03 18:45:05 -07:00
parent 54f40d26bb
commit 66e07b2009
466 changed files with 84988 additions and 9226 deletions

View File

@@ -0,0 +1,79 @@
"use strict";
/**
* Dutchie AZ Database Connection
*
* Isolated database connection for Dutchie Arizona data.
* Uses a separate database/schema to prevent cross-contamination with main app data.
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.getDutchieAZPool = getDutchieAZPool;
exports.query = query;
exports.getClient = getClient;
exports.closePool = closePool;
exports.healthCheck = healthCheck;
const pg_1 = require("pg");
// Consolidated DB naming:
// - Prefer CRAWLSY_DATABASE_URL (e.g., crawlsy_local, crawlsy_prod)
// - Then DUTCHIE_AZ_DATABASE_URL (legacy)
// - Finally DATABASE_URL (legacy main DB)
const DUTCHIE_AZ_DATABASE_URL = process.env.CRAWLSY_DATABASE_URL ||
process.env.DUTCHIE_AZ_DATABASE_URL ||
process.env.DATABASE_URL ||
'postgresql://dutchie:dutchie_local_pass@localhost:54320/crawlsy_local';
let pool = null;
/**
* Get the Dutchie AZ database pool (singleton)
*/
function getDutchieAZPool() {
if (!pool) {
pool = new pg_1.Pool({
connectionString: DUTCHIE_AZ_DATABASE_URL,
max: 10,
idleTimeoutMillis: 30000,
connectionTimeoutMillis: 5000,
});
pool.on('error', (err) => {
console.error('[DutchieAZ DB] Unexpected error on idle client:', err);
});
console.log('[DutchieAZ DB] Pool initialized');
}
return pool;
}
/**
* Execute a query on the Dutchie AZ database
*/
async function query(text, params) {
const p = getDutchieAZPool();
const result = await p.query(text, params);
return { rows: result.rows, rowCount: result.rowCount || 0 };
}
/**
* Get a client from the pool for transaction use
*/
async function getClient() {
const p = getDutchieAZPool();
return p.connect();
}
/**
* Close the pool connection
*/
async function closePool() {
if (pool) {
await pool.end();
pool = null;
console.log('[DutchieAZ DB] Pool closed');
}
}
/**
* Check if the database is accessible
*/
async function healthCheck() {
try {
const result = await query('SELECT 1 as ok');
return result.rows.length > 0 && result.rows[0].ok === 1;
}
catch (error) {
console.error('[DutchieAZ DB] Health check failed:', error);
return false;
}
}

30
backend/dist/dutchie-az/db/migrate.js vendored Normal file
View File

@@ -0,0 +1,30 @@
"use strict";
/**
* Dutchie AZ Schema Bootstrap
*
* Run this to create/update the dutchie_az tables (dutchie_products, dutchie_product_snapshots, etc.)
* in the AZ pipeline database. This is separate from the legacy schema.
*
* Usage:
* TS_NODE_TRANSPILE_ONLY=1 npx ts-node src/dutchie-az/db/migrate.ts
* or (after build)
* node dist/dutchie-az/db/migrate.js
*/
Object.defineProperty(exports, "__esModule", { value: true });
const schema_1 = require("./schema");
const connection_1 = require("./connection");
async function main() {
try {
console.log('[DutchieAZ] Running schema migration...');
await (0, schema_1.createSchema)();
console.log('[DutchieAZ] Schema migration complete.');
}
catch (err) {
console.error('[DutchieAZ] Schema migration failed:', err.message);
process.exitCode = 1;
}
finally {
await (0, connection_1.closePool)();
}
}
main();

405
backend/dist/dutchie-az/db/schema.js vendored Normal file
View File

@@ -0,0 +1,405 @@
"use strict";
/**
* Dutchie AZ Database Schema
*
* Creates all tables for the isolated Dutchie Arizona data pipeline.
* Run this to initialize the dutchie_az database.
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.createSchema = createSchema;
exports.dropSchema = dropSchema;
exports.schemaExists = schemaExists;
exports.ensureSchema = ensureSchema;
const connection_1 = require("./connection");
/**
* SQL statements to create all tables
*/
const SCHEMA_SQL = `
-- ============================================================
-- DISPENSARIES TABLE
-- Stores discovered Dutchie dispensaries in Arizona
-- ============================================================
CREATE TABLE IF NOT EXISTS dispensaries (
id SERIAL PRIMARY KEY,
platform VARCHAR(20) NOT NULL DEFAULT 'dutchie',
name VARCHAR(255) NOT NULL,
slug VARCHAR(255) NOT NULL,
city VARCHAR(100) NOT NULL,
state VARCHAR(10) NOT NULL DEFAULT 'AZ',
postal_code VARCHAR(20),
address TEXT,
latitude DECIMAL(10, 7),
longitude DECIMAL(10, 7),
platform_dispensary_id VARCHAR(100),
is_delivery BOOLEAN DEFAULT false,
is_pickup BOOLEAN DEFAULT true,
raw_metadata JSONB,
last_crawled_at TIMESTAMPTZ,
product_count INTEGER DEFAULT 0,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
CONSTRAINT uk_dispensaries_platform_slug UNIQUE (platform, slug, city, state)
);
CREATE INDEX IF NOT EXISTS idx_dispensaries_platform ON dispensaries(platform);
CREATE INDEX IF NOT EXISTS idx_dispensaries_platform_id ON dispensaries(platform_dispensary_id);
CREATE INDEX IF NOT EXISTS idx_dispensaries_state ON dispensaries(state);
CREATE INDEX IF NOT EXISTS idx_dispensaries_city ON dispensaries(city);
-- ============================================================
-- DUTCHIE_PRODUCTS TABLE
-- Canonical product identity per store
-- ============================================================
CREATE TABLE IF NOT EXISTS dutchie_products (
id SERIAL PRIMARY KEY,
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
platform VARCHAR(20) NOT NULL DEFAULT 'dutchie',
external_product_id VARCHAR(100) NOT NULL,
platform_dispensary_id VARCHAR(100) NOT NULL,
c_name VARCHAR(500),
name VARCHAR(500) NOT NULL,
-- Brand
brand_name VARCHAR(255),
brand_id VARCHAR(100),
brand_logo_url TEXT,
-- Classification
type VARCHAR(100),
subcategory VARCHAR(100),
strain_type VARCHAR(50),
provider VARCHAR(100),
-- Potency
thc DECIMAL(10, 4),
thc_content DECIMAL(10, 4),
cbd DECIMAL(10, 4),
cbd_content DECIMAL(10, 4),
cannabinoids_v2 JSONB,
effects JSONB,
-- Status / flags
status VARCHAR(50),
medical_only BOOLEAN DEFAULT false,
rec_only BOOLEAN DEFAULT false,
featured BOOLEAN DEFAULT false,
coming_soon BOOLEAN DEFAULT false,
certificate_of_analysis_enabled BOOLEAN DEFAULT false,
is_below_threshold BOOLEAN DEFAULT false,
is_below_kiosk_threshold BOOLEAN DEFAULT false,
options_below_threshold BOOLEAN DEFAULT false,
options_below_kiosk_threshold BOOLEAN DEFAULT false,
-- Derived stock status: 'in_stock', 'out_of_stock', 'unknown'
stock_status VARCHAR(20) DEFAULT 'unknown',
total_quantity_available INTEGER DEFAULT 0,
-- Images
primary_image_url TEXT,
images JSONB,
-- Misc
measurements JSONB,
weight VARCHAR(50),
past_c_names TEXT[],
created_at_dutchie TIMESTAMPTZ,
updated_at_dutchie TIMESTAMPTZ,
latest_raw_payload JSONB,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
CONSTRAINT uk_dutchie_products UNIQUE (dispensary_id, external_product_id)
);
CREATE INDEX IF NOT EXISTS idx_dutchie_products_dispensary ON dutchie_products(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_dutchie_products_external_id ON dutchie_products(external_product_id);
CREATE INDEX IF NOT EXISTS idx_dutchie_products_platform_disp ON dutchie_products(platform_dispensary_id);
CREATE INDEX IF NOT EXISTS idx_dutchie_products_brand ON dutchie_products(brand_name);
CREATE INDEX IF NOT EXISTS idx_dutchie_products_type ON dutchie_products(type);
CREATE INDEX IF NOT EXISTS idx_dutchie_products_subcategory ON dutchie_products(subcategory);
CREATE INDEX IF NOT EXISTS idx_dutchie_products_status ON dutchie_products(status);
CREATE INDEX IF NOT EXISTS idx_dutchie_products_strain ON dutchie_products(strain_type);
CREATE INDEX IF NOT EXISTS idx_dutchie_products_stock_status ON dutchie_products(stock_status);
-- ============================================================
-- DUTCHIE_PRODUCT_SNAPSHOTS TABLE
-- Historical state per crawl, includes options[]
-- ============================================================
CREATE TABLE IF NOT EXISTS dutchie_product_snapshots (
id SERIAL PRIMARY KEY,
dutchie_product_id INTEGER NOT NULL REFERENCES dutchie_products(id) ON DELETE CASCADE,
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
platform_dispensary_id VARCHAR(100) NOT NULL,
external_product_id VARCHAR(100) NOT NULL,
pricing_type VARCHAR(20) DEFAULT 'unknown',
crawl_mode VARCHAR(20) DEFAULT 'mode_a', -- 'mode_a' (UI parity) or 'mode_b' (max coverage)
status VARCHAR(50),
featured BOOLEAN DEFAULT false,
special BOOLEAN DEFAULT false,
medical_only BOOLEAN DEFAULT false,
rec_only BOOLEAN DEFAULT false,
-- Flag indicating if product was present in feed (false = missing_from_feed snapshot)
is_present_in_feed BOOLEAN DEFAULT true,
-- Derived stock status
stock_status VARCHAR(20) DEFAULT 'unknown',
-- Price summary (in cents)
rec_min_price_cents INTEGER,
rec_max_price_cents INTEGER,
rec_min_special_price_cents INTEGER,
med_min_price_cents INTEGER,
med_max_price_cents INTEGER,
med_min_special_price_cents INTEGER,
wholesale_min_price_cents INTEGER,
-- Inventory summary
total_quantity_available INTEGER,
total_kiosk_quantity_available INTEGER,
manual_inventory BOOLEAN DEFAULT false,
is_below_threshold BOOLEAN DEFAULT false,
is_below_kiosk_threshold BOOLEAN DEFAULT false,
-- Option-level data (from POSMetaData.children)
options JSONB,
-- Full raw product node
raw_payload JSONB NOT NULL,
crawled_at TIMESTAMPTZ NOT NULL,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_snapshots_product ON dutchie_product_snapshots(dutchie_product_id);
CREATE INDEX IF NOT EXISTS idx_snapshots_dispensary ON dutchie_product_snapshots(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_snapshots_crawled_at ON dutchie_product_snapshots(crawled_at);
CREATE INDEX IF NOT EXISTS idx_snapshots_platform_disp ON dutchie_product_snapshots(platform_dispensary_id);
CREATE INDEX IF NOT EXISTS idx_snapshots_external_id ON dutchie_product_snapshots(external_product_id);
CREATE INDEX IF NOT EXISTS idx_snapshots_special ON dutchie_product_snapshots(special) WHERE special = true;
CREATE INDEX IF NOT EXISTS idx_snapshots_stock_status ON dutchie_product_snapshots(stock_status);
CREATE INDEX IF NOT EXISTS idx_snapshots_crawl_mode ON dutchie_product_snapshots(crawl_mode);
-- ============================================================
-- CRAWL_JOBS TABLE
-- Tracks crawl execution status
-- ============================================================
CREATE TABLE IF NOT EXISTS crawl_jobs (
id SERIAL PRIMARY KEY,
job_type VARCHAR(50) NOT NULL,
dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE SET NULL,
status VARCHAR(20) NOT NULL DEFAULT 'pending',
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
error_message TEXT,
products_found INTEGER,
snapshots_created INTEGER,
metadata JSONB,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_type ON crawl_jobs(job_type);
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_status ON crawl_jobs(status);
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_dispensary ON crawl_jobs(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_created ON crawl_jobs(created_at);
-- ============================================================
-- JOB_SCHEDULES TABLE
-- Stores schedule configuration for recurring jobs with jitter support
-- Each job has independent timing that "wanders" over time
-- ============================================================
CREATE TABLE IF NOT EXISTS job_schedules (
id SERIAL PRIMARY KEY,
job_name VARCHAR(100) NOT NULL UNIQUE,
description TEXT,
enabled BOOLEAN DEFAULT true,
-- Timing configuration (jitter makes times "wander")
base_interval_minutes INTEGER NOT NULL DEFAULT 240, -- e.g., 4 hours
jitter_minutes INTEGER NOT NULL DEFAULT 30, -- e.g., ±30 min
-- Last run tracking
last_run_at TIMESTAMPTZ,
last_status VARCHAR(20), -- 'success', 'error', 'partial', 'running'
last_error_message TEXT,
last_duration_ms INTEGER,
-- Next run (calculated with jitter after each run)
next_run_at TIMESTAMPTZ,
-- Additional config
job_config JSONB, -- e.g., { pricingType: 'rec', useBothModes: true }
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_job_schedules_enabled ON job_schedules(enabled);
CREATE INDEX IF NOT EXISTS idx_job_schedules_next_run ON job_schedules(next_run_at);
-- ============================================================
-- JOB_RUN_LOGS TABLE
-- Stores history of job runs for monitoring
-- ============================================================
CREATE TABLE IF NOT EXISTS job_run_logs (
id SERIAL PRIMARY KEY,
schedule_id INTEGER NOT NULL REFERENCES job_schedules(id) ON DELETE CASCADE,
job_name VARCHAR(100) NOT NULL,
status VARCHAR(20) NOT NULL, -- 'pending', 'running', 'success', 'error', 'partial'
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
duration_ms INTEGER,
error_message TEXT,
-- Results summary
items_processed INTEGER,
items_succeeded INTEGER,
items_failed INTEGER,
metadata JSONB, -- Additional run details
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_job_run_logs_schedule ON job_run_logs(schedule_id);
CREATE INDEX IF NOT EXISTS idx_job_run_logs_job_name ON job_run_logs(job_name);
CREATE INDEX IF NOT EXISTS idx_job_run_logs_status ON job_run_logs(status);
CREATE INDEX IF NOT EXISTS idx_job_run_logs_created ON job_run_logs(created_at);
-- ============================================================
-- VIEWS FOR EASY QUERYING
-- ============================================================
-- Categories derived from products
CREATE OR REPLACE VIEW v_categories AS
SELECT
type,
subcategory,
COUNT(DISTINCT id) as product_count,
COUNT(DISTINCT dispensary_id) as dispensary_count,
AVG(thc) as avg_thc,
MIN(thc) as min_thc,
MAX(thc) as max_thc
FROM dutchie_products
WHERE type IS NOT NULL
GROUP BY type, subcategory
ORDER BY type, subcategory;
-- Brands derived from products
CREATE OR REPLACE VIEW v_brands AS
SELECT
brand_name,
brand_id,
MAX(brand_logo_url) as brand_logo_url,
COUNT(DISTINCT id) as product_count,
COUNT(DISTINCT dispensary_id) as dispensary_count,
ARRAY_AGG(DISTINCT type) FILTER (WHERE type IS NOT NULL) as product_types
FROM dutchie_products
WHERE brand_name IS NOT NULL
GROUP BY brand_name, brand_id
ORDER BY product_count DESC;
-- Latest snapshot per product (most recent crawl data)
CREATE OR REPLACE VIEW v_latest_snapshots AS
SELECT DISTINCT ON (dutchie_product_id)
s.*
FROM dutchie_product_snapshots s
ORDER BY dutchie_product_id, crawled_at DESC;
-- Dashboard stats
CREATE OR REPLACE VIEW v_dashboard_stats AS
SELECT
(SELECT COUNT(*) FROM dispensaries WHERE state = 'AZ') as dispensary_count,
(SELECT COUNT(*) FROM dutchie_products) as product_count,
(SELECT COUNT(*) FROM dutchie_product_snapshots WHERE crawled_at > NOW() - INTERVAL '24 hours') as snapshots_24h,
(SELECT MAX(crawled_at) FROM dutchie_product_snapshots) as last_crawl_time,
(SELECT COUNT(*) FROM crawl_jobs WHERE status = 'failed' AND created_at > NOW() - INTERVAL '24 hours') as failed_jobs_24h,
(SELECT COUNT(DISTINCT brand_name) FROM dutchie_products WHERE brand_name IS NOT NULL) as brand_count,
(SELECT COUNT(DISTINCT (type, subcategory)) FROM dutchie_products WHERE type IS NOT NULL) as category_count;
`;
/**
* Run the schema migration
*/
async function createSchema() {
console.log('[DutchieAZ Schema] Creating database schema...');
const client = await (0, connection_1.getClient)();
try {
await client.query('BEGIN');
// Split into individual statements and execute
const statements = SCHEMA_SQL
.split(';')
.map(s => s.trim())
.filter(s => s.length > 0 && !s.startsWith('--'));
for (const statement of statements) {
if (statement.trim()) {
await client.query(statement + ';');
}
}
await client.query('COMMIT');
console.log('[DutchieAZ Schema] Schema created successfully');
}
catch (error) {
await client.query('ROLLBACK');
console.error('[DutchieAZ Schema] Failed to create schema:', error);
throw error;
}
finally {
client.release();
}
}
/**
* Drop all tables (for development/testing)
*/
async function dropSchema() {
console.log('[DutchieAZ Schema] Dropping all tables...');
await (0, connection_1.query)(`
DROP VIEW IF EXISTS v_dashboard_stats CASCADE;
DROP VIEW IF EXISTS v_latest_snapshots CASCADE;
DROP VIEW IF EXISTS v_brands CASCADE;
DROP VIEW IF EXISTS v_categories CASCADE;
DROP TABLE IF EXISTS crawl_schedule CASCADE;
DROP TABLE IF EXISTS crawl_jobs CASCADE;
DROP TABLE IF EXISTS dutchie_product_snapshots CASCADE;
DROP TABLE IF EXISTS dutchie_products CASCADE;
DROP TABLE IF EXISTS dispensaries CASCADE;
`);
console.log('[DutchieAZ Schema] All tables dropped');
}
/**
* Check if schema exists
*/
async function schemaExists() {
try {
const result = await (0, connection_1.query)(`
SELECT EXISTS (
SELECT FROM information_schema.tables
WHERE table_name = 'dispensaries'
) as exists
`);
return result.rows[0]?.exists === true;
}
catch (error) {
return false;
}
}
/**
* Initialize schema if it doesn't exist
*/
async function ensureSchema() {
const exists = await schemaExists();
if (!exists) {
await createSchema();
}
else {
console.log('[DutchieAZ Schema] Schema already exists');
}
}