diff --git a/backend/src/dutchie-az/README_DUTCHIE_AZ.md b/backend/src/dutchie-az/README_DUTCHIE_AZ.md new file mode 100644 index 00000000..ca9b79bf --- /dev/null +++ b/backend/src/dutchie-az/README_DUTCHIE_AZ.md @@ -0,0 +1,199 @@ +# Dutchie AZ Pipeline + +## Overview + +The Dutchie AZ pipeline is the **only** authorized way to crawl Dutchie dispensary menus. It uses Dutchie's GraphQL API directly (no DOM scraping) and writes to an isolated database with a proper snapshot model. + +## Key Principles + +1. **GraphQL Only** - All Dutchie data is fetched via their FilteredProducts GraphQL API +2. **Isolated Database** - Data lives in `dutchie_az_*` tables, NOT the legacy `products` table +3. **Append-Only Snapshots** - Every crawl creates snapshots, never overwrites historical data +4. **Stock Status Tracking** - Derived from `POSMetaData.children` inventory data +5. **Missing Product Detection** - Products not in feed are marked with `isPresentInFeed=false` + +## Directory Structure + +``` +src/dutchie-az/ +├── db/ +│ ├── connection.ts # Database connection pool +│ └── schema.ts # Table definitions and migrations +├── routes/ +│ └── index.ts # REST API endpoints +├── services/ +│ ├── graphql-client.ts # Direct GraphQL fetch (Mode A + Mode B) +│ ├── product-crawler.ts # Main crawler orchestration +│ └── scheduler.ts # Jittered scheduling with wandering intervals +└── types/ + └── index.ts # TypeScript interfaces +``` + +## Data Model + +### Tables + +- **dispensaries** - Arizona Dutchie stores with `platform_dispensary_id` +- **dutchie_products** - Canonical product identity (one row per product per store) +- **dutchie_product_snapshots** - Historical state per crawl (append-only) +- **job_schedules** - Scheduler configuration with jitter support +- **job_run_logs** - Execution history + +### Stock Status + +The `stock_status` field is derived from `POSMetaData.children`: + +```typescript +function deriveStockStatus(children?: POSChild[]): StockStatus { + if 
(!children || children.length === 0) return 'unknown'; + const totalAvailable = children.reduce((sum, c) => + sum + (c.quantityAvailable || 0), 0); + return totalAvailable > 0 ? 'in_stock' : 'out_of_stock'; +} +``` + +### Two-Mode Crawling + +Mode A (UI Parity): +- `Status: null` - Returns what the UI shows +- Best for "current inventory" snapshot + +Mode B (Max Coverage): +- `Status: 'Active'` - Returns all active products +- Catches items with `isBelowThreshold: true` + +Both modes are merged to get maximum product coverage. + +## API Endpoints + +All endpoints are mounted at `/api/dutchie-az/`: + +``` +GET /api/dutchie-az/dispensaries - List all dispensaries +GET /api/dutchie-az/dispensaries/:id - Get dispensary details +GET /api/dutchie-az/products - List products (with filters) +GET /api/dutchie-az/products/:id - Get product with snapshots +GET /api/dutchie-az/products/:id/snapshots - Get product snapshot history +POST /api/dutchie-az/crawl/:dispensaryId - Trigger manual crawl +GET /api/dutchie-az/schedule - Get scheduler status +POST /api/dutchie-az/schedule/run - Manually run scheduled jobs +GET /api/dutchie-az/stats - Dashboard statistics +``` + +## Scheduler + +The scheduler uses **jitter** to avoid detection patterns: + +```typescript +// Each job has independent "wandering" timing +interface JobSchedule { + base_interval_minutes: number; // e.g., 240 (4 hours) + jitter_minutes: number; // e.g., 30 (±30 min) + next_run_at: Date; // Calculated with jitter after each run +} +``` + +Jobs run when `next_run_at <= NOW()`. After completion, the next run is calculated: +``` +next_run_at = NOW() + base_interval + random(-jitter, +jitter) +``` + +This prevents crawls from clustering at predictable times. + +## Manual Testing + +### Run a single dispensary crawl: + +```bash +DATABASE_URL="..." 
npx tsx -e " +const { crawlDispensaryProducts } = require('./src/dutchie-az/services/product-crawler'); +const { query } = require('./src/dutchie-az/db/connection'); + +async function test() { + const { rows } = await query('SELECT * FROM dispensaries LIMIT 1'); + if (!rows[0]) return console.log('No dispensaries found'); + + const result = await crawlDispensaryProducts(rows[0], 'rec', { useBothModes: true }); + console.log(JSON.stringify(result, null, 2)); +} +test(); +" +``` + +### Check stock status distribution: + +```sql +SELECT stock_status, COUNT(*) +FROM dutchie_products +GROUP BY stock_status; +``` + +### View recent snapshots: + +```sql +SELECT + p.name, + s.stock_status, + s.is_present_in_feed, + s.crawled_at +FROM dutchie_product_snapshots s +JOIN dutchie_products p ON p.id = s.dutchie_product_id +ORDER BY s.crawled_at DESC +LIMIT 20; +``` + +## Deprecated Code + +The following files are **DEPRECATED** and will throw errors if called: + +- `src/scrapers/dutchie-graphql.ts` - Wrote to legacy `products` table +- `src/scrapers/dutchie-graphql-direct.ts` - Wrote to legacy `products` table +- `src/scrapers/templates/dutchie.ts` - HTML/DOM scraper (unreliable) +- `src/scraper-v2/engine.ts` DutchieSpider - DOM-based extraction + +If `store-crawl-orchestrator.ts` detects `provider='dutchie'` with `mode='production'`, it now routes to this dutchie-az pipeline automatically. + +## Integration with Legacy System + +The `store-crawl-orchestrator.ts` bridges the legacy stores system with dutchie-az: + +1. When a store has `product_provider='dutchie'` and `product_crawler_mode='production'` +2. The orchestrator looks up the corresponding dispensary in `dutchie_az.dispensaries` +3. It calls `crawlDispensaryProducts()` from the dutchie-az pipeline +4. 
Results are logged but data stays in the dutchie_az tables + +To use the dutchie-az pipeline independently: +- Navigate to `/dutchie-az-schedule` in the UI +- Use the REST API endpoints directly +- Run the scheduler service + +## Environment Variables + +```bash +# Database connection for dutchie-az (same DB, separate tables); DUTCHIE_AZ_DATABASE_URL, if set, takes precedence over DATABASE_URL +DATABASE_URL=postgresql://user:pass@host:port/database +``` + +## Troubleshooting + +### "Dispensary not found in dutchie-az database" + +The dispensary must exist in `dutchie_az.dispensaries` before crawling. Either: +1. Run discovery to populate dispensaries +2. Manually insert the dispensary with `platform_dispensary_id` + +### GraphQL returns empty products + +1. Check `platform_dispensary_id` is correct (the internal Dutchie ID, not slug) +2. Verify the dispensary is online and has menu data +3. Try both `rec` and `med` pricing types + +### Snapshots show `stock_status='unknown'` + +The product likely has no `POSMetaData.children` array. This happens for: +- Products without inventory tracking +- Manually managed inventory + +--- + +Last updated: December 2025 diff --git a/backend/src/dutchie-az/config/dutchie.ts b/backend/src/dutchie-az/config/dutchie.ts new file mode 100644 index 00000000..2e05fd03 --- /dev/null +++ b/backend/src/dutchie-az/config/dutchie.ts @@ -0,0 +1,122 @@ +/** + * Dutchie Configuration + * + * Centralized configuration for Dutchie GraphQL API interaction. + * Update hashes here when Dutchie changes their persisted query system. + */ + +export const dutchieConfig = { + // ============================================================ + // GRAPHQL PERSISTED QUERY HASHES + // ============================================================ + // + // These hashes identify specific GraphQL operations. + // If Dutchie changes their schema, you may need to capture + // new hashes from live browser traffic (Network tab → graphql requests). 
+ + /** FilteredProducts - main product listing query */ + filteredProductsHash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0', + + /** GetAddressBasedDispensaryData - resolve slug to internal ID */ + getDispensaryDataHash: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b', + + /** + * ConsumerDispensaries - geo-based discovery + * NOTE: This is a placeholder guess. If discovery fails, either: + * 1. Capture the real hash from live traffic + * 2. Rely on known AZDHS slugs instead (set useDiscovery: false) + */ + consumerDispensariesHash: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b', + + // ============================================================ + // BEHAVIOR FLAGS + // ============================================================ + + /** Enable geo-based discovery (false = use known AZDHS slugs only) */ + useDiscovery: true, + + /** Prefer GET requests (true) or POST (false). GET is default. */ + preferGet: true, + + /** + * Enable POST fallback when GET fails with 405 or blocked. + * If true, will retry failed GETs as POSTs. 
+ */ + enablePostFallback: true, + + // ============================================================ + // PAGINATION & RETRY + // ============================================================ + + /** Products per page for pagination */ + perPage: 100, + + /** Maximum pages to fetch (safety limit) */ + maxPages: 200, + + /** Number of retries for failed page fetches */ + maxRetries: 1, + + /** Delay between pages in ms */ + pageDelayMs: 500, + + /** Delay between modes in ms */ + modeDelayMs: 2000, + + // ============================================================ + // HTTP HEADERS + // ============================================================ + + /** Default headers to mimic browser requests (header name -> header value) */ + defaultHeaders: { + 'accept': 'application/json, text/plain, */*', + 'accept-language': 'en-US,en;q=0.9', + 'apollographql-client-name': 'Marketplace (production)', + } as Record<string, string>, + + /** User agent string */ + userAgent: + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + + // ============================================================ + // BROWSER LAUNCH OPTIONS + // ============================================================ + + browserArgs: [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-blink-features=AutomationControlled', + ], + + /** Navigation timeout in ms */ + navigationTimeout: 60000, + + /** Initial page load delay in ms */ + pageLoadDelay: 2000, +}; + +/** + * Get GraphQL hashes object for backward compatibility + */ +export const GRAPHQL_HASHES = { + FilteredProducts: dutchieConfig.filteredProductsHash, + GetAddressBasedDispensaryData: dutchieConfig.getDispensaryDataHash, + ConsumerDispensaries: dutchieConfig.consumerDispensariesHash, +}; + +/** + * Arizona geo centerpoints for discovery scans + */ +export const ARIZONA_CENTERPOINTS = [ + { name: 'Phoenix', lat: 33.4484, lng: -112.074 }, + { name: 'Tucson', lat: 32.2226, lng: -110.9747 }, + { 
name: 'Flagstaff', lat: 35.1983, lng: -111.6513 }, + { name: 'Mesa', lat: 33.4152, lng: -111.8315 }, + { name: 'Scottsdale', lat: 33.4942, lng: -111.9261 }, + { name: 'Tempe', lat: 33.4255, lng: -111.94 }, + { name: 'Yuma', lat: 32.6927, lng: -114.6277 }, + { name: 'Prescott', lat: 34.54, lng: -112.4685 }, + { name: 'Lake Havasu', lat: 34.4839, lng: -114.3224 }, + { name: 'Sierra Vista', lat: 31.5455, lng: -110.2773 }, +]; diff --git a/backend/src/dutchie-az/db/connection.ts b/backend/src/dutchie-az/db/connection.ts new file mode 100644 index 00000000..552a61bf --- /dev/null +++ b/backend/src/dutchie-az/db/connection.ts @@ -0,0 +1,78 @@ +/** + * Dutchie AZ Database Connection + * + * Isolated database connection for Dutchie Arizona data. + * Uses a separate database/schema to prevent cross-contamination with main app data. + */ + +import { Pool, PoolClient } from 'pg'; + +// Connection string: DUTCHIE_AZ_DATABASE_URL, else the main app's DATABASE_URL, else a local development default +const DUTCHIE_AZ_DATABASE_URL = + process.env.DUTCHIE_AZ_DATABASE_URL || + process.env.DATABASE_URL || + 'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_az'; + +let pool: Pool | null = null; + +/** + * Get the Dutchie AZ database pool (singleton, created lazily on first call) + */ +export function getDutchieAZPool(): Pool { + if (!pool) { + pool = new Pool({ + connectionString: DUTCHIE_AZ_DATABASE_URL, + max: 10, + idleTimeoutMillis: 30000, + connectionTimeoutMillis: 5000, + }); + + pool.on('error', (err) => { + console.error('[DutchieAZ DB] Unexpected error on idle client:', err); + }); + + console.log('[DutchieAZ DB] Pool initialized'); + } + return pool; +} + +/** + * Execute a query on the Dutchie AZ pool; T is the row type (defaults to any), rowCount is 0 when the driver reports none + */ +export async function query<T = any>(text: string, params?: any[]): Promise<{ rows: T[]; rowCount: number }> { + const p = getDutchieAZPool(); + const result = await p.query(text, params); + return { rows: result.rows as T[], rowCount: result.rowCount || 0 }; +} + +/** + * Get a client from the pool for 
transaction use (the caller must release() it when done) + */ +export async function getClient(): Promise<PoolClient> { + const p = getDutchieAZPool(); + return p.connect(); +} + +/** + * Close the pool connection and reset the singleton so a later call re-creates it + */ +export async function closePool(): Promise<void> { + if (pool) { + await pool.end(); + pool = null; + console.log('[DutchieAZ DB] Pool closed'); + } +} + +/** + * Check if the database is accessible (runs SELECT 1; returns false instead of throwing) + */ +export async function healthCheck(): Promise<boolean> { + try { + const result = await query('SELECT 1 as ok'); + return result.rows.length > 0 && result.rows[0].ok === 1; + } catch (error) { + console.error('[DutchieAZ DB] Health check failed:', error); + return false; + } +} diff --git a/backend/src/dutchie-az/db/schema.ts b/backend/src/dutchie-az/db/schema.ts new file mode 100644 index 00000000..ad6e2036 --- /dev/null +++ b/backend/src/dutchie-az/db/schema.ts @@ -0,0 +1,408 @@ +/** + * Dutchie AZ Database Schema + * + * Creates all tables for the isolated Dutchie Arizona data pipeline. + * Run this to initialize the dutchie_az database. 
+ */ + +import { query, getClient } from './connection'; + +/** + * SQL statements to create all tables + */ +const SCHEMA_SQL = ` +-- ============================================================ +-- DISPENSARIES TABLE +-- Stores discovered Dutchie dispensaries in Arizona +-- ============================================================ +CREATE TABLE IF NOT EXISTS dispensaries ( + id SERIAL PRIMARY KEY, + platform VARCHAR(20) NOT NULL DEFAULT 'dutchie', + name VARCHAR(255) NOT NULL, + slug VARCHAR(255) NOT NULL, + city VARCHAR(100) NOT NULL, + state VARCHAR(10) NOT NULL DEFAULT 'AZ', + postal_code VARCHAR(20), + address TEXT, + latitude DECIMAL(10, 7), + longitude DECIMAL(10, 7), + platform_dispensary_id VARCHAR(100), + is_delivery BOOLEAN DEFAULT false, + is_pickup BOOLEAN DEFAULT true, + raw_metadata JSONB, + last_crawled_at TIMESTAMPTZ, + product_count INTEGER DEFAULT 0, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + + CONSTRAINT uk_dispensaries_platform_slug UNIQUE (platform, slug, city, state) +); + +CREATE INDEX IF NOT EXISTS idx_dispensaries_platform ON dispensaries(platform); +CREATE INDEX IF NOT EXISTS idx_dispensaries_platform_id ON dispensaries(platform_dispensary_id); +CREATE INDEX IF NOT EXISTS idx_dispensaries_state ON dispensaries(state); +CREATE INDEX IF NOT EXISTS idx_dispensaries_city ON dispensaries(city); + +-- ============================================================ +-- DUTCHIE_PRODUCTS TABLE +-- Canonical product identity per store +-- ============================================================ +CREATE TABLE IF NOT EXISTS dutchie_products ( + id SERIAL PRIMARY KEY, + dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE, + platform VARCHAR(20) NOT NULL DEFAULT 'dutchie', + + external_product_id VARCHAR(100) NOT NULL, + platform_dispensary_id VARCHAR(100) NOT NULL, + c_name VARCHAR(500), + name VARCHAR(500) NOT NULL, + + -- Brand + brand_name VARCHAR(255), + brand_id VARCHAR(100), + 
brand_logo_url TEXT, + + -- Classification + type VARCHAR(100), + subcategory VARCHAR(100), + strain_type VARCHAR(50), + provider VARCHAR(100), + + -- Potency + thc DECIMAL(10, 4), + thc_content DECIMAL(10, 4), + cbd DECIMAL(10, 4), + cbd_content DECIMAL(10, 4), + cannabinoids_v2 JSONB, + effects JSONB, + + -- Status / flags + status VARCHAR(50), + medical_only BOOLEAN DEFAULT false, + rec_only BOOLEAN DEFAULT false, + featured BOOLEAN DEFAULT false, + coming_soon BOOLEAN DEFAULT false, + certificate_of_analysis_enabled BOOLEAN DEFAULT false, + + is_below_threshold BOOLEAN DEFAULT false, + is_below_kiosk_threshold BOOLEAN DEFAULT false, + options_below_threshold BOOLEAN DEFAULT false, + options_below_kiosk_threshold BOOLEAN DEFAULT false, + + -- Derived stock status: 'in_stock', 'out_of_stock', 'unknown' + stock_status VARCHAR(20) DEFAULT 'unknown', + total_quantity_available INTEGER DEFAULT 0, + + -- Images + primary_image_url TEXT, + images JSONB, + + -- Misc + measurements JSONB, + weight VARCHAR(50), + past_c_names TEXT[], + + created_at_dutchie TIMESTAMPTZ, + updated_at_dutchie TIMESTAMPTZ, + + latest_raw_payload JSONB, + + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + + CONSTRAINT uk_dutchie_products UNIQUE (dispensary_id, external_product_id) +); + +CREATE INDEX IF NOT EXISTS idx_dutchie_products_dispensary ON dutchie_products(dispensary_id); +CREATE INDEX IF NOT EXISTS idx_dutchie_products_external_id ON dutchie_products(external_product_id); +CREATE INDEX IF NOT EXISTS idx_dutchie_products_platform_disp ON dutchie_products(platform_dispensary_id); +CREATE INDEX IF NOT EXISTS idx_dutchie_products_brand ON dutchie_products(brand_name); +CREATE INDEX IF NOT EXISTS idx_dutchie_products_type ON dutchie_products(type); +CREATE INDEX IF NOT EXISTS idx_dutchie_products_subcategory ON dutchie_products(subcategory); +CREATE INDEX IF NOT EXISTS idx_dutchie_products_status ON dutchie_products(status); +CREATE INDEX IF NOT EXISTS 
idx_dutchie_products_strain ON dutchie_products(strain_type); +CREATE INDEX IF NOT EXISTS idx_dutchie_products_stock_status ON dutchie_products(stock_status); + +-- ============================================================ +-- DUTCHIE_PRODUCT_SNAPSHOTS TABLE +-- Historical state per crawl, includes options[] +-- ============================================================ +CREATE TABLE IF NOT EXISTS dutchie_product_snapshots ( + id SERIAL PRIMARY KEY, + dutchie_product_id INTEGER NOT NULL REFERENCES dutchie_products(id) ON DELETE CASCADE, + dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE, + platform_dispensary_id VARCHAR(100) NOT NULL, + external_product_id VARCHAR(100) NOT NULL, + pricing_type VARCHAR(20) DEFAULT 'unknown', + crawl_mode VARCHAR(20) DEFAULT 'mode_a', -- 'mode_a' (UI parity) or 'mode_b' (max coverage) + + status VARCHAR(50), + featured BOOLEAN DEFAULT false, + special BOOLEAN DEFAULT false, + medical_only BOOLEAN DEFAULT false, + rec_only BOOLEAN DEFAULT false, + + -- Flag indicating if product was present in feed (false = missing_from_feed snapshot) + is_present_in_feed BOOLEAN DEFAULT true, + + -- Derived stock status + stock_status VARCHAR(20) DEFAULT 'unknown', + + -- Price summary (in cents) + rec_min_price_cents INTEGER, + rec_max_price_cents INTEGER, + rec_min_special_price_cents INTEGER, + med_min_price_cents INTEGER, + med_max_price_cents INTEGER, + med_min_special_price_cents INTEGER, + wholesale_min_price_cents INTEGER, + + -- Inventory summary + total_quantity_available INTEGER, + total_kiosk_quantity_available INTEGER, + manual_inventory BOOLEAN DEFAULT false, + is_below_threshold BOOLEAN DEFAULT false, + is_below_kiosk_threshold BOOLEAN DEFAULT false, + + -- Option-level data (from POSMetaData.children) + options JSONB, + + -- Full raw product node + raw_payload JSONB NOT NULL, + + crawled_at TIMESTAMPTZ NOT NULL, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW() +); + 
+CREATE INDEX IF NOT EXISTS idx_snapshots_product ON dutchie_product_snapshots(dutchie_product_id); +CREATE INDEX IF NOT EXISTS idx_snapshots_dispensary ON dutchie_product_snapshots(dispensary_id); +CREATE INDEX IF NOT EXISTS idx_snapshots_crawled_at ON dutchie_product_snapshots(crawled_at); +CREATE INDEX IF NOT EXISTS idx_snapshots_platform_disp ON dutchie_product_snapshots(platform_dispensary_id); +CREATE INDEX IF NOT EXISTS idx_snapshots_external_id ON dutchie_product_snapshots(external_product_id); +CREATE INDEX IF NOT EXISTS idx_snapshots_special ON dutchie_product_snapshots(special) WHERE special = true; +CREATE INDEX IF NOT EXISTS idx_snapshots_stock_status ON dutchie_product_snapshots(stock_status); +CREATE INDEX IF NOT EXISTS idx_snapshots_crawl_mode ON dutchie_product_snapshots(crawl_mode); + +-- ============================================================ +-- CRAWL_JOBS TABLE +-- Tracks crawl execution status +-- ============================================================ +CREATE TABLE IF NOT EXISTS crawl_jobs ( + id SERIAL PRIMARY KEY, + job_type VARCHAR(50) NOT NULL, + dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE SET NULL, + status VARCHAR(20) NOT NULL DEFAULT 'pending', + started_at TIMESTAMPTZ, + completed_at TIMESTAMPTZ, + error_message TEXT, + products_found INTEGER, + snapshots_created INTEGER, + metadata JSONB, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_crawl_jobs_type ON crawl_jobs(job_type); +CREATE INDEX IF NOT EXISTS idx_crawl_jobs_status ON crawl_jobs(status); +CREATE INDEX IF NOT EXISTS idx_crawl_jobs_dispensary ON crawl_jobs(dispensary_id); +CREATE INDEX IF NOT EXISTS idx_crawl_jobs_created ON crawl_jobs(created_at); + +-- ============================================================ +-- JOB_SCHEDULES TABLE +-- Stores schedule configuration for recurring jobs with jitter support +-- Each job has independent timing that "wanders" over time +-- 
============================================================ +CREATE TABLE IF NOT EXISTS job_schedules ( + id SERIAL PRIMARY KEY, + job_name VARCHAR(100) NOT NULL UNIQUE, + description TEXT, + enabled BOOLEAN DEFAULT true, + + -- Timing configuration (jitter makes times "wander") + base_interval_minutes INTEGER NOT NULL DEFAULT 240, -- e.g., 4 hours + jitter_minutes INTEGER NOT NULL DEFAULT 30, -- e.g., ±30 min + + -- Last run tracking + last_run_at TIMESTAMPTZ, + last_status VARCHAR(20), -- 'success', 'error', 'partial', 'running' + last_error_message TEXT, + last_duration_ms INTEGER, + + -- Next run (calculated with jitter after each run) + next_run_at TIMESTAMPTZ, + + -- Additional config + job_config JSONB, -- e.g., { pricingType: 'rec', useBothModes: true } + + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_job_schedules_enabled ON job_schedules(enabled); +CREATE INDEX IF NOT EXISTS idx_job_schedules_next_run ON job_schedules(next_run_at); + +-- ============================================================ +-- JOB_RUN_LOGS TABLE +-- Stores history of job runs for monitoring +-- ============================================================ +CREATE TABLE IF NOT EXISTS job_run_logs ( + id SERIAL PRIMARY KEY, + schedule_id INTEGER NOT NULL REFERENCES job_schedules(id) ON DELETE CASCADE, + job_name VARCHAR(100) NOT NULL, + status VARCHAR(20) NOT NULL, -- 'pending', 'running', 'success', 'error', 'partial' + started_at TIMESTAMPTZ, + completed_at TIMESTAMPTZ, + duration_ms INTEGER, + error_message TEXT, + + -- Results summary + items_processed INTEGER, + items_succeeded INTEGER, + items_failed INTEGER, + + metadata JSONB, -- Additional run details + + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX IF NOT EXISTS idx_job_run_logs_schedule ON job_run_logs(schedule_id); +CREATE INDEX IF NOT EXISTS idx_job_run_logs_job_name ON job_run_logs(job_name); +CREATE INDEX IF NOT EXISTS 
idx_job_run_logs_status ON job_run_logs(status); +CREATE INDEX IF NOT EXISTS idx_job_run_logs_created ON job_run_logs(created_at); + +-- ============================================================ +-- VIEWS FOR EASY QUERYING +-- ============================================================ + +-- Categories derived from products +CREATE OR REPLACE VIEW v_categories AS +SELECT + type, + subcategory, + COUNT(DISTINCT id) as product_count, + COUNT(DISTINCT dispensary_id) as dispensary_count, + AVG(thc) as avg_thc, + MIN(thc) as min_thc, + MAX(thc) as max_thc +FROM dutchie_products +WHERE type IS NOT NULL +GROUP BY type, subcategory +ORDER BY type, subcategory; + +-- Brands derived from products +CREATE OR REPLACE VIEW v_brands AS +SELECT + brand_name, + brand_id, + MAX(brand_logo_url) as brand_logo_url, + COUNT(DISTINCT id) as product_count, + COUNT(DISTINCT dispensary_id) as dispensary_count, + ARRAY_AGG(DISTINCT type) FILTER (WHERE type IS NOT NULL) as product_types +FROM dutchie_products +WHERE brand_name IS NOT NULL +GROUP BY brand_name, brand_id +ORDER BY product_count DESC; + +-- Latest snapshot per product (most recent crawl data) +CREATE OR REPLACE VIEW v_latest_snapshots AS +SELECT DISTINCT ON (dutchie_product_id) + s.* +FROM dutchie_product_snapshots s +ORDER BY dutchie_product_id, crawled_at DESC; + +-- Dashboard stats +CREATE OR REPLACE VIEW v_dashboard_stats AS +SELECT + (SELECT COUNT(*) FROM dispensaries WHERE state = 'AZ') as dispensary_count, + (SELECT COUNT(*) FROM dutchie_products) as product_count, + (SELECT COUNT(*) FROM dutchie_product_snapshots WHERE crawled_at > NOW() - INTERVAL '24 hours') as snapshots_24h, + (SELECT MAX(crawled_at) FROM dutchie_product_snapshots) as last_crawl_time, + (SELECT COUNT(*) FROM crawl_jobs WHERE status = 'failed' AND created_at > NOW() - INTERVAL '24 hours') as failed_jobs_24h, + (SELECT COUNT(DISTINCT brand_name) FROM dutchie_products WHERE brand_name IS NOT NULL) as brand_count, + (SELECT COUNT(DISTINCT (type, 
subcategory)) FROM dutchie_products WHERE type IS NOT NULL) as category_count; +`; + +/** + * Run the schema migration: executes every SCHEMA_SQL statement in one transaction (rolled back on any failure, error rethrown) + */ +export async function createSchema(): Promise<void> { + console.log('[DutchieAZ Schema] Creating database schema...'); + + const client = await getClient(); + + try { + await client.query('BEGIN'); + + // Split into individual statements and strip full-line `--` comments, so statements preceded by a comment banner are not discarded + const statements = SCHEMA_SQL + .split(';') + .map(s => s.replace(/^\s*--.*$/gm, '').trim()) + .filter(s => s.length > 0); + + for (const statement of statements) { + if (statement.trim()) { + await client.query(statement + ';'); + } + } + + await client.query('COMMIT'); + console.log('[DutchieAZ Schema] Schema created successfully'); + } catch (error) { + await client.query('ROLLBACK'); + console.error('[DutchieAZ Schema] Failed to create schema:', error); + throw error; + } finally { + client.release(); + } +} + +/** + * Drop all views and tables created by createSchema (for development/testing) + */ +export async function dropSchema(): Promise<void> { + console.log('[DutchieAZ Schema] Dropping all tables...'); + + await query(` + DROP VIEW IF EXISTS v_dashboard_stats CASCADE; + DROP VIEW IF EXISTS v_latest_snapshots CASCADE; + DROP VIEW IF EXISTS v_brands CASCADE; + DROP VIEW IF EXISTS v_categories CASCADE; + DROP TABLE IF EXISTS job_run_logs, job_schedules, crawl_schedule CASCADE; + DROP TABLE IF EXISTS crawl_jobs CASCADE; + DROP TABLE IF EXISTS dutchie_product_snapshots CASCADE; + DROP TABLE IF EXISTS dutchie_products CASCADE; + DROP TABLE IF EXISTS dispensaries CASCADE; + `); + + console.log('[DutchieAZ Schema] All tables dropped'); +} + +/** + * Check if schema exists (true when a table named 'dispensaries' is visible; false on any query error) + */ +export async function schemaExists(): Promise<boolean> { + try { + const result = await query(` + SELECT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'dispensaries' + ) as exists + `); + return result.rows[0]?.exists === true; + } catch (error) { + return false; + } +} + +/** + * Initialize schema if it doesn't exist + */ +export async function ensureSchema(): Promise<void> { + const 
exists = await schemaExists(); + if (!exists) { + await createSchema(); + } else { + console.log('[DutchieAZ Schema] Schema already exists'); + } +} diff --git a/backend/src/dutchie-az/index.ts b/backend/src/dutchie-az/index.ts new file mode 100644 index 00000000..d4d6d03e --- /dev/null +++ b/backend/src/dutchie-az/index.ts @@ -0,0 +1,91 @@ +/** + * Dutchie AZ Data Pipeline + * + * Isolated data pipeline for crawling and storing Dutchie Arizona dispensary data. + * This module is completely separate from the main application database. + * + * Features: + * - Two-mode crawling (Mode A: UI parity, Mode B: MAX COVERAGE) + * - Derived stockStatus field (in_stock, out_of_stock, unknown) + * - Full raw payload storage for 100% data preservation + * - AZDHS dispensary list as canonical source + */ + +// Types +export * from './types'; + +// Database +export { + getDutchieAZPool, + query, + getClient, + closePool, + healthCheck, +} from './db/connection'; + +export { + createSchema, + dropSchema, + schemaExists, + ensureSchema, +} from './db/schema'; + +// Services - GraphQL Client +export { + GRAPHQL_HASHES, + ARIZONA_CENTERPOINTS, + resolveDispensaryId, + fetchAllProducts, + fetchAllProductsBothModes, + discoverDispensaries, + discoverArizonaDispensaries, +} from './services/graphql-client'; + +// Services - Discovery +export { + importFromExistingDispensaries, + discoverDispensaries as discoverAndSaveDispensaries, + resolvePlatformDispensaryIds, + getAllDispensaries, + getDispensaryById, + getDispensariesWithPlatformIds, +} from './services/discovery'; + +// Services - Product Crawler +export { + normalizeProduct, + normalizeSnapshot, + crawlDispensaryProducts, + crawlAllArizonaDispensaries, +} from './services/product-crawler'; + +export type { CrawlResult } from './services/product-crawler'; + +// Services - Scheduler +export { + startScheduler, + stopScheduler, + triggerImmediateCrawl, + getSchedulerStatus, + crawlSingleDispensary, + // Schedule config CRUD + 
getAllSchedules, + getScheduleById, + createSchedule, + updateSchedule, + deleteSchedule, + triggerScheduleNow, + initializeDefaultSchedules, + // Run logs + getRunLogs, +} from './services/scheduler'; + +// Services - AZDHS Import +export { + importAZDHSDispensaries, + importFromJSON, + getImportStats, +} from './services/azdhs-import'; + +// Routes +export { default as dutchieAZRouter } from './routes'; diff --git a/backend/src/dutchie-az/routes/index.ts b/backend/src/dutchie-az/routes/index.ts new file mode 100644 index 00000000..a014403c --- /dev/null +++ b/backend/src/dutchie-az/routes/index.ts @@ -0,0 +1,1318 @@ +/** + * Dutchie AZ API Routes + * + * Express routes for the Dutchie AZ data pipeline. + * Provides API endpoints for stores, products, categories, and dashboard. + */ + +import { Router, Request, Response } from 'express'; +import { query } from '../db/connection'; +import { ensureSchema } from '../db/schema'; +import { + importAZDHSDispensaries, + importFromJSON, + getImportStats, +} from '../services/azdhs-import'; +import { + discoverDispensaries, + resolvePlatformDispensaryIds, + getAllDispensaries, + getDispensaryById, +} from '../services/discovery'; +import { crawlDispensaryProducts } from '../services/product-crawler'; +import { + startScheduler, + stopScheduler, + triggerImmediateCrawl, + getSchedulerStatus, + crawlSingleDispensary, + getAllSchedules, + getScheduleById, + createSchedule, + updateSchedule, + deleteSchedule, + triggerScheduleNow, + initializeDefaultSchedules, + getRunLogs, +} from '../services/scheduler'; +import { StockStatus } from '../types'; + +const router = Router(); + +// ============================================================ +// DASHBOARD +// ============================================================ + +/** + * GET /api/dutchie-az/dashboard + * Dashboard stats overview + */ +router.get('/dashboard', async (_req: Request, res: Response) => { + try { + const { rows } = await query<{ + dispensary_count: string; + 
product_count: string;
      snapshots_24h: string;
      last_crawl_time: Date;
      failed_jobs_24h: string;
      brand_count: string;
      category_count: string;
    }>(`SELECT * FROM v_dashboard_stats`);

    const stats = rows[0] || {};
    res.json({
      dispensaryCount: parseInt(stats.dispensary_count || '0', 10),
      productCount: parseInt(stats.product_count || '0', 10),
      snapshotCount24h: parseInt(stats.snapshots_24h || '0', 10),
      lastCrawlTime: stats.last_crawl_time,
      failedJobCount: parseInt(stats.failed_jobs_24h || '0', 10),
      brandCount: parseInt(stats.brand_count || '0', 10),
      categoryCount: parseInt(stats.category_count || '0', 10),
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

// ============================================================
// SHARED HELPERS
// ============================================================

/**
 * Parse raw `limit` / `offset` query-string values into integers that are
 * safe to bind to SQL LIMIT / OFFSET placeholders.
 *
 * A missing, non-numeric, or negative value falls back to the default
 * (`defaultLimit` for limit, 0 for offset) instead of reaching the database
 * as NaN or a negative number, which would surface as a 500.
 *
 * @param rawLimit     Value of `req.query.limit` (may be undefined).
 * @param rawOffset    Value of `req.query.offset` (may be undefined).
 * @param defaultLimit Page size used when `rawLimit` is absent or invalid.
 * @returns Non-negative integer `limit` and `offset`.
 */
function parsePagination(
  rawLimit: unknown,
  rawOffset: unknown,
  defaultLimit: number
): { limit: number; offset: number } {
  const parsedLimit = parseInt(String(rawLimit), 10);
  const parsedOffset = parseInt(String(rawOffset), 10);

  return {
    limit: Number.isInteger(parsedLimit) && parsedLimit >= 0 ? parsedLimit : defaultLimit,
    offset: Number.isInteger(parsedOffset) && parsedOffset >= 0 ? parsedOffset : 0,
  };
}

// ============================================================
// DISPENSARIES (STORES)
// ============================================================

/**
 * GET /api/dutchie-az/stores
 * List all AZ stores with optional filters.
 *
 * Query params:
 *   city          - exact match on `city`
 *   hasPlatformId - 'true' / 'false' to require a (non-)null platform_dispensary_id
 *   limit, offset - pagination (defaults 100 / 0)
 *
 * Responds with `{ stores, total, limit, offset }`, where `total` is the
 * number of rows matching the filters regardless of pagination.
 */
router.get('/stores', async (req: Request, res: Response) => {
  try {
    const { city, hasPlatformId, limit: rawLimit, offset: rawOffset } = req.query;
    const { limit, offset } = parsePagination(rawLimit, rawOffset, 100);

    let whereClause = 'WHERE state = \'AZ\'';
    const params: any[] = [];
    let paramIndex = 1;

    if (city) {
      whereClause += ` AND city = $${paramIndex}`;
      params.push(city);
      paramIndex++;
    }

    if (hasPlatformId === 'true') {
      whereClause += ' AND platform_dispensary_id IS NOT NULL';
    } else if (hasPlatformId === 'false') {
      whereClause += ' AND platform_dispensary_id IS NULL';
    }

    // LIMIT/OFFSET are always the last two bound parameters.
    params.push(limit, offset);

    const { rows } = await query(
      `
      SELECT * FROM dispensaries
      ${whereClause}
      ORDER BY name
      LIMIT $${paramIndex} OFFSET $${paramIndex + 1}
      `,
      params
    );

    // Get total count (same filters, without the two pagination params)
    const { rows: countRows } = await query(
      `SELECT COUNT(*) as total FROM dispensaries ${whereClause}`,
      params.slice(0, -2)
    );

    res.json({
      stores: rows,
      total: parseInt(countRows[0]?.total || '0', 10),
      limit,
      offset,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/dutchie-az/stores/:id
 * Get a single store by ID. Responds 404 when no store is found.
 */
router.get('/stores/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const store = await getDispensaryById(parseInt(id, 10));

    if (!store) {
      return res.status(404).json({ error: 'Store not found' });
    }

    res.json(store);
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/dutchie-az/stores/:id/summary
 * Get store summary with product count, categories, and brands
 * This is the main endpoint for the DispensaryDetail panel
 *
 * Responds 404 when the store does not exist; otherwise returns the
 * dispensary row, stock-status counts, per-category and per-brand product
 * counts, and the most recent crawl job (or null).
 */
router.get('/stores/:id/summary', async (req: Request, res: Response) => {
  try {
    // Parse once so every query below binds the same integer id.
    const dispensaryId = parseInt(req.params.id, 10);

    // Get dispensary info
    const { rows: dispensaryRows } = await query(
      `SELECT * FROM dispensaries WHERE id = $1`,
      [dispensaryId]
    );

    if (dispensaryRows.length === 0) {
      return res.status(404).json({ error: 'Store not found' });
    }

    const dispensary = dispensaryRows[0];

    // The remaining lookups are independent of each other, so run them
    // concurrently instead of one after another.
    const [
      { rows: countRows },
      { rows: categories },
      { rows: brands },
      { rows: lastCrawl },
    ] = await Promise.all([
      // Product counts by stock status
      query(
        `
        SELECT
          COUNT(*) as total_products,
          COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock_count,
          COUNT(*) FILTER (WHERE stock_status = 'out_of_stock') as out_of_stock_count,
          COUNT(*) FILTER (WHERE stock_status = 'unknown') as unknown_count,
          COUNT(*) FILTER (WHERE missing_from_feed = true) as missing_count
        FROM dutchie_products
        WHERE dispensary_id = $1
        `,
        [dispensaryId]
      ),
      // Categories with counts for this store
      query(
        `
        SELECT
          type,
          subcategory,
          COUNT(*) as product_count
        FROM dutchie_products
        WHERE dispensary_id = $1 AND type IS NOT NULL
        GROUP BY type, subcategory
        ORDER BY type, subcategory
        `,
        [dispensaryId]
      ),
      // Brands with counts for this store
      query(
        `
        SELECT
          brand_name,
          COUNT(*) as product_count
        FROM dutchie_products
        WHERE dispensary_id = $1 AND brand_name IS NOT NULL
        GROUP BY brand_name
        ORDER BY product_count DESC
        `,
        [dispensaryId]
      ),
      // Last crawl info
      query(
        `
        SELECT
          id,
          status,
          started_at,
          completed_at,
          products_found,
          products_inserted,
          products_updated,
          error_message
        FROM crawl_jobs
        WHERE dispensary_id = $1
        ORDER BY created_at DESC
        LIMIT 1
        `,
        [dispensaryId]
      ),
    ]);

    const counts = countRows[0] || {};

    res.json({
      dispensary,
      totalProducts: parseInt(counts.total_products || '0', 10),
      inStockCount: parseInt(counts.in_stock_count || '0', 10),
      outOfStockCount: parseInt(counts.out_of_stock_count || '0', 10),
      unknownStockCount: parseInt(counts.unknown_count || '0', 10),
      missingFromFeedCount: parseInt(counts.missing_count || '0', 10),
      categories,
      brands,
      brandCount: brands.length,
      categoryCount: categories.length,
      lastCrawl: lastCrawl[0] || null,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/dutchie-az/stores/:id/products
 * Get paginated products for a store with latest snapshot data
 *
 * Query params:
 *   stockStatus, type, subcategory - exact-match filters
 *   brandName                      - case-insensitive substring match on brand
 *   search                         - case-insensitive substring match on name or brand
 *   limit, offset                  - pagination (defaults 50 / 0)
 *
 * Responds with `{ products, total, limit, offset }`; each product is
 * reshaped into the field names the frontend expects.
 */
router.get('/stores/:id/products', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const {
      stockStatus,
      type,
      subcategory,
      brandName,
      search,
      limit: rawLimit,
      offset: rawOffset,
    } = req.query;
    const { limit, offset } = parsePagination(rawLimit, rawOffset, 50);

    let whereClause = 'WHERE p.dispensary_id = $1';
    const params: any[] = [parseInt(id, 10)];
    let paramIndex = 2;

    if (stockStatus) {
      whereClause += ` AND p.stock_status = $${paramIndex}`;
      params.push(stockStatus);
      paramIndex++;
    }

    if (type) {
      whereClause += ` AND p.type = $${paramIndex}`;
      params.push(type);
      paramIndex++;
    }

    if (subcategory) {
      whereClause += ` AND p.subcategory = $${paramIndex}`;
      params.push(subcategory);
      paramIndex++;
    }

    if (brandName) {
      whereClause += ` AND p.brand_name ILIKE $${paramIndex}`;
      params.push(`%${brandName}%`);
      paramIndex++;
    }

    if (search) {
      // One placeholder is intentionally reused for both columns.
      whereClause += ` AND (p.name ILIKE $${paramIndex} OR p.brand_name ILIKE $${paramIndex})`;
      params.push(`%${search}%`);
      paramIndex++;
    }

    // LIMIT/OFFSET are always the last two bound parameters.
    params.push(limit, offset);

    // Get products with their latest snapshot data
    const { rows: products } = await query(
      `
      SELECT
        p.id,
        p.platform_product_id,
        p.name,
        p.slug,
        p.brand_name,
        p.type,
        p.subcategory,
        p.strain_type,
        p.stock_status,
        p.missing_from_feed,
        p.first_seen_at,
        p.last_seen_at,
        p.updated_at,
        -- Latest snapshot data
        s.price_rec,
        s.price_med,
        s.special_price_rec,
        s.special_price_med,
        s.thc_potency_range,
        s.cbd_potency_range,
        s.total_quantity,
        s.images,
        s.options,
        s.description,
        s.crawled_at as snapshot_at
      FROM dutchie_products p
      LEFT JOIN LATERAL (
        SELECT * FROM dutchie_product_snapshots
        WHERE dutchie_product_id = p.id
        ORDER BY crawled_at DESC
        LIMIT 1
      ) s ON true
      ${whereClause}
      ORDER BY p.updated_at DESC
      LIMIT $${paramIndex} OFFSET $${paramIndex + 1}
      `,
      params
    );

    // Get total count (same filters, without the two pagination params)
    const { rows: countRows } = await query(
      `SELECT COUNT(*) as total FROM dutchie_products p ${whereClause}`,
      params.slice(0, -2)
    );

    // Transform products for frontend compatibility
    const transformedProducts = products.map((p) => ({
      id: p.id,
      external_id: p.platform_product_id,
      name: p.name,
      slug: p.slug,
      brand: p.brand_name,
      type: p.type,
      subcategory: p.subcategory,
      strain_type: p.strain_type,
      stock_status: p.stock_status,
      in_stock: p.stock_status === 'in_stock',
      missing_from_feed: p.missing_from_feed,
      // Prices from latest snapshot
      regular_price: p.price_rec,
      sale_price: p.special_price_rec,
      med_price: p.price_med,
      med_sale_price: p.special_price_med,
      // Potency - prefer the range max, then min.
      // NOTE(review): `||` also maps a literal 0 to the fallback/null; confirm
      // whether 0% should be reported as 0 before switching to `??`.
      thc_percentage: p.thc_potency_range?.max || p.thc_potency_range?.min || null,
      cbd_percentage: p.cbd_potency_range?.max || p.cbd_potency_range?.min || null,
      // Images - extract first image URL (entries may be strings or { url } objects)
      image_url: Array.isArray(p.images) && p.images.length > 0
        ? (typeof p.images[0] === 'string' ? p.images[0] : p.images[0]?.url)
        : null,
      // Other
      description: p.description,
      options: p.options,
      total_quantity: p.total_quantity,
      // Timestamps
      first_seen_at: p.first_seen_at,
      last_seen_at: p.last_seen_at,
      updated_at: p.updated_at,
      snapshot_at: p.snapshot_at,
    }));

    res.json({
      products: transformedProducts,
      total: parseInt(countRows[0]?.total || '0', 10),
      limit,
      offset,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/dutchie-az/stores/:id/brands
 * Get brands for a specific store, most products first.
 */
router.get('/stores/:id/brands', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;

    const { rows: brands } = await query(
      `
      SELECT
        brand_name as brand,
        COUNT(*) as product_count
      FROM dutchie_products
      WHERE dispensary_id = $1 AND brand_name IS NOT NULL
      GROUP BY brand_name
      ORDER BY product_count DESC
      `,
      [parseInt(id, 10)]
    );

    res.json({ brands });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/dutchie-az/stores/:id/categories
 * Get categories (type + subcategory) with product counts for a specific store
 */
router.get('/stores/:id/categories', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;

    const { rows: categories } = await query(
      `
      SELECT
        type,
        subcategory,
        COUNT(*) as product_count
      FROM dutchie_products
      WHERE dispensary_id = $1 AND type IS NOT NULL
      GROUP BY type, subcategory
      ORDER BY type, subcategory
      `,
      [parseInt(id, 10)]
    );

    res.json({ categories });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

// ============================================================
// PRODUCTS
// ============================================================

/**
 * GET /api/dutchie-az/products
 * List products with filtering on our own DB
 *
 * Query params:
 *   storeId                        - restrict to one dispensary
 *   stockStatus, type, subcategory - exact-match filters
 *   brandName                      - case-insensitive substring match on brand
 *   search                         - case-insensitive substring match on name or brand
 *   limit, offset                  - pagination (defaults 50 / 0)
 *
 * Responds with `{ products, total, limit, offset }`.
 */
router.get('/products', async (req: Request, res: Response) => {
  try {
    const {
      storeId,
      stockStatus,
      type,
      subcategory,
      brandName,
      search,
      limit: rawLimit,
      offset: rawOffset,
    } = req.query;
    const { limit, offset } = parsePagination(rawLimit, rawOffset, 50);

    // Every filter column is qualified with the `p` alias: the page query
    // joins `dispensaries d`, which also has a `name` column, so an
    // unqualified `name` is rejected by PostgreSQL as ambiguous.
    let whereClause = 'WHERE 1=1';
    const params: any[] = [];
    let paramIndex = 1;

    if (storeId) {
      whereClause += ` AND p.dispensary_id = $${paramIndex}`;
      params.push(parseInt(storeId as string, 10));
      paramIndex++;
    }

    if (stockStatus) {
      whereClause += ` AND p.stock_status = $${paramIndex}`;
      params.push(stockStatus);
      paramIndex++;
    }

    if (type) {
      whereClause += ` AND p.type = $${paramIndex}`;
      params.push(type);
      paramIndex++;
    }

    if (subcategory) {
      whereClause += ` AND p.subcategory = $${paramIndex}`;
      params.push(subcategory);
      paramIndex++;
    }

    if (brandName) {
      whereClause += ` AND p.brand_name ILIKE $${paramIndex}`;
      params.push(`%${brandName}%`);
      paramIndex++;
    }

    if (search) {
      // One placeholder is intentionally reused for both columns.
      whereClause += ` AND (p.name ILIKE $${paramIndex} OR p.brand_name ILIKE $${paramIndex})`;
      params.push(`%${search}%`);
      paramIndex++;
    }

    // LIMIT/OFFSET are always the last two bound parameters.
    params.push(limit, offset);

    const { rows } = await query(
      `
      SELECT
        p.*,
        d.name as store_name,
        d.city as store_city
      FROM dutchie_products p
      JOIN dispensaries d ON p.dispensary_id = d.id
      ${whereClause}
      ORDER BY p.updated_at DESC
      LIMIT $${paramIndex} OFFSET $${paramIndex + 1}
      `,
      params
    );

    // Get total count; the table is aliased `p` so the shared WHERE clause resolves.
    const { rows: countRows } = await query(
      `SELECT COUNT(*) as total FROM dutchie_products p ${whereClause}`,
      params.slice(0, -2)
    );

    res.json({
      products: rows,
      total: parseInt(countRows[0]?.total || '0', 10),
      limit,
      offset,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/dutchie-az/products/:id
 * Get a single product with its latest snapshot
 *
 * Responds 404 when the product does not exist; `latestSnapshot` is null
 * when the product has no snapshots.
 */
router.get('/products/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;

    const { rows: productRows } = await query(
      `
      SELECT
        p.*,
        d.name as store_name,
        d.city as store_city,
        d.slug as store_slug
      FROM dutchie_products p
      JOIN dispensaries d ON p.dispensary_id = d.id
      WHERE p.id = $1
      `,
      [id]
    );

    if (productRows.length === 0) {
      return res.status(404).json({ error: 'Product not found' });
    }

    // Get latest snapshot
    const { rows: snapshotRows } = await query(
      `
      SELECT * FROM dutchie_product_snapshots
      WHERE dutchie_product_id = $1
      ORDER BY crawled_at DESC
      LIMIT 1
      `,
      [id]
    );

    res.json({
      product: productRows[0],
      latestSnapshot: snapshotRows[0] || null,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/dutchie-az/products/:id/snapshots
 * Get snapshot history for a product, newest first.
 *
 * Query params: limit, offset - pagination (defaults 50 / 0)
 */
router.get('/products/:id/snapshots', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { limit, offset } = parsePagination(req.query.limit, req.query.offset, 50);

    const { rows } = await query(
      `
      SELECT * FROM dutchie_product_snapshots
      WHERE dutchie_product_id = $1
      ORDER BY crawled_at DESC
      LIMIT $2 OFFSET $3
      `,
      [id, limit, offset]
    );

    res.json({ snapshots: rows });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

// ============================================================
// CATEGORIES
// ============================================================

/**
 * GET /api/dutchie-az/categories
 * Get all categories with counts (rows of the `v_categories` view)
 */
router.get('/categories', async (_req: Request, res: Response) => {
  try {
    const { rows } = await query(`SELECT * FROM v_categories`);
    res.json({ categories: rows });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

// ============================================================
// BRANDS
// ============================================================

/**
 * GET /api/dutchie-az/brands
 * Get all brands with counts (rows of the `v_brands` view)
 *
 * Query params: limit, offset - pagination (defaults 100 / 0)
 *
 * NOTE(review): no ORDER BY is applied here, so page contents are only
 * stable if `v_brands` itself defines an ordering - confirm against the view.
 */
router.get('/brands', async (req: Request, res: Response) => {
  try {
    const { limit, offset } = parsePagination(req.query.limit, req.query.offset, 100);

    const { rows } = await query(
      `
      SELECT * FROM v_brands
      LIMIT $1 OFFSET $2
      `,
      [limit, offset]
    );

    res.json({ brands: rows });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

// ============================================================
// ADMIN ACTIONS
// ============================================================

/**
 * POST /api/dutchie-az/admin/init-schema
 * Initialize the database schema
 */
router.post('/admin/init-schema', async (_req: Request, res: Response) => {
  try {
    await ensureSchema();
    res.json({ success: true, message: 'Schema initialized' });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

/**
 * POST /api/dutchie-az/admin/import-azdhs
 * Import dispensaries from AZDHS (main database)
 */
router.post('/admin/import-azdhs', async (_req: Request, res: Response) => {
  try {
    const result = await importAZDHSDispensaries();
    res.json(result);
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

/**
 * POST /api/dutchie-az/admin/resolve-platform-ids
 * Resolve Dutchie platform IDs for all dispensaries
 */
router.post('/admin/resolve-platform-ids', async (_req: Request, res: Response) => {
  try {
    const result = await resolvePlatformDispensaryIds();
    res.json(result);
  } catch (error: any) {
    res.status(500).json({ error: error.message });
+ } +}); + +/** + * POST /api/dutchie-az/admin/crawl-store/:id + * Crawl a single store + */ +router.post('/admin/crawl-store/:id', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const { pricingType = 'rec', useBothModes = true } = req.body; + + const dispensary = await getDispensaryById(parseInt(id, 10)); + if (!dispensary) { + return res.status(404).json({ error: 'Store not found' }); + } + + const result = await crawlDispensaryProducts(dispensary, pricingType, { useBothModes }); + res.json(result); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } +}); + +/** + * GET /api/dutchie-az/admin/stats + * Get import and crawl statistics + */ +router.get('/admin/stats', async (_req: Request, res: Response) => { + try { + const importStats = await getImportStats(); + + // Get stock status distribution + const { rows: stockStats } = await query(` + SELECT + stock_status, + COUNT(*) as count + FROM dutchie_products + GROUP BY stock_status + `); + + // Get recent crawl jobs + const { rows: recentJobs } = await query(` + SELECT * FROM crawl_jobs + ORDER BY created_at DESC + LIMIT 10 + `); + + res.json({ + import: importStats, + stockDistribution: stockStats, + recentJobs, + }); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } +}); + +// ============================================================ +// SCHEDULER ADMIN +// ============================================================ + +/** + * GET /api/dutchie-az/admin/scheduler/status + * Get scheduler status + */ +router.get('/admin/scheduler/status', async (_req: Request, res: Response) => { + try { + const status = getSchedulerStatus(); + res.json(status); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } +}); + +/** + * POST /api/dutchie-az/admin/scheduler/start + * Start the scheduler + */ +router.post('/admin/scheduler/start', async (_req: Request, res: Response) => { + try { + startScheduler(); + 
res.json({ success: true, message: 'Scheduler started' }); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } +}); + +/** + * POST /api/dutchie-az/admin/scheduler/stop + * Stop the scheduler + */ +router.post('/admin/scheduler/stop', async (_req: Request, res: Response) => { + try { + stopScheduler(); + res.json({ success: true, message: 'Scheduler stopped' }); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } +}); + +/** + * POST /api/dutchie-az/admin/scheduler/trigger + * Trigger an immediate crawl cycle + */ +router.post('/admin/scheduler/trigger', async (_req: Request, res: Response) => { + try { + const result = await triggerImmediateCrawl(); + res.json(result); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } +}); + +/** + * POST /api/dutchie-az/admin/crawl/:id + * Crawl a single dispensary with job tracking + */ +router.post('/admin/crawl/:id', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const { pricingType = 'rec', useBothModes = true } = req.body; + + // Fetch the dispensary first + const dispensary = await getDispensaryById(parseInt(id, 10)); + if (!dispensary) { + return res.status(404).json({ error: 'Dispensary not found' }); + } + + const result = await crawlSingleDispensary(dispensary, pricingType, { useBothModes }); + res.json(result); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } +}); + +/** + * GET /api/dutchie-az/admin/jobs + * Get crawl job history + */ +router.get('/admin/jobs', async (req: Request, res: Response) => { + try { + const { status, dispensaryId, limit = '50', offset = '0' } = req.query; + + let whereClause = 'WHERE 1=1'; + const params: any[] = []; + let paramIndex = 1; + + if (status) { + whereClause += ` AND status = $${paramIndex}`; + params.push(status); + paramIndex++; + } + + if (dispensaryId) { + whereClause += ` AND dispensary_id = $${paramIndex}`; + 
params.push(parseInt(dispensaryId as string, 10)); + paramIndex++; + } + + params.push(parseInt(limit as string, 10), parseInt(offset as string, 10)); + + const { rows } = await query( + ` + SELECT + cj.*, + d.name as dispensary_name, + d.slug as dispensary_slug + FROM crawl_jobs cj + LEFT JOIN dispensaries d ON cj.dispensary_id = d.id + ${whereClause} + ORDER BY cj.created_at DESC + LIMIT $${paramIndex} OFFSET $${paramIndex + 1} + `, + params + ); + + const { rows: countRows } = await query( + `SELECT COUNT(*) as total FROM crawl_jobs ${whereClause}`, + params.slice(0, -2) + ); + + res.json({ + jobs: rows, + total: parseInt(countRows[0]?.total || '0', 10), + limit: parseInt(limit as string, 10), + offset: parseInt(offset as string, 10), + }); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } +}); + +// ============================================================ +// SCHEDULES (CONFIG CRUD) +// ============================================================ + +/** + * GET /api/dutchie-az/admin/schedules + * Get all schedule configurations + */ +router.get('/admin/schedules', async (_req: Request, res: Response) => { + try { + const schedules = await getAllSchedules(); + res.json({ schedules }); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } +}); + +/** + * GET /api/dutchie-az/admin/schedules/:id + * Get a single schedule by ID + */ +router.get('/admin/schedules/:id', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const schedule = await getScheduleById(parseInt(id, 10)); + + if (!schedule) { + return res.status(404).json({ error: 'Schedule not found' }); + } + + res.json(schedule); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } +}); + +/** + * POST /api/dutchie-az/admin/schedules + * Create a new schedule + */ +router.post('/admin/schedules', async (req: Request, res: Response) => { + try { + const { + jobName, + description, + enabled = true, + 
baseIntervalMinutes, + jitterMinutes, + jobConfig, + startImmediately = false, + } = req.body; + + if (!jobName || typeof baseIntervalMinutes !== 'number' || typeof jitterMinutes !== 'number') { + return res.status(400).json({ + error: 'jobName, baseIntervalMinutes, and jitterMinutes are required', + }); + } + + const schedule = await createSchedule({ + jobName, + description, + enabled, + baseIntervalMinutes, + jitterMinutes, + jobConfig, + startImmediately, + }); + + res.status(201).json(schedule); + } catch (error: any) { + // Handle unique constraint violation + if (error.code === '23505') { + return res.status(409).json({ error: `Schedule "${req.body.jobName}" already exists` }); + } + res.status(500).json({ error: error.message }); + } +}); + +/** + * PUT /api/dutchie-az/admin/schedules/:id + * Update a schedule + */ +router.put('/admin/schedules/:id', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const { description, enabled, baseIntervalMinutes, jitterMinutes, jobConfig } = req.body; + + const schedule = await updateSchedule(parseInt(id, 10), { + description, + enabled, + baseIntervalMinutes, + jitterMinutes, + jobConfig, + }); + + if (!schedule) { + return res.status(404).json({ error: 'Schedule not found' }); + } + + res.json(schedule); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } +}); + +/** + * DELETE /api/dutchie-az/admin/schedules/:id + * Delete a schedule + */ +router.delete('/admin/schedules/:id', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const deleted = await deleteSchedule(parseInt(id, 10)); + + if (!deleted) { + return res.status(404).json({ error: 'Schedule not found' }); + } + + res.json({ success: true, message: 'Schedule deleted' }); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } +}); + +/** + * POST /api/dutchie-az/admin/schedules/:id/trigger + * Trigger immediate execution of a schedule + */ 
+router.post('/admin/schedules/:id/trigger', async (req: Request, res: Response) => {
+  try {
+    // Reject non-numeric ids up front rather than handing NaN to the scheduler.
+    const scheduleId = parseInt(req.params.id, 10);
+    if (Number.isNaN(scheduleId)) {
+      return res.status(400).json({ error: 'Invalid schedule id' });
+    }
+
+    const result = await triggerScheduleNow(scheduleId);
+
+    // The scheduler reports a refused trigger through `success`/`message`.
+    if (!result.success) {
+      return res.status(400).json({ error: result.message });
+    }
+
+    res.json(result);
+  } catch (error: any) {
+    res.status(500).json({ error: error.message });
+  }
+});
+
+/**
+ * POST /api/dutchie-az/admin/schedules/init
+ * Initialize default schedules if they don't exist, then return every schedule.
+ */
+router.post('/admin/schedules/init', async (_req: Request, res: Response) => {
+  try {
+    await initializeDefaultSchedules();
+    const schedules = await getAllSchedules();
+    res.json({ success: true, schedules });
+  } catch (error: any) {
+    res.status(500).json({ error: error.message });
+  }
+});
+
+/**
+ * GET /api/dutchie-az/admin/schedules/:id/logs
+ * Get run logs for a specific schedule.
+ *
+ * Query params: `limit` (default 50), `offset` (default 0).
+ * Responds 400 when `:id` is not numeric.
+ */
+router.get('/admin/schedules/:id/logs', async (req: Request, res: Response) => {
+  try {
+    const scheduleId = parseInt(req.params.id, 10);
+    if (Number.isNaN(scheduleId)) {
+      return res.status(400).json({ error: 'Invalid schedule id' });
+    }
+
+    const { limit = '50', offset = '0' } = req.query;
+
+    const result = await getRunLogs({
+      scheduleId,
+      limit: parseInt(limit as string, 10),
+      offset: parseInt(offset as string, 10),
+    });
+
+    res.json(result);
+  } catch (error: any) {
+    res.status(500).json({ error: error.message });
+  }
+});
+
+/**
+ * GET /api/dutchie-az/admin/run-logs
+ * Get all run logs with filtering.
+ *
+ * Query params: `scheduleId` (optional), `jobName` (optional),
+ * `limit` (default 50), `offset` (default 0).
+ * Responds 400 when `scheduleId` is supplied but not numeric.
+ */
+router.get('/admin/run-logs', async (req: Request, res: Response) => {
+  try {
+    const { scheduleId, jobName, limit = '50', offset = '0' } = req.query;
+
+    // The filter is optional, but a supplied value must parse to a number.
+    const parsedScheduleId = scheduleId ? parseInt(scheduleId as string, 10) : undefined;
+    if (parsedScheduleId !== undefined && Number.isNaN(parsedScheduleId)) {
+      return res.status(400).json({ error: 'Invalid scheduleId' });
+    }
+
+    const result = await getRunLogs({
+      scheduleId: parsedScheduleId,
+      jobName: jobName as string | undefined,
+      limit: parseInt(limit as string, 10),
+      offset: parseInt(offset as string, 10),
+    });
+
+    res.json(result);
+  } catch (error: any) {
+    res.status(500).json({ error: error.message });
+  }
+});
+
+// ============================================================
+// DEBUG ROUTES
+// ============================================================
+
+/**
+ * GET /api/dutchie-az/debug/summary
+ * Get overall system summary for debugging.
+ *
+ * Returns table counts, the stock-status distribution, the 20 AZ dispensaries
+ * with the most products, and the 10 most recent snapshots.
+ */
+router.get('/debug/summary', async (_req: Request, res: Response) => {
+  try {
+    // The four reads are independent of each other, so issue them together.
+    const [tableCounts, stockDistribution, productsByDispensary, recentSnapshots] =
+      await Promise.all([
+        // Table counts
+        query(`
+          SELECT
+            (SELECT COUNT(*) FROM dispensaries) as dispensary_count,
+            (SELECT COUNT(*) FROM dispensaries WHERE platform_dispensary_id IS NOT NULL) as dispensaries_with_platform_id,
+            (SELECT COUNT(*) FROM dutchie_products) as product_count,
+            (SELECT COUNT(*) FROM dutchie_product_snapshots) as snapshot_count,
+            (SELECT COUNT(*) FROM crawl_jobs) as job_count,
+            (SELECT COUNT(*) FROM crawl_jobs WHERE status = 'completed') as completed_jobs,
+            (SELECT COUNT(*) FROM crawl_jobs WHERE status = 'failed') as failed_jobs
+        `),
+        // Stock status distribution
+        query(`
+          SELECT
+            stock_status,
+            COUNT(*) as count
+          FROM dutchie_products
+          GROUP BY stock_status
+          ORDER BY count DESC
+        `),
+        // Products by dispensary
+        query(`
+          SELECT
+            d.id,
+            d.name,
+            d.slug,
+            d.platform_dispensary_id,
+            COUNT(p.id) as product_count,
+            MAX(p.updated_at) as last_product_update
+          FROM dispensaries d
+          LEFT JOIN dutchie_products p ON d.id = p.dispensary_id
+          WHERE d.state = 'AZ'
+          GROUP BY d.id, d.name, d.slug, d.platform_dispensary_id
+          ORDER BY product_count DESC
+          LIMIT 20
+        `),
+        // Recent snapshots
+        query(`
+          SELECT
+            s.id,
+            s.dutchie_product_id,
+            p.name as product_name,
+            d.name as dispensary_name,
+            s.crawled_at
+          FROM dutchie_product_snapshots s
+          JOIN dutchie_products p ON s.dutchie_product_id = p.id
+          JOIN dispensaries d ON p.dispensary_id = d.id
+          ORDER BY s.crawled_at DESC
+          LIMIT 10
+        `),
+      ]);
+
+    res.json({
+      tableCounts: tableCounts.rows[0],
+      stockDistribution: stockDistribution.rows,
+      productsByDispensary: productsByDispensary.rows,
+      recentSnapshots: recentSnapshots.rows,
+    });
+  } catch (error: any) {
+    res.status(500).json({ error: error.message });
+  }
+});
+
+/**
+ * GET /api/dutchie-az/debug/store/:id
+ * Get detailed debug info for a specific store.
+ *
+ * Responds 400 when `:id` is not numeric and 404 when no dispensary has that id.
+ * Otherwise returns the dispensary row, product and snapshot aggregates, the
+ * 10 most recent crawl jobs, up to 5 in-stock and 5 out-of-stock sample
+ * products, and a type/subcategory breakdown.
+ */
+router.get('/debug/store/:id', async (req: Request, res: Response) => {
+  try {
+    // Parse once and bind the same numeric value to every query below, so a
+    // value such as "12abc" cannot pass the lookup and then fail later queries.
+    const dispensaryId = parseInt(req.params.id, 10);
+    if (Number.isNaN(dispensaryId)) {
+      return res.status(400).json({ error: 'Invalid store id' });
+    }
+
+    // Get dispensary info
+    const { rows: dispensaryRows } = await query(
+      `SELECT * FROM dispensaries WHERE id = $1`,
+      [dispensaryId]
+    );
+
+    if (dispensaryRows.length === 0) {
+      return res.status(404).json({ error: 'Store not found' });
+    }
+
+    const dispensary = dispensaryRows[0];
+
+    // Shared by the in-stock and out-of-stock samples; $2 selects the status.
+    const sampleProductsSql = `
+      SELECT
+        p.id,
+        p.name,
+        p.brand_name,
+        p.type,
+        p.stock_status,
+        p.updated_at
+      FROM dutchie_products p
+      WHERE p.dispensary_id = $1 AND p.stock_status = $2
+      ORDER BY p.updated_at DESC
+      LIMIT 5
+    `;
+
+    // None of the remaining reads depends on another, so run them together.
+    const [
+      productStats,
+      snapshotStats,
+      recentJobs,
+      sampleInStock,
+      sampleOutOfStock,
+      categories,
+    ] = await Promise.all([
+      // Product stats
+      query(
+        `
+        SELECT
+          COUNT(*) as total_products,
+          COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock,
+          COUNT(*) FILTER (WHERE stock_status = 'out_of_stock') as out_of_stock,
+          COUNT(*) FILTER (WHERE stock_status = 'unknown') as unknown,
+          COUNT(*) FILTER (WHERE missing_from_feed = true) as missing_from_feed,
+          MIN(first_seen_at) as earliest_product,
+          MAX(last_seen_at) as latest_product,
+          MAX(updated_at) as last_update
+        FROM dutchie_products
+        WHERE dispensary_id = $1
+        `,
+        [dispensaryId]
+      ),
+      // Snapshot stats. Columns are qualified with the snapshot alias so the
+      // join stays unambiguous if both tables ever share a column name.
+      query(
+        `
+        SELECT
+          COUNT(*) as total_snapshots,
+          MIN(s.crawled_at) as earliest_snapshot,
+          MAX(s.crawled_at) as latest_snapshot,
+          COUNT(DISTINCT s.dutchie_product_id) as products_with_snapshots
+        FROM dutchie_product_snapshots s
+        JOIN dutchie_products p ON s.dutchie_product_id = p.id
+        WHERE p.dispensary_id = $1
+        `,
+        [dispensaryId]
+      ),
+      // Crawl job history
+      query(
+        `
+        SELECT
+          id,
+          status,
+          started_at,
+          completed_at,
+          products_found,
+          products_inserted,
+          products_updated,
+          error_message,
+          created_at
+        FROM crawl_jobs
+        WHERE dispensary_id = $1
+        ORDER BY created_at DESC
+        LIMIT 10
+        `,
+        [dispensaryId]
+      ),
+      // Sample products (5 in-stock, 5 out-of-stock)
+      query(sampleProductsSql, [dispensaryId, 'in_stock']),
+      query(sampleProductsSql, [dispensaryId, 'out_of_stock']),
+      // Categories breakdown
+      query(
+        `
+        SELECT
+          type,
+          subcategory,
+          COUNT(*) as count
+        FROM dutchie_products
+        WHERE dispensary_id = $1
+        GROUP BY type, subcategory
+        ORDER BY count DESC
+        `,
+        [dispensaryId]
+      ),
+    ]);
+
+    res.json({
+      dispensary,
+      productStats: productStats.rows[0],
+      snapshotStats: snapshotStats.rows[0],
+      recentJobs: recentJobs.rows,
+      sampleProducts: {
+        inStock: sampleInStock.rows,
+        outOfStock: sampleOutOfStock.rows,
+      },
+      categories: categories.rows,
+    });
+  } catch (error: any) {
+    res.status(500).json({ error: error.message });
+  }
+});
+
+export default router;
diff --git a/backend/src/dutchie-az/services/azdhs-import.ts b/backend/src/dutchie-az/services/azdhs-import.ts
new file mode 100644
index 00000000..a0b16af7
--- /dev/null
+++ b/backend/src/dutchie-az/services/azdhs-import.ts
@@ -0,0 +1,258 @@
+/**
+ * AZDHS Import Service
+ *
+ * Imports Arizona dispensaries from the main database's dispensaries table
+ * (which was populated from AZDHS data) into the isolated Dutchie AZ database.
+ *
+ * This establishes the canonical list of AZ dispensaries to match against Dutchie.
+ */
+
+import { Pool } from 'pg';
+import { query as dutchieQuery } from '../db/connection';
+import { Dispensary } from '../types';
+
+// Main database connection (source of AZDHS data)
+// NOTE(review): the fallback below embeds local-development credentials in
+// source. Confirm DATABASE_URL is always set outside local development.
+const MAIN_DATABASE_URL =
+  process.env.DATABASE_URL ||
+  'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';
+
+/**
+ * AZDHS dispensary record from the main database
+ */
+interface AZDHSDispensary {
+  id: number;
+  azdhs_id: number;
+  name: string;
+  company_name?: string;
+  address?: string;
+  city: string;
+  state: string;
+  zip?: string;
+  latitude?: number;
+  longitude?: number;
+  dba_name?: string;
+  phone?: string;
+  email?: string;
+  website?: string;
+  google_rating?: string;
+  google_review_count?: number;
+  slug: string;
+  menu_provider?: string;
+  product_provider?: string;
+  created_at: Date;
+  updated_at: Date;
+}
+
+/**
+ * Import result statistics
+ */
+interface ImportResult {
+  total: number;
+  imported: number;
+  skipped: number;
+  errors: string[];
+}
+
+/**
+ * Create a temporary connection to the main database.
+ * The caller owns the returned pool and must call `end()` on it.
+ */
+function getMainDBPool(): Pool {
+  return new Pool({
+    connectionString: MAIN_DATABASE_URL,
+    max: 5,
+    idleTimeoutMillis: 30000,
+    connectionTimeoutMillis: 5000,
+  });
+}
+
+/**
+ * Fetch all AZ dispensaries from the main database, ordered by id.
+ * Opens a pool for this one query and always closes it afterwards.
+ */
+async function fetchAZDHSDispensaries(): Promise<AZDHSDispensary[]> {
+  const pool = getMainDBPool();
+
+  try {
+    const result = await pool.query(`
+      SELECT
+        id, azdhs_id, name, company_name, address, city, state, zip,
+        latitude, longitude, dba_name, phone, email, website,
+        google_rating, google_review_count, slug,
+        menu_provider, product_provider,
+        created_at, updated_at
+      FROM dispensaries
+      WHERE state = 'AZ'
+      ORDER BY id
+    `);
+
+    return result.rows;
+  } finally {
+    await pool.end();
+  }
+}
+
+/**
+ * Import a single dispensary into the Dutchie AZ database.
+ *
+ * Upserts on (platform, slug, city, state): an existing row has its name,
+ * location fields and raw_metadata refreshed instead of being duplicated.
+ *
+ * @returns the id of the inserted or updated row
+ */
+async function importDispensary(disp: AZDHSDispensary): Promise<number> {
+  const result = await dutchieQuery<{ id: number }>(
+    `
+    INSERT INTO dispensaries (
+      platform, name, slug, city, state, postal_code, address,
+      latitude, longitude, is_delivery, is_pickup, raw_metadata, updated_at
+    ) VALUES (
+      $1, $2, $3, $4, $5, $6, $7,
+      $8, $9, $10, $11, $12, NOW()
+    )
+    ON CONFLICT (platform, slug, city, state) DO UPDATE SET
+      name = EXCLUDED.name,
+      postal_code = EXCLUDED.postal_code,
+      address = EXCLUDED.address,
+      latitude = EXCLUDED.latitude,
+      longitude = EXCLUDED.longitude,
+      raw_metadata = EXCLUDED.raw_metadata,
+      updated_at = NOW()
+    RETURNING id
+    `,
+    [
+      'dutchie', // Will be updated when Dutchie match is found
+      disp.dba_name || disp.name,
+      disp.slug,
+      disp.city,
+      disp.state,
+      disp.zip,
+      disp.address,
+      disp.latitude,
+      disp.longitude,
+      false, // is_delivery - unknown
+      true, // is_pickup - assume true
+      JSON.stringify({
+        azdhs_id: disp.azdhs_id,
+        main_db_id: disp.id,
+        company_name: disp.company_name,
+        phone: disp.phone,
+        email: disp.email,
+        website: disp.website,
+        google_rating: disp.google_rating,
+        google_review_count: disp.google_review_count,
+        menu_provider: disp.menu_provider,
+        product_provider: disp.product_provider,
+      }),
+    ]
+  );
+
+  return result.rows[0].id;
+}
+
+/**
+ * Import every record in turn, tallying outcomes into `result`.
+ * A failure on one record is recorded and does not stop the batch.
+ *
+ * @param dispensaries records to import
+ * @param result tally object, mutated in place
+ * @param logEachImport when true, log one line per imported record
+ */
+async function importDispensaryBatch(
+  dispensaries: AZDHSDispensary[],
+  result: ImportResult,
+  logEachImport: boolean
+): Promise<void> {
+  for (const disp of dispensaries) {
+    try {
+      const id = await importDispensary(disp);
+      result.imported++;
+      if (logEachImport) {
+        console.log(`[AZDHS Import] Imported: ${disp.name} (${disp.city}) -> id=${id}`);
+      }
+    } catch (error: any) {
+      // A thrown value is not guaranteed to be an Error with a message.
+      const message = String(error?.message ?? error);
+      if (message.includes('duplicate')) {
+        result.skipped++;
+      } else {
+        result.errors.push(`${disp.name}: ${message}`);
+      }
+    }
+  }
+}
+
+/**
+ * Import all AZDHS dispensaries into the Dutchie AZ database.
+ *
+ * Never throws: a failure to read the main database is reported through
+ * `errors` on the returned result.
+ */
+export async function importAZDHSDispensaries(): Promise<ImportResult> {
+  console.log('[AZDHS Import] Starting import from main database...');
+
+  const result: ImportResult = {
+    total: 0,
+    imported: 0,
+    skipped: 0,
+    errors: [],
+  };
+
+  try {
+    const dispensaries = await fetchAZDHSDispensaries();
+    result.total = dispensaries.length;
+
+    console.log(`[AZDHS Import] Found ${dispensaries.length} AZ dispensaries in main DB`);
+
+    await importDispensaryBatch(dispensaries, result, true);
+  } catch (error: any) {
+    result.errors.push(`Failed to fetch from main DB: ${error.message}`);
+  }
+
+  console.log(`[AZDHS Import] Complete: ${result.imported} imported, ${result.skipped} skipped, ${result.errors.length} errors`);
+  return result;
+}
+
+/**
+ * Import dispensaries from JSON file (backup export).
+ *
+ * The file must contain a JSON array of dispensary records. Never throws:
+ * read, parse and shape failures are reported through `errors`.
+ */
+export async function importFromJSON(jsonPath: string): Promise<ImportResult> {
+  console.log(`[AZDHS Import] Importing from JSON: ${jsonPath}`);
+
+  const result: ImportResult = {
+    total: 0,
+    imported: 0,
+    skipped: 0,
+    errors: [],
+  };
+
+  try {
+    const fs = await import('fs/promises');
+    const data = await fs.readFile(jsonPath, 'utf-8');
+    const parsed: unknown = JSON.parse(data);
+
+    // Fail with a clear message instead of an iteration error further down.
+    if (!Array.isArray(parsed)) {
+      throw new Error('expected a JSON array of dispensaries');
+    }
+    const dispensaries = parsed as AZDHSDispensary[];
+
+    result.total = dispensaries.length;
+    console.log(`[AZDHS Import] Found ${dispensaries.length} dispensaries in JSON file`);
+
+    await importDispensaryBatch(dispensaries, result, false);
+  } catch (error: any) {
+    result.errors.push(`Failed to read JSON file: ${error.message}`);
+  }
+
+  console.log(`[AZDHS Import] Complete: ${result.imported} imported, ${result.skipped} skipped`);
+  return result;
+}
+
+/**
+ * Get import statistics for AZ rows in the Dutchie AZ dispensaries table.
+ *
+ * `lastImportedAt` is the newest `updated_at` among those rows and is
+ * undefined when there are none.
+ */
+export async function getImportStats(): Promise<{
+  totalDispensaries: number;
+  withPlatformIds: number;
+  withoutPlatformIds: number;
+  lastImportedAt?: Date;
+}> {
+  const { rows } = await dutchieQuery<{
+    total: string;
+    with_platform_id: string;
+    without_platform_id: string;
+    last_updated: Date | null;
+  }>(`
+    SELECT
+      COUNT(*) as total,
+      COUNT(platform_dispensary_id) as with_platform_id,
+      COUNT(*) - COUNT(platform_dispensary_id) as without_platform_id,
+      MAX(updated_at) as last_updated
+    FROM dispensaries
+    WHERE state = 'AZ'
+  `);
+
+  const stats = rows[0];
+  return {
+    totalDispensaries: parseInt(stats.total, 10),
+    withPlatformIds: parseInt(stats.with_platform_id, 10),
+    withoutPlatformIds: parseInt(stats.without_platform_id, 10),
+    // MAX() over zero rows is NULL; expose that as "not set".
+    lastImportedAt: stats.last_updated ?? undefined,
+  };
+}
diff --git a/backend/src/dutchie-az/services/discovery.ts b/backend/src/dutchie-az/services/discovery.ts
new file mode 100644
index 00000000..e7afa95b
--- /dev/null
+++ b/backend/src/dutchie-az/services/discovery.ts
@@ -0,0 +1,230 @@
+/**
+ * Dutchie AZ Discovery Service
+ *
+ * Discovers and manages dispensaries from Dutchie for Arizona.
+ */
+
+import { query, getClient } from '../db/connection';
+import { discoverArizonaDispensaries, resolveDispensaryId } from './graphql-client';
+import { Dispensary } from '../types';
+
+// Pause between consecutive platform-ID lookups.
+const RESOLVE_DELAY_MS = 2000;
+
+/**
+ * Upsert a dispensary record.
+ *
+ * Conflicts on (platform, slug, city, state) update the existing row. An
+ * already-stored platform_dispensary_id is kept when the incoming one is null.
+ *
+ * @returns the id of the inserted or updated row
+ */
+async function upsertDispensary(dispensary: Partial<Dispensary>): Promise<number> {
+  const result = await query<{ id: number }>(
+    `
+    INSERT INTO dispensaries (
+      platform, name, slug, city, state, postal_code, address,
+      latitude, longitude, platform_dispensary_id,
+      is_delivery, is_pickup, raw_metadata, updated_at
+    ) VALUES (
+      $1, $2, $3, $4, $5, $6, $7,
+      $8, $9, $10,
+      $11, $12, $13, NOW()
+    )
+    ON CONFLICT (platform, slug, city, state) DO UPDATE SET
+      name = EXCLUDED.name,
+      postal_code = EXCLUDED.postal_code,
+      address = EXCLUDED.address,
+      latitude = EXCLUDED.latitude,
+      longitude = EXCLUDED.longitude,
+      platform_dispensary_id = COALESCE(EXCLUDED.platform_dispensary_id, dispensaries.platform_dispensary_id),
+      is_delivery = EXCLUDED.is_delivery,
+      is_pickup = EXCLUDED.is_pickup,
+      raw_metadata = EXCLUDED.raw_metadata,
+      updated_at = NOW()
+    RETURNING id
+    `,
+    [
+      dispensary.platform || 'dutchie',
+      dispensary.name,
+      dispensary.slug,
+      dispensary.city,
+      dispensary.state || 'AZ',
+      dispensary.postalCode,
+      dispensary.address,
+      dispensary.latitude,
+      dispensary.longitude,
+      dispensary.platformDispensaryId,
+      dispensary.isDelivery ?? false,
+      // `?? true` only defaults a missing flag. The previous `|| true` also
+      // overwrote an explicit false, so pickup could never be stored as false.
+      dispensary.isPickup ?? true,
+      dispensary.rawMetadata ? JSON.stringify(dispensary.rawMetadata) : null,
+    ]
+  );
+
+  return result.rows[0].id;
+}
+
+/**
+ * Normalize a raw discovery result to Dispensary.
+ *
+ * Each field is read from the first populated of several candidate keys.
+ * `state` is always 'AZ' and the untouched input is kept as `rawMetadata`.
+ */
+function normalizeDispensary(raw: any): Partial<Dispensary> {
+  return {
+    platform: 'dutchie',
+    name: raw.name || raw.Name || '',
+    slug: raw.slug || raw.cName || raw.id || '',
+    city: raw.city || raw.address?.city || '',
+    state: 'AZ',
+    postalCode: raw.postalCode || raw.address?.postalCode || raw.address?.zip,
+    address: raw.streetAddress || raw.address?.streetAddress,
+    latitude: raw.latitude || raw.location?.lat,
+    longitude: raw.longitude || raw.location?.lng,
+    platformDispensaryId: raw.dispensaryId || raw.id || null,
+    isDelivery: raw.isDelivery || raw.delivery || false,
+    // Default to true only when neither flag is present. The previous
+    // `|| true` turned an explicit false into true.
+    isPickup: raw.isPickup ?? raw.pickup ?? true,
+    rawMetadata: raw,
+  };
+}
+
+/**
+ * Import dispensaries from the existing dispensaries table (from AZDHS data)
+ * This creates records in the dutchie_az database for AZ dispensaries
+ *
+ * NOTE(review): despite the name, this currently upserts only the hard-coded
+ * list below.
+ */
+export async function importFromExistingDispensaries(): Promise<{ imported: number }> {
+  console.log('[Discovery] Importing from existing dispensaries table...');
+
+  // This is a workaround - we'll use the dispensaries we already know about
+  // and try to resolve their Dutchie IDs
+  const knownDispensaries = [
+    { name: 'Deeply Rooted', slug: 'AZ-Deeply-Rooted', city: 'Phoenix', state: 'AZ' },
+    { name: 'Curaleaf Gilbert', slug: 'curaleaf-gilbert', city: 'Gilbert', state: 'AZ' },
+    { name: 'Zen Leaf Prescott', slug: 'AZ-zen-leaf-prescott', city: 'Prescott', state: 'AZ' },
+    // Add more known Dutchie stores here
+  ];
+
+  let imported = 0;
+
+  for (const disp of knownDispensaries) {
+    try {
+      const id = await upsertDispensary({
+        platform: 'dutchie',
+        name: disp.name,
+        slug: disp.slug,
+        city: disp.city,
+        state: disp.state,
+      });
+      imported++;
+      console.log(`[Discovery] Imported: ${disp.name} (id=${id})`);
+    } catch (error: any) {
+      console.error(`[Discovery] Failed to import ${disp.name}:`, error.message);
+    }
+  }
+
+  return { imported };
+}
+
+/**
+ * Discover all Arizona Dutchie dispensaries via GraphQL.
+ *
+ * Records without a name, slug or city are skipped. Never throws: failures
+ * are returned in `errors`.
+ */
+export async function discoverDispensaries(): Promise<{ discovered: number; errors: string[] }> {
+  console.log('[Discovery] Starting Arizona dispensary discovery...');
+  const errors: string[] = [];
+  let discovered = 0;
+  let skipped = 0;
+
+  try {
+    const rawDispensaries = await discoverArizonaDispensaries();
+    console.log(`[Discovery] Found ${rawDispensaries.length} dispensaries from GraphQL`);
+
+    for (const raw of rawDispensaries) {
+      try {
+        const normalized = normalizeDispensary(raw);
+        if (normalized.name && normalized.slug && normalized.city) {
+          await upsertDispensary(normalized);
+          discovered++;
+        } else {
+          skipped++;
+        }
+      } catch (error: any) {
+        errors.push(`${raw.name || raw.slug}: ${error.message}`);
+      }
+    }
+  } catch (error: any) {
+    errors.push(`Discovery failed: ${error.message}`);
+  }
+
+  // Make dropped records visible instead of discarding them silently.
+  if (skipped > 0) {
+    console.warn(`[Discovery] Skipped ${skipped} records missing name, slug or city`);
+  }
+
+  console.log(`[Discovery] Completed: ${discovered} dispensaries, ${errors.length} errors`);
+  return { discovered, errors };
+}
+
+/**
+ * Resolve platform dispensary IDs for all dispensaries that don't have one.
+ *
+ * Lookups run one at a time with a fixed pause after each attempt, whether it
+ * succeeded, found nothing, or threw.
+ */
+export async function resolvePlatformDispensaryIds(): Promise<{ resolved: number; failed: number }> {
+  console.log('[Discovery] Resolving platform dispensary IDs...');
+
+  const { rows: dispensaries } = await query(
+    `
+    SELECT * FROM dispensaries
+    WHERE platform = 'dutchie' AND platform_dispensary_id IS NULL
+    ORDER BY id
+    `
+  );
+
+  let resolved = 0;
+  let failed = 0;
+
+  for (const dispensary of dispensaries) {
+    try {
+      console.log(`[Discovery] Resolving ID for: ${dispensary.name} (${dispensary.slug})`);
+      const platformId = await resolveDispensaryId(dispensary.slug);
+
+      if (platformId) {
+        await query(
+          `
+          UPDATE dispensaries SET platform_dispensary_id = $1, updated_at = NOW()
+          WHERE id = $2
+          `,
+          [platformId, dispensary.id]
+        );
+        resolved++;
+        console.log(`[Discovery] Resolved: ${dispensary.slug} -> ${platformId}`);
+      } else {
+        failed++;
+        console.log(`[Discovery] Could not resolve: ${dispensary.slug}`);
+      }
+    } catch (error: any) {
+      failed++;
+      console.error(`[Discovery] Error resolving ${dispensary.slug}:`, error.message);
+    } finally {
+      // Delay between requests. Running it in `finally` keeps the pacing when
+      // a lookup throws, so a run of failures cannot become a request burst.
+      await new Promise((r) => setTimeout(r, RESOLVE_DELAY_MS));
+    }
+  }
+
+  console.log(`[Discovery] Completed: ${resolved} resolved, ${failed} failed`);
+  return { resolved, failed };
+}
+
+/**
+ * Get all dispensaries on the dutchie platform, ordered by name
+ */
+export async function getAllDispensaries(): Promise<Dispensary[]> {
+  const { rows } = await query(
+    `SELECT * FROM dispensaries WHERE platform = 'dutchie' ORDER BY name`
+  );
+  return rows;
+}
+
+/**
+ * Get dispensary by ID
+ *
+ * @returns the matching row, or null when no dispensary has that id
+ */
+export async function getDispensaryById(id: number): Promise<Dispensary | null> {
+  const { rows } = await query(
+    `SELECT * FROM dispensaries WHERE id = $1`,
+    [id]
+  );
+  return rows[0] || null;
+}
+
+/**
+ * Get dispensaries with platform IDs (ready for crawling)
+ */
+export async function getDispensariesWithPlatformIds(): Promise<Dispensary[]> {
+  const { rows } = await query(
+    `
+    SELECT * FROM dispensaries
+    WHERE platform = 'dutchie' AND platform_dispensary_id IS NOT NULL
+    ORDER BY name
+    `
+  );
+  return rows;
+}
diff --git a/backend/src/dutchie-az/services/graphql-client.ts b/backend/src/dutchie-az/services/graphql-client.ts
new file mode 100644
index 00000000..a3fca3b4
--- /dev/null
+++ b/backend/src/dutchie-az/services/graphql-client.ts
@@ -0,0 +1,666 @@
+/**
+ * Dutchie GraphQL Client
+ *
+ * Makes GraphQL requests to Dutchie's API using Puppeteer to bypass Cloudflare.
+ * Uses in-page fetch to maintain browser session/cookies.
+ * + * Key features: + * - Browser session reuse between Mode A and Mode B (single browser per store) + * - Config-driven GraphQL hashes + * - POST fallback when GET fails with 405 + * - Pagination retry logic + * - Proper termination on incomplete pages + */

import puppeteer from 'puppeteer-extra';
import type { Browser, Page } from 'puppeteer';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import {
  DutchieRawProduct,
  DutchiePOSChild,
  FilteredProductsVariables,
  CrawlMode,
} from '../types';
import { dutchieConfig, GRAPHQL_HASHES, ARIZONA_CENTERPOINTS } from '../config/dutchie';

puppeteer.use(StealthPlugin());

// Re-export for backward compatibility
export { GRAPHQL_HASHES, ARIZONA_CENTERPOINTS };

/** A launched browser plus the page used to issue in-page GraphQL requests. */
interface BrowserSession {
  browser: Browser;
  page: Page;
  /** Read from `window.reactEnv` when the session was opened on an embedded-menu URL. */
  dispensaryId?: string;
}

// ============================================================
// BROWSER SESSION MANAGEMENT
// ============================================================

/**
 * Launch a browser session for Dutchie GraphQL requests
 *
 * @param menuUrl - Page to load to establish the session; defaults to the
 *   public dispensaries listing.
 * @returns The browser, the loaded page and, for embedded-menu URLs, the
 *   dispensary ID exposed by the page (may be undefined).
 * @throws Whatever Puppeteer throws during launch or navigation. If setup
 *   fails after launch, the browser is closed before the error is rethrown.
 */
async function createBrowserSession(menuUrl?: string): Promise<BrowserSession> {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: dutchieConfig.browserArgs,
  });

  try {
    const page = await browser.newPage();

    // Set up stealth
    await page.setUserAgent(dutchieConfig.userAgent);
    await page.setViewport({ width: 1920, height: 1080 });
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
      (window as any).chrome = { runtime: {} };
    });

    // Navigate to establish session
    const url = menuUrl || 'https://dutchie.com/dispensaries';
    console.log(`[GraphQL Client] Loading ${url} to establish session...`);

    await page.goto(url, {
      waitUntil: 'networkidle2',
      timeout: dutchieConfig.navigationTimeout,
    });
    await new Promise((r) => setTimeout(r, dutchieConfig.pageLoadDelay));

    // Try to get dispensary ID from page if it's a menu page
    let dispensaryId: string | undefined;
    if (menuUrl && menuUrl.includes('embedded-menu')) {
      dispensaryId = await page.evaluate(() => (window as any).reactEnv?.dispensaryId);
    }

    return { browser, page, dispensaryId };
  } catch (error) {
    // The caller never receives a session to close, so release the browser
    // here. A failure while closing must not mask the original error.
    await browser.close().catch(() => undefined);
    throw error;
  }
}

/**
 * Close browser session
 *
 * @param session - Session returned by `createBrowserSession`.
 */
async function closeBrowserSession(session: BrowserSession): Promise<void> {
  await session.browser.close();
}

// ============================================================
// GRAPHQL EXECUTION WITH POST FALLBACK
// ============================================================

/**
 * Execute a GraphQL query from within the browser context
 * Supports GET (default) with POST fallback on 405 errors
 *
 * The request is a persisted query: only the operation name, variables and
 * the query's sha256 hash are sent.
 *
 * @param page - Page whose cookies/session the in-page `fetch` reuses.
 * @param operationName - GraphQL operation name.
 * @param variables - Operation variables (JSON-serializable).
 * @param hash - sha256 hash identifying the persisted query.
 * @param endpoint - GraphQL endpoint URL.
 * @returns The parsed JSON response body.
 * @throws Error `HTTP <status>` when the final response is not OK.
 */
async function executeGraphQL(
  page: Page,
  operationName: string,
  variables: any,
  hash: string,
  endpoint: string = 'https://dutchie.com/graphql'
): Promise<any> {
  const headers = dutchieConfig.defaultHeaders;
  const preferGet = dutchieConfig.preferGet;
  const enablePostFallback = dutchieConfig.enablePostFallback;

  // Everything inside the callback runs in the page, not in Node: it can only
  // see the arguments passed explicitly below, and its console.log output goes
  // to the page console.
  return page.evaluate(
    async (
      opName: string,
      vars: any,
      queryHash: string,
      url: string,
      hdrs: Record<string, string>,
      useGet: boolean,
      allowPostFallback: boolean
    ) => {
      const doFetch = async (method: 'GET' | 'POST'): Promise<Response> => {
        if (method === 'GET') {
          const qs = new URLSearchParams({
            operationName: opName,
            variables: JSON.stringify(vars),
            extensions: JSON.stringify({
              persistedQuery: { version: 1, sha256Hash: queryHash },
            }),
          });
          return fetch(`${url}?${qs.toString()}`, {
            method: 'GET',
            headers: {
              ...hdrs,
              'content-type': 'application/json',
            },
            credentials: 'include',
          });
        } else {
          // POST request with full body
          return fetch(url, {
            method: 'POST',
            headers: {
              ...hdrs,
              'content-type': 'application/json',
            },
            credentials: 'include',
            body: JSON.stringify({
              operationName: opName,
              variables: vars,
              extensions: {
                persistedQuery: { version: 1, sha256Hash: queryHash },
              },
            }),
          });
        }
      };

      // Try GET first if preferred
      if (useGet) {
        const response = await doFetch('GET');

        // If GET fails with 405 and POST fallback is enabled, try POST
        if (response.status === 405 && allowPostFallback) {
          console.log('[GraphQL] GET returned 405, falling back to POST');
          const postResponse = await doFetch('POST');
          if (!postResponse.ok) {
            throw new Error(`HTTP ${postResponse.status} (POST fallback)`);
          }
          return postResponse.json();
        }

        if (!response.ok) {
          throw new Error(`HTTP ${response.status}`);
        }
        return response.json();
      } else {
        // Use POST directly
        const response = await doFetch('POST');
        if (!response.ok) {
          throw new Error(`HTTP ${response.status}`);
        }
        return response.json();
      }
    },
    operationName,
    variables,
    hash,
    endpoint,
    headers,
    preferGet,
    enablePostFallback
  );
}

// ============================================================
// DISPENSARY ID RESOLUTION
// ============================================================

/**
 * Resolve a dispensary slug to its internal platform ID
 *
 * Opens the store's embedded menu and uses the ID exposed by the page when
 * present; otherwise falls back to the GetAddressBasedDispensaryData query.
 *
 * @param slug - Dispensary slug as used in the embedded-menu URL.
 * @returns The platform dispensary ID, or `null` when it cannot be resolved
 *   or the lookup fails (the error is logged, not rethrown).
 */
export async function resolveDispensaryId(slug: string): Promise<string | null> {
  const session = await createBrowserSession(`https://dutchie.com/embedded-menu/${slug}`);

  try {
    // First check if we got it from the page context
    if (session.dispensaryId) {
      console.log(`[GraphQL Client] Got dispensaryId from page: ${session.dispensaryId}`);
      return session.dispensaryId;
    }

    // Otherwise try the GetAddressBasedDispensaryData query
    const result = await executeGraphQL(
      session.page,
      'GetAddressBasedDispensaryData',
      { input: { dispensaryId: slug } },
      GRAPHQL_HASHES.GetAddressBasedDispensaryData
    );

    const dispensaryId = result?.data?.getAddressBasedDispensaryData?.dispensaryId;
    console.log(`[GraphQL Client] Resolved ${slug} -> ${dispensaryId}`);
    return dispensaryId || null;
  } catch (error: any) {
    console.error(`[GraphQL Client] Failed to resolve ${slug}:`, error.message);
    return null;
  } finally {
    await closeBrowserSession(session);
  }
}

// ============================================================
// FILTER VARIABLE BUILDING
// ============================================================

/**
 * Build GraphQL variables based on crawl mode
 *
 * MODE A - "UI parity": Matches what Dutchie website shows
 *   - Status: 'Active'
 *   - removeProductsBelowOptionThresholds: true (default behavior)
 *   - bypassOnlineThresholds: false
 *
 * MODE B - "MAX COVERAGE": Tries to get out-of-stock products
 *   - Status: undefined (no filter)
 *   - removeProductsBelowOptionThresholds: false
 *   - bypassOnlineThresholds: true
 *
 * Any crawl mode other than 'mode_a' is treated as Mode B.
 *
 * NOTE(review): README_DUTCHIE_AZ.md describes the two modes with the Status
 * values the other way round (Mode A `Status: null`, Mode B `Status: 'Active'`)
 * — confirm which is intended and align the two.
 *
 * @param platformDispensaryId - Dutchie's internal dispensary ID.
 * @param pricingType - Price list to request.
 * @param crawlMode - 'mode_a' for UI parity, otherwise max coverage.
 * @param pageNum - Zero-based page index.
 * @param perPage - Page size.
 * @returns Variables for the FilteredProducts operation.
 */
function buildFilterVariables(
  platformDispensaryId: string,
  pricingType: 'rec' | 'med',
  crawlMode: CrawlMode,
  pageNum: number,
  perPage: number
): FilteredProductsVariables {
  const isUiParity = crawlMode === 'mode_a';

  return {
    includeEnterpriseSpecials: false,
    productsFilter: {
      dispensaryId: platformDispensaryId,
      pricingType,
      // Max-coverage mode sends no Status key at all - try to get all products
      ...(isUiParity ? { Status: 'Active' as const } : {}),
      types: [],
      strainTypes: [],
      subcategories: [],
      useCache: false,
      isDefaultSort: true,
      sortBy: 'popularSortIdx',
      sortDirection: 1,
      bypassOnlineThresholds: !isUiParity,
      isKioskMenu: false,
      removeProductsBelowOptionThresholds: isUiParity,
    },
    page: pageNum,
    perPage,
  };
}

// ============================================================
// PRODUCT FETCHING WITH RETRY & PAGINATION
// ============================================================

/**
 * Fetch all
products for a dispensary via paginated GraphQL
 * Supports retry logic and proper termination
 *
 * Pagination stops at the first of: a page that fails every retry, a response
 * carrying GraphQL `errors`, a short or empty page, or `maxPages`. In the first
 * two cases the products collected so far are still returned, so a caller
 * cannot tell a truncated crawl from a complete one from the result alone.
 *
 * @param session - Existing browser session to reuse
 * @param platformDispensaryId - Dutchie's internal dispensary ID
 * @param pricingType - Price list to request
 * @param crawlMode - Which filter set to use (see buildFilterVariables)
 * @param options - Optional overrides for the configured page size / page cap
 * @returns Collected products, the server-reported total (or the collected
 *   count when the server reported none), and the crawl mode used
 */
async function fetchProductsWithSession(
  session: BrowserSession,
  platformDispensaryId: string,
  pricingType: 'rec' | 'med',
  crawlMode: CrawlMode,
  options: { perPage?: number; maxPages?: number } = {}
): Promise<{ products: DutchieRawProduct[]; totalCount: number; crawlMode: CrawlMode }> {
  const perPage = options.perPage ?? dutchieConfig.perPage;
  const maxPages = options.maxPages ?? dutchieConfig.maxPages;
  const maxRetries = dutchieConfig.maxRetries;
  const pageDelayMs = dutchieConfig.pageDelayMs;

  const allProducts: DutchieRawProduct[] = [];
  let pageNum = 0;
  let totalCount = 0;

  console.log(`[GraphQL Client] Fetching products for ${platformDispensaryId} (${pricingType}, ${crawlMode})...`);

  while (pageNum < maxPages) {
    const variables = buildFilterVariables(
      platformDispensaryId,
      pricingType,
      crawlMode,
      pageNum,
      perPage
    );

    let result: any = null;
    let lastError: Error | null = null;

    // Retry logic for failed page fetches
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        result = await executeGraphQL(
          session.page,
          'FilteredProducts',
          variables,
          GRAPHQL_HASHES.FilteredProducts
        );
        lastError = null;
        break; // Success, exit retry loop
      } catch (error: any) {
        lastError = error;
        console.warn(`[GraphQL Client] Page ${pageNum} attempt ${attempt + 1} failed: ${error.message}`);
        if (attempt < maxRetries) {
          // Linear backoff: 1s, 2s, 3s, ...
          await new Promise((r) => setTimeout(r, 1000 * (attempt + 1)));
        }
      }
    }

    // If all retries failed, log error and break
    if (lastError) {
      console.error(`[GraphQL Client] Page ${pageNum} failed after ${maxRetries + 1} attempts: ${lastError.message}`);
      break;
    }

    // `result` can legitimately be null if the endpoint answered with a JSON null
    if (result?.errors) {
      console.error('[GraphQL Client] GraphQL errors:', result.errors);
      break;
    }

    const products = result?.data?.filteredProducts?.products || [];
    const queryInfo = result?.data?.filteredProducts?.queryInfo;

    if (queryInfo?.totalCount) {
      totalCount = queryInfo.totalCount;
    }

    console.log(
      `[GraphQL Client] Page ${pageNum}: ${products.length} products (total so far: ${allProducts.length + products.length}/${totalCount})`
    );

    allProducts.push(...products);

    // Stop if we got less than a full page (this is the last page).
    // An empty page always lands here when perPage is positive.
    if (products.length < perPage) {
      console.log(`[GraphQL Client] Incomplete page (${products.length} < ${perPage}), stopping pagination`);
      break;
    }

    // Only reachable with a non-positive perPage; never keep paging on empty results
    if (products.length === 0) {
      console.log('[GraphQL Client] Empty page, stopping pagination');
      break;
    }

    pageNum++;

    // Small delay between pages
    await new Promise((r) => setTimeout(r, pageDelayMs));
  }

  console.log(`[GraphQL Client] Fetched ${allProducts.length} total products (${crawlMode})`);
  return { products: allProducts, totalCount: totalCount || allProducts.length, crawlMode };
}

/**
 * Fetch all products for a dispensary (legacy interface - creates new browser)
 *
 * @param platformDispensaryId - Dutchie's internal dispensary ID
 * @param pricingType - Price list to request (default 'rec')
 * @param options - `menuUrl` to open for the session, `crawlMode` (default
 *   'mode_a'), and optional `perPage` / `maxPages` overrides
 * @returns Same shape as fetchProductsWithSession
 */
export async function fetchAllProducts(
  platformDispensaryId: string,
  pricingType: 'rec' | 'med' = 'rec',
  options: {
    perPage?: number;
    maxPages?: number;
    menuUrl?: string;
    crawlMode?: CrawlMode;
  } = {}
): Promise<{ products: DutchieRawProduct[]; totalCount: number; crawlMode: CrawlMode }> {
  const { crawlMode = 'mode_a', perPage, maxPages } = options;
  const menuUrl = options.menuUrl || `https://dutchie.com/dispensaries`;

  const session = await createBrowserSession(menuUrl);

  try {
    return await fetchProductsWithSession(session, platformDispensaryId, pricingType, crawlMode, {
      perPage,
      maxPages,
    });
  } finally {
    await closeBrowserSession(session);
  }
}

// ============================================================
// MODE A+B MERGING WITH
// OPTIONS
// ============================================================

/**
 * Merge POSMetaData.children arrays from Mode A and Mode B products
 * Uses canonicalID/canonicalSKU/canonicalPackageId (then the option label) as merge key
 * Mode B children may have different quantityAvailable for options not in Mode A
 *
 * Mode A wins on key collisions. Mode A children that carry no usable key are
 * kept as-is (appended after the keyed ones) rather than dropped; keyless
 * Mode B children are skipped because they cannot be de-duplicated.
 *
 * @param modeAProduct - Product as returned by the Mode A crawl
 * @param modeBProduct - Same product as returned by the Mode B crawl
 * @returns The merged list of option children
 */
function mergeProductOptions(
  modeAProduct: DutchieRawProduct,
  modeBProduct: DutchieRawProduct
): DutchiePOSChild[] {
  const modeAChildren = modeAProduct.POSMetaData?.children || [];
  const modeBChildren = modeBProduct.POSMetaData?.children || [];

  // Create a map keyed by option identifier
  const getOptionKey = (child: DutchiePOSChild): string => {
    return child.canonicalID || child.canonicalSKU || child.canonicalPackageId || child.option || '';
  };

  const mergedMap = new Map<string, DutchiePOSChild>();
  const unkeyedModeAChildren: DutchiePOSChild[] = [];

  // Add all Mode A children first (they're "canonical")
  for (const child of modeAChildren) {
    const key = getOptionKey(child);
    if (key) {
      mergedMap.set(key, child);
    } else {
      unkeyedModeAChildren.push(child);
    }
  }

  // Add Mode B children that aren't in Mode A (may include OOS options)
  for (const child of modeBChildren) {
    const key = getOptionKey(child);
    if (key && !mergedMap.has(key)) {
      mergedMap.set(key, child);
    }
  }

  return Array.from(mergedMap.values()).concat(unkeyedModeAChildren);
}

/**
 * Merge a Mode A product with a Mode B product
 * Mode A data is preferred, but children are merged for max coverage
 *
 * @param modeAProduct - Product from the Mode A crawl
 * @param modeBProduct - Matching Mode B product, if the Mode B crawl returned one
 * @returns `modeAProduct` itself when there is no Mode B match; otherwise a
 *   shallow copy whose `POSMetaData.children` is the merged option list
 */
function mergeProducts(
  modeAProduct: DutchieRawProduct,
  modeBProduct: DutchieRawProduct | undefined
): DutchieRawProduct {
  if (!modeBProduct) {
    return modeAProduct;
  }

  // Merge children arrays
  const mergedChildren = mergeProductOptions(modeAProduct, modeBProduct);

  // Return Mode A product with merged children
  return {
    ...modeAProduct,
    POSMetaData: {
      ...modeAProduct.POSMetaData,
      children: mergedChildren,
    },
  };
}

/**
 * Fetch products using BOTH crawl modes with SINGLE browser session
 * This ensures maximum coverage by running Mode A then Mode B with the same
session
 *
 * NOTE(review): `options.perPage` and `options.maxPages` are accepted but not
 * forwarded to the page fetcher here; only `menuUrl` is used.
 *
 * @param platformDispensaryId - Dutchie's internal dispensary ID
 * @param pricingType - Price list to request (default 'rec')
 * @param options - `menuUrl` selects the page opened to establish the session
 * @returns Raw results of each mode plus the merged, de-duplicated product list
 */
export async function fetchAllProductsBothModes(
  platformDispensaryId: string,
  pricingType: 'rec' | 'med' = 'rec',
  options: {
    perPage?: number;
    maxPages?: number;
    menuUrl?: string;
  } = {}
): Promise<{
  modeA: { products: DutchieRawProduct[]; totalCount: number };
  modeB: { products: DutchieRawProduct[]; totalCount: number };
  merged: { products: DutchieRawProduct[]; totalCount: number };
}> {
  console.log(`[GraphQL Client] Running two-mode crawl for ${platformDispensaryId} (${pricingType})...`);

  const menuUrl = options.menuUrl || `https://dutchie.com/dispensaries`;

  // Create a SINGLE browser session for both modes
  const session = await createBrowserSession(menuUrl);

  try {
    // Run Mode A (UI parity) with shared session
    const modeAResult = await fetchProductsWithSession(
      session,
      platformDispensaryId,
      pricingType,
      'mode_a'
    );

    // Small delay between modes
    await new Promise((r) => setTimeout(r, dutchieConfig.modeDelayMs));

    // Run Mode B (MAX COVERAGE) with same session - NO new browser!
    const modeBResult = await fetchProductsWithSession(
      session,
      platformDispensaryId,
      pricingType,
      'mode_b'
    );

    // Build lookup map for Mode B products
    const modeBMap = new Map<string, DutchieRawProduct>();
    for (const product of modeBResult.products) {
      modeBMap.set(product._id, product);
    }

    // Merge results - deduplicate by _id, merge options
    const productMap = new Map<string, DutchieRawProduct>();

    // Add Mode A products first (canonical), merging with Mode B if exists
    for (const product of modeAResult.products) {
      const modeBProduct = modeBMap.get(product._id);
      const mergedProduct = mergeProducts(product, modeBProduct);
      productMap.set(product._id, mergedProduct);
    }

    // Add Mode B products that aren't in Mode A (may include OOS items)
    for (const product of modeBResult.products) {
      if (!productMap.has(product._id)) {
        productMap.set(product._id, product);
      }
    }

    const merged = Array.from(productMap.values());

    console.log(`[GraphQL Client] Two-mode crawl complete:`);
    console.log(`  Mode A: ${modeAResult.products.length} products`);
    console.log(`  Mode B: ${modeBResult.products.length} products`);
    console.log(`  Merged: ${merged.length} unique products`);

    return {
      modeA: { products: modeAResult.products, totalCount: modeAResult.totalCount },
      modeB: { products: modeBResult.products, totalCount: modeBResult.totalCount },
      merged: { products: merged, totalCount: merged.length },
    };
  } finally {
    // Close the shared session when done
    await closeBrowserSession(session);
  }
}

// ============================================================
// DISPENSARY DISCOVERY
// ============================================================

/**
 * Discover dispensaries near a geographic point
 *
 * @param lat - Latitude of the search center
 * @param lng - Longitude of the search center
 * @param radiusKm - Search radius in kilometres (default 100)
 * @returns Raw dispensary records from the discovery query; an empty array
 *   when discovery is disabled in config or the query fails (the error is
 *   logged, not rethrown)
 */
export async function discoverDispensaries(
  lat: number,
  lng: number,
  radiusKm: number = 100
): Promise<any[]> {
  // Skip discovery if disabled in config
  if (!dutchieConfig.useDiscovery) {
    console.log('[GraphQL Client] Discovery disabled in config, skipping');
    return [];
  }

  const session = await createBrowserSession();

  try {
    console.log(`[GraphQL Client] Discovering dispensaries near ${lat}, ${lng}...`);

    // Try to use ConsumerDispensaries or similar discovery query
    // Note: The exact operation may need to be captured from live traffic
    const result = await executeGraphQL(
      session.page,
      'ConsumerDispensaries',
      {
        filter: {
          lat,
          lng,
          radius: radiusKm * 1000, // Convert to meters if needed
          isDelivery: false,
        },
      },
      GRAPHQL_HASHES.ConsumerDispensaries
    );

    const dispensaries = result?.data?.consumerDispensaries || [];
    console.log(`[GraphQL Client] Found ${dispensaries.length} dispensaries`);
    return dispensaries;
  } catch (error: any) {
    console.error(`[GraphQL Client] Discovery failed:`, error.message);
    return [];
  } finally {
    await closeBrowserSession(session);
  }
}

/**
 * Discover all Arizona Dutchie dispensaries using multiple centerpoints
 *
 * Scans every entry of ARIZONA_CENTERPOINTS with a 150 km radius, keeps only
 * records whose state is 'AZ' or 'Arizona', and de-duplicates them by
 * slug / cName / id (first occurrence wins).
 *
 * @returns Unique Arizona dispensary records; empty when discovery is disabled
 */
export async function discoverArizonaDispensaries(): Promise<any[]> {
  // Every scan would return [] anyway; skip the loop and its per-scan delays
  if (!dutchieConfig.useDiscovery) {
    console.log('[GraphQL Client] Discovery disabled in config, skipping');
    return [];
  }

  const allDispensaries = new Map<string, any>();

  for (let i = 0; i < ARIZONA_CENTERPOINTS.length; i++) {
    const center = ARIZONA_CENTERPOINTS[i];
    console.log(`[GraphQL Client] Scanning ${center.name}...`);

    try {
      const dispensaries = await discoverDispensaries(center.lat, center.lng, 150);

      for (const disp of dispensaries) {
        // Filter to AZ only
        const state = disp.state || disp.address?.state;
        if (state === 'AZ' || state === 'Arizona') {
          const key = disp.slug || disp.cName || disp.id;
          if (key && !allDispensaries.has(key)) {
            allDispensaries.set(key, disp);
          }
        }
      }

      // Delay between scans (nothing to wait for after the last one)
      if (i < ARIZONA_CENTERPOINTS.length - 1) {
        await new Promise((r) => setTimeout(r, 2000));
      }
    } catch (error: any) {
      console.error(`[GraphQL Client] Failed to scan ${center.name}:`, error.message);
    }
  }

  const result = Array.from(allDispensaries.values());
  console.log(`[GraphQL Client] Total unique AZ dispensaries: ${result.length}`);
  return result;
}
diff --git a/backend/src/dutchie-az/services/product-crawler.ts
b/backend/src/dutchie-az/services/product-crawler.ts new file mode 100644 index 00000000..d2cca9ef --- /dev/null +++ b/backend/src/dutchie-az/services/product-crawler.ts @@ -0,0 +1,759 @@
/**
 * Dutchie AZ Product Crawler Service
 *
 * Crawls products from Dutchie dispensaries and stores them in the dutchie_az database.
 * Handles normalization from GraphQL response to database entities.
 */

import { query, getClient } from '../db/connection';
import { fetchAllProducts, fetchAllProductsBothModes } from './graphql-client';
import {
  DutchieRawProduct,
  DutchieProduct,
  DutchieProductSnapshot,
  DutchieProductOptionSnapshot,
  DutchiePOSChild,
  Dispensary,
  CrawlMode,
  StockStatus,
  deriveStockStatus,
  calculateTotalQuantity,
} from '../types';

// ============================================================
// NORMALIZATION FUNCTIONS
// ============================================================

/**
 * Convert price to cents
 *
 * @param price - Price in currency units; may be absent
 * @returns The price multiplied by 100 and rounded, or `undefined` when the
 *   input is missing or not a finite number
 */
function toCents(price?: number | null): number | undefined {
  if (price === undefined || price === null || !Number.isFinite(price)) return undefined;
  return Math.round(price * 100);
}

/**
 * Get min value from array of numbers
 *
 * Null/undefined entries are ignored.
 *
 * @returns The smallest value, or `undefined` when there is no usable value
 *   (never `Infinity`, which `Math.min()` yields for an empty argument list)
 */
function getMin(arr?: Array<number | null | undefined>): number | undefined {
  if (!arr) return undefined;
  const values = arr.filter((n): n is number => n !== null && n !== undefined);
  return values.length > 0 ? Math.min(...values) : undefined;
}

/**
 * Get max value from array of numbers
 *
 * Null/undefined entries are ignored.
 *
 * @returns The largest value, or `undefined` when there is no usable value
 *   (never `-Infinity`, which `Math.max()` yields for an empty argument list)
 */
function getMax(arr?: Array<number | null | undefined>): number | undefined {
  if (!arr) return undefined;
  const values = arr.filter((n): n is number => n !== null && n !== undefined);
  return values.length > 0 ? Math.max(...values) : undefined;
}

/**
 * Normalize a POSMetaData.children entry to DutchieProductOptionSnapshot
 *
 * @param child - One option/variant entry from the raw product
 * @returns The option in snapshot form, prices converted to cents, with the
 *   untouched child kept in `rawChildPayload`
 */
function normalizeOption(child: DutchiePOSChild): DutchieProductOptionSnapshot {
  return {
    // First available identifier wins; 'unknown' when the child has none
    optionId: child.canonicalID || child.canonicalPackageId || child.canonicalSKU || child.option || 'unknown',
    canonicalId: child.canonicalID,
    canonicalPackageId: child.canonicalPackageId,
    canonicalSKU: child.canonicalSKU,
    canonicalName: child.canonicalName,
    canonicalCategory: child.canonicalCategory,
    canonicalCategoryId: child.canonicalCategoryId,
    canonicalBrandId: child.canonicalBrandId,
    canonicalBrandName: child.canonicalBrandName,
    canonicalStrainId: child.canonicalStrainId,
    canonicalVendorId: child.canonicalVendorId,
    optionLabel: child.option,
    packageQuantity: child.packageQuantity,
    recEquivalent: child.recEquivalent,
    standardEquivalent: child.standardEquivalent,
    priceCents: toCents(child.price),
    recPriceCents: toCents(child.recPrice),
    medPriceCents: toCents(child.medPrice),
    quantity: child.quantity,
    quantityAvailable: child.quantityAvailable,
    kioskQuantityAvailable: child.kioskQuantityAvailable,
    activeBatchTags: child.activeBatchTags,
    canonicalImgUrl: child.canonicalImgUrl,
    canonicalLabResultUrl: child.canonicalLabResultUrl,
    canonicalEffectivePotencyMg: child.canonicalEffectivePotencyMg,
    rawChildPayload: child,
  };
}

/**
 * Normalize a raw Dutchie product to DutchieProduct (canonical identity)
 *
 * @param raw - Product exactly as returned by the GraphQL API
 * @param dispensaryId - Local `dispensaries.id`
 * @param platformDispensaryId - Dutchie's internal dispensary ID
 * @returns Field values for the `dutchie_products` row, including the raw
 *   payload in `latestRawPayload`
 */
export function normalizeProduct(
  raw: DutchieRawProduct,
  dispensaryId: number,
  platformDispensaryId: string
): Partial<DutchieProduct> {
  return {
    dispensaryId,
    platform: 'dutchie',
    externalProductId: raw._id || raw.id || '',
    platformDispensaryId,
    cName: raw.cName,
    name: raw.Name,

    // Brand
    brandName: raw.brandName || raw.brand?.name,
    brandId: raw.brandId || raw.brand?.id,
    brandLogoUrl: raw.brandLogo || raw.brand?.imageUrl,

    // Classification
    type: raw.type,
    subcategory: raw.subcategory,
    strainType: raw.strainType,
    provider: raw.provider,

    // Potency
    thc: raw.THC,
    thcContent: raw.THCContent?.range?.[0],
    cbd: raw.CBD,
    cbdContent: raw.CBDContent?.range?.[0],
    cannabinoidsV2: raw.cannabinoidsV2,
    effects: raw.effects,

    // Status / flags
    status: raw.Status,
    medicalOnly: raw.medicalOnly || false,
    recOnly: raw.recOnly || false,
    featured: raw.featured || false,
    comingSoon: raw.comingSoon || false,
    certificateOfAnalysisEnabled: raw.certificateOfAnalysisEnabled || false,

    isBelowThreshold: raw.isBelowThreshold || false,
    isBelowKioskThreshold: raw.isBelowKioskThreshold || false,
    optionsBelowThreshold: raw.optionsBelowThreshold || false,
    optionsBelowKioskThreshold: raw.optionsBelowKioskThreshold || false,

    // Derived stock status
    stockStatus: deriveStockStatus(raw),
    totalQuantityAvailable: calculateTotalQuantity(raw),

    // Images
    primaryImageUrl: raw.Image || raw.images?.[0]?.url,
    images: raw.images,

    // Misc
    measurements: raw.measurements,
    weight: typeof raw.weight === 'number' ? String(raw.weight) : raw.weight,
    pastCNames: raw.pastCNames,

    createdAtDutchie: raw.createdAt ? new Date(raw.createdAt) : undefined,
    updatedAtDutchie: raw.updatedAt ? new Date(raw.updatedAt) : undefined,

    latestRawPayload: raw,
  };
}

/**
 * Normalize a raw Dutchie product to DutchieProductSnapshot (time-series data)
 *
 * @param raw - Product exactly as returned by the GraphQL API
 * @param dutchieProductId - `dutchie_products.id` of the canonical row
 * @param dispensaryId - Local `dispensaries.id`
 * @param platformDispensaryId - Dutchie's internal dispensary ID
 * @param pricingType - Price list the crawl requested
 * @param crawlMode - Crawl mode that produced `raw` (default 'mode_a')
 * @returns Field values for one `dutchie_product_snapshots` row, stamped with
 *   the current time in `crawledAt`
 */
export function normalizeSnapshot(
  raw: DutchieRawProduct,
  dutchieProductId: number,
  dispensaryId: number,
  platformDispensaryId: string,
  pricingType: 'rec' | 'med' | 'unknown',
  crawlMode: CrawlMode = 'mode_a'
): Partial<DutchieProductSnapshot> {
  const children = raw.POSMetaData?.children || [];
  const options = children.map(normalizeOption);

  // Aggregate prices from various sources
  const recPrices = raw.recPrices || [];
  const medPrices = raw.medicalPrices || [];
  const recSpecialPrices = raw.recSpecialPrices || [];
  const medSpecialPrices = raw.medicalSpecialPrices || [];
  const wholesalePrices = raw.wholesalePrices || [];

  // Also consider child prices
  const childRecPrices = children.map((c) => c.recPrice).filter((p) => p !== undefined) as number[];
  const childMedPrices = children.map((c) => c.medPrice).filter((p) => p !== undefined) as number[];
  const childPrices = children.map((c) => c.price).filter((p) => p !== undefined) as number[];

  // Aggregate inventory - use calculateTotalQuantity for proper null handling
  const totalQty = calculateTotalQuantity(raw);
  // Kiosk total stays null (unknown) unless at least one child reports a number
  const hasAnyKioskQty = children.some((c) => typeof c.kioskQuantityAvailable === 'number');
  const totalKioskQty = hasAnyKioskQty
    ? children.reduce((sum, c) => sum + (c.kioskQuantityAvailable || 0), 0)
    : null;

  // Determine if on special
  const isOnSpecial =
    raw.special === true ||
    (raw.specialData?.saleSpecials && raw.specialData.saleSpecials.length > 0) ||
    (recSpecialPrices.length > 0 && recSpecialPrices[0] !== null) ||
    (medSpecialPrices.length > 0 && medSpecialPrices[0] !== null);

  return {
    dutchieProductId,
    dispensaryId,
    platformDispensaryId,
    externalProductId: raw._id || raw.id || '',
    pricingType,
    crawlMode,

    status: raw.Status,
    featured: raw.featured || false,
    special: isOnSpecial,
    medicalOnly: raw.medicalOnly || false,
    recOnly: raw.recOnly || false,

    // Product was present in feed
    isPresentInFeed: true,

    // Derived stock status
    stockStatus: deriveStockStatus(raw),

    // Price summary
    recMinPriceCents: toCents(getMin([...recPrices, ...childRecPrices, ...childPrices])),
    recMaxPriceCents: toCents(getMax([...recPrices, ...childRecPrices, ...childPrices])),
    recMinSpecialPriceCents: toCents(getMin(recSpecialPrices)),
    medMinPriceCents: toCents(getMin([...medPrices, ...childMedPrices])),
    medMaxPriceCents: toCents(getMax([...medPrices, ...childMedPrices])),
    medMinSpecialPriceCents: toCents(getMin(medSpecialPrices)),
    wholesaleMinPriceCents: toCents(getMin(wholesalePrices)),

    // Inventory summary - null = unknown, 0 = all OOS
    totalQuantityAvailable: totalQty,
    totalKioskQuantityAvailable: totalKioskQty,
    manualInventory: raw.manualInventory || false,
    isBelowThreshold: raw.isBelowThreshold || false,
    isBelowKioskThreshold: raw.isBelowKioskThreshold || false,

    options,
    rawPayload: raw,
    crawledAt: new Date(),
  };
}

// ============================================================
// DATABASE OPERATIONS
//
============================================================ + +/** + * Upsert a DutchieProduct record + */ +async function upsertProduct(product: Partial): Promise { + const result = await query<{ id: number }>( + ` + INSERT INTO dutchie_products ( + dispensary_id, platform, external_product_id, platform_dispensary_id, + c_name, name, brand_name, brand_id, brand_logo_url, + type, subcategory, strain_type, provider, + thc, thc_content, cbd, cbd_content, cannabinoids_v2, effects, + status, medical_only, rec_only, featured, coming_soon, certificate_of_analysis_enabled, + is_below_threshold, is_below_kiosk_threshold, options_below_threshold, options_below_kiosk_threshold, + stock_status, total_quantity_available, + primary_image_url, images, measurements, weight, past_c_names, + created_at_dutchie, updated_at_dutchie, latest_raw_payload, updated_at + ) VALUES ( + $1, $2, $3, $4, + $5, $6, $7, $8, $9, + $10, $11, $12, $13, + $14, $15, $16, $17, $18, $19, + $20, $21, $22, $23, $24, $25, + $26, $27, $28, $29, + $30, $31, + $32, $33, $34, $35, $36, + $37, $38, $39, NOW() + ) + ON CONFLICT (dispensary_id, external_product_id) DO UPDATE SET + c_name = EXCLUDED.c_name, + name = EXCLUDED.name, + brand_name = EXCLUDED.brand_name, + brand_id = EXCLUDED.brand_id, + brand_logo_url = EXCLUDED.brand_logo_url, + type = EXCLUDED.type, + subcategory = EXCLUDED.subcategory, + strain_type = EXCLUDED.strain_type, + provider = EXCLUDED.provider, + thc = EXCLUDED.thc, + thc_content = EXCLUDED.thc_content, + cbd = EXCLUDED.cbd, + cbd_content = EXCLUDED.cbd_content, + cannabinoids_v2 = EXCLUDED.cannabinoids_v2, + effects = EXCLUDED.effects, + status = EXCLUDED.status, + medical_only = EXCLUDED.medical_only, + rec_only = EXCLUDED.rec_only, + featured = EXCLUDED.featured, + coming_soon = EXCLUDED.coming_soon, + certificate_of_analysis_enabled = EXCLUDED.certificate_of_analysis_enabled, + is_below_threshold = EXCLUDED.is_below_threshold, + is_below_kiosk_threshold = 
EXCLUDED.is_below_kiosk_threshold, + options_below_threshold = EXCLUDED.options_below_threshold, + options_below_kiosk_threshold = EXCLUDED.options_below_kiosk_threshold, + stock_status = EXCLUDED.stock_status, + total_quantity_available = EXCLUDED.total_quantity_available, + primary_image_url = EXCLUDED.primary_image_url, + images = EXCLUDED.images, + measurements = EXCLUDED.measurements, + weight = EXCLUDED.weight, + past_c_names = EXCLUDED.past_c_names, + created_at_dutchie = EXCLUDED.created_at_dutchie, + updated_at_dutchie = EXCLUDED.updated_at_dutchie, + latest_raw_payload = EXCLUDED.latest_raw_payload, + updated_at = NOW() + RETURNING id + `, + [ + product.dispensaryId, + product.platform, + product.externalProductId, + product.platformDispensaryId, + product.cName, + product.name, + product.brandName, + product.brandId, + product.brandLogoUrl, + product.type, + product.subcategory, + product.strainType, + product.provider, + product.thc, + product.thcContent, + product.cbd, + product.cbdContent, + product.cannabinoidsV2 ? JSON.stringify(product.cannabinoidsV2) : null, + product.effects ? JSON.stringify(product.effects) : null, + product.status, + product.medicalOnly, + product.recOnly, + product.featured, + product.comingSoon, + product.certificateOfAnalysisEnabled, + product.isBelowThreshold, + product.isBelowKioskThreshold, + product.optionsBelowThreshold, + product.optionsBelowKioskThreshold, + product.stockStatus, + product.totalQuantityAvailable, + product.primaryImageUrl, + product.images ? JSON.stringify(product.images) : null, + product.measurements ? JSON.stringify(product.measurements) : null, + product.weight, + product.pastCNames, + product.createdAtDutchie, + product.updatedAtDutchie, + product.latestRawPayload ? 
JSON.stringify(product.latestRawPayload) : null, + ] + ); + + return result.rows[0].id; +} + +/** + * Insert a snapshot record + */ +async function insertSnapshot(snapshot: Partial): Promise { + const result = await query<{ id: number }>( + ` + INSERT INTO dutchie_product_snapshots ( + dutchie_product_id, dispensary_id, platform_dispensary_id, external_product_id, + pricing_type, crawl_mode, status, featured, special, medical_only, rec_only, + is_present_in_feed, stock_status, + rec_min_price_cents, rec_max_price_cents, rec_min_special_price_cents, + med_min_price_cents, med_max_price_cents, med_min_special_price_cents, + wholesale_min_price_cents, + total_quantity_available, total_kiosk_quantity_available, manual_inventory, + is_below_threshold, is_below_kiosk_threshold, + options, raw_payload, crawled_at + ) VALUES ( + $1, $2, $3, $4, + $5, $6, $7, $8, $9, $10, $11, + $12, $13, + $14, $15, $16, + $17, $18, $19, + $20, + $21, $22, $23, + $24, $25, + $26, $27, $28 + ) + RETURNING id + `, + [ + snapshot.dutchieProductId, + snapshot.dispensaryId, + snapshot.platformDispensaryId, + snapshot.externalProductId, + snapshot.pricingType, + snapshot.crawlMode, + snapshot.status, + snapshot.featured, + snapshot.special, + snapshot.medicalOnly, + snapshot.recOnly, + snapshot.isPresentInFeed ?? 
true, + snapshot.stockStatus, + snapshot.recMinPriceCents, + snapshot.recMaxPriceCents, + snapshot.recMinSpecialPriceCents, + snapshot.medMinPriceCents, + snapshot.medMaxPriceCents, + snapshot.medMinSpecialPriceCents, + snapshot.wholesaleMinPriceCents, + snapshot.totalQuantityAvailable, + snapshot.totalKioskQuantityAvailable, + snapshot.manualInventory, + snapshot.isBelowThreshold, + snapshot.isBelowKioskThreshold, + JSON.stringify(snapshot.options || []), + JSON.stringify(snapshot.rawPayload || {}), + snapshot.crawledAt, + ] + ); + + return result.rows[0].id; +} + +/** + * Update dispensary last_crawled_at and product_count + */ +async function updateDispensaryCrawlStats( + dispensaryId: number, + productCount: number +): Promise { + await query( + ` + UPDATE dispensaries + SET last_crawled_at = NOW(), product_count = $2, updated_at = NOW() + WHERE id = $1 + `, + [dispensaryId, productCount] + ); +} + +/** + * Mark products as missing from feed + * Creates a snapshot with isPresentInFeed=false and stockStatus='missing_from_feed' + * for products that were NOT in the UNION of Mode A and Mode B product lists + * + * IMPORTANT: Uses UNION of both modes to avoid false positives + * If the union is empty (possible outage), we skip marking to avoid data corruption + */ +async function markMissingProducts( + dispensaryId: number, + platformDispensaryId: string, + modeAProductIds: Set, + modeBProductIds: Set, + pricingType: 'rec' | 'med' +): Promise { + // Build UNION of Mode A + Mode B product IDs + const unionProductIds = new Set([...modeAProductIds, ...modeBProductIds]); + + // OUTAGE DETECTION: If union is empty, something went wrong - don't mark anything as missing + if (unionProductIds.size === 0) { + console.warn('[ProductCrawler] OUTAGE DETECTED: Both Mode A and Mode B returned 0 products. 
Skipping missing product marking.'); + return 0; + } + + // Get all existing products for this dispensary that were not in the UNION + const { rows: missingProducts } = await query<{ + id: number; + external_product_id: string; + name: string; + }>( + ` + SELECT id, external_product_id, name + FROM dutchie_products + WHERE dispensary_id = $1 + AND external_product_id NOT IN (SELECT unnest($2::text[])) + `, + [dispensaryId, Array.from(unionProductIds)] + ); + + if (missingProducts.length === 0) { + return 0; + } + + console.log(`[ProductCrawler] Marking ${missingProducts.length} products as missing from feed (union of ${modeAProductIds.size} Mode A + ${modeBProductIds.size} Mode B = ${unionProductIds.size} unique)...`); + + const crawledAt = new Date(); + let marked = 0; + + for (const product of missingProducts) { + try { + // Insert a "missing from feed" snapshot + await insertSnapshot({ + dutchieProductId: product.id, + dispensaryId, + platformDispensaryId, + externalProductId: product.external_product_id, + pricingType, + crawlMode: 'mode_a', // Use mode_a for missing snapshots (convention) + status: undefined, + featured: false, + special: false, + medicalOnly: false, + recOnly: false, + isPresentInFeed: false, + stockStatus: 'missing_from_feed', + totalQuantityAvailable: undefined, // null = unknown, not 0 + manualInventory: false, + isBelowThreshold: false, + isBelowKioskThreshold: false, + options: [], + rawPayload: { _missingFromFeed: true, lastKnownName: product.name }, + crawledAt, + }); + + // Update the product's stock status + await query( + ` + UPDATE dutchie_products + SET stock_status = 'missing_from_feed', total_quantity_available = NULL, updated_at = NOW() + WHERE id = $1 + `, + [product.id] + ); + + marked++; + } catch (error: any) { + console.error(`[ProductCrawler] Error marking product ${product.external_product_id} as missing:`, error.message); + } + } + + console.log(`[ProductCrawler] Marked ${marked} products as missing from feed`); + 
return marked; +} + +// ============================================================ +// CRAWL ORCHESTRATION +// ============================================================ + +/** + * Outcome of crawling one dispensary + * crawlDispensaryProducts returns this shape for failures as well (success=false, errorMessage set) + */ +export interface CrawlResult { + success: boolean; + dispensaryId: number; + productsFound: number; + productsUpserted: number; + snapshotsCreated: number; + modeAProducts?: number; + modeBProducts?: number; + missingProductsMarked?: number; + errorMessage?: string; + durationMs: number; +} + +/** + * Process a batch of products from a single crawl mode + * IMPORTANT: Stores ALL products, never filters before DB + * Returns the upsert and snapshot counts plus the set of external product IDs that were seen + * (an ID is recorded before the upsert is attempted; '' is recorded when a product has neither _id nor id) + */ +async function processProducts( + products: DutchieRawProduct[], + dispensary: Dispensary, + pricingType: 'rec' | 'med', + crawlMode: CrawlMode +): Promise<{ upserted: number; snapshots: number; productIds: Set }> { + let upserted = 0; + let snapshots = 0; + const productIds = new Set(); + + for (const raw of products) { + try { + const externalId = raw._id || raw.id || ''; + productIds.add(externalId); + + // Upsert the canonical product - NEVER filter, store everything + const normalizedProduct = normalizeProduct( + raw, + dispensary.id, + dispensary.platformDispensaryId!
+ ); + const productId = await upsertProduct(normalizedProduct); + upserted++; + + // Create snapshot with crawl mode + const snapshot = normalizeSnapshot( + raw, + productId, + dispensary.id, + dispensary.platformDispensaryId!, + pricingType, + crawlMode + ); + await insertSnapshot(snapshot); + snapshots++; + } catch (error: any) { + console.error(`[ProductCrawler] Error processing product ${raw._id}:`, error.message); + } + } + + return { upserted, snapshots, productIds }; +} + +/** + * Crawl all products for a single dispensary using BOTH modes + * Mode A: UI parity (Status: Active) + * Mode B: MAX COVERAGE (no Status filter, bypass thresholds) + * NOTE(review): the pipeline README describes Mode A as `Status: null` and Mode B as `Status: 'Active'`, + * the reverse of the Status labels above - confirm against graphql-client before relying on either + * + * This ensures we capture ALL products including out-of-stock items + * + * useBothModes defaults to true; with false only a single Mode A fetch is made + * Does not throw: a missing platformDispensaryId or any error raised while crawling is + * reported as a CrawlResult with success=false and errorMessage set + */ +export async function crawlDispensaryProducts( + dispensary: Dispensary, + pricingType: 'rec' | 'med' = 'rec', + options: { useBothModes?: boolean } = {} +): Promise { + const { useBothModes = true } = options; + const startTime = Date.now(); + + if (!dispensary.platformDispensaryId) { + return { + success: false, + dispensaryId: dispensary.id, + productsFound: 0, + productsUpserted: 0, + snapshotsCreated: 0, + errorMessage: 'Missing platformDispensaryId', + durationMs: Date.now() - startTime, + }; + } + + try { + console.log(`[ProductCrawler] Crawling ${dispensary.name} (${dispensary.platformDispensaryId})...`); + + let totalUpserted = 0; + let totalSnapshots = 0; + let modeAProducts = 0; + let modeBProducts = 0; + let missingMarked = 0; + + // Track product IDs separately for each mode (needed for missing product detection) + const modeAProductIds = new Set(); + const modeBProductIds = new Set(); + + if (useBothModes) { + // Run two-mode crawl for maximum coverage + const bothResults = await fetchAllProductsBothModes( + dispensary.platformDispensaryId, + pricingType + ); + + modeAProducts = bothResults.modeA.products.length; + modeBProducts = bothResults.modeB.products.length; + + console.log(`[ProductCrawler] Two-mode crawl: Mode
A=${modeAProducts}, Mode B=${modeBProducts}, Merged=${bothResults.merged.products.length}`);

      // Collect Mode A product IDs.
      // Uses the same `_id` -> `id` fallback as processProducts so both agree on a
      // product's external ID; products carrying no ID at all are skipped so that
      // `undefined` never ends up in the sets handed to markMissingProducts.
      for (const p of bothResults.modeA.products) {
        const externalId = p._id || p.id;
        if (externalId) {
          modeAProductIds.add(externalId);
        }
      }

      // Collect Mode B product IDs (same fallback and skip rule as Mode A)
      for (const p of bothResults.modeB.products) {
        const externalId = p._id || p.id;
        if (externalId) {
          modeBProductIds.add(externalId);
        }
      }

      // Process MERGED products (includes options from both modes)
      if (bothResults.merged.products.length > 0) {
        const mergedResult = await processProducts(
          bothResults.merged.products,
          dispensary,
          pricingType,
          'mode_a' // Use mode_a for merged products (convention)
        );
        totalUpserted = mergedResult.upserted;
        totalSnapshots = mergedResult.snapshots;
      }
    } else {
      // Single mode crawl (Mode A only)
      const { products, crawlMode } = await fetchAllProducts(
        dispensary.platformDispensaryId,
        pricingType,
        { crawlMode: 'mode_a' }
      );

      modeAProducts = products.length;

      // Collect Mode A product IDs (same fallback and skip rule as the two-mode branch)
      for (const p of products) {
        const externalId = p._id || p.id;
        if (externalId) {
          modeAProductIds.add(externalId);
        }
      }

      const result = await processProducts(products, dispensary, pricingType, crawlMode);
      totalUpserted = result.upserted;
      totalSnapshots = result.snapshots;
    }

    // Mark products as missing using UNION of Mode A + Mode B
    // The function handles outage detection (empty union = skip marking)
    missingMarked = await markMissingProducts(
      dispensary.id,
      dispensary.platformDispensaryId,
      modeAProductIds,
      modeBProductIds,
      pricingType
    );
    // Each marked product produced one "missing from feed" snapshot
    totalSnapshots += missingMarked;

    // Update dispensary stats
    await updateDispensaryCrawlStats(dispensary.id, totalUpserted);

    console.log(`[ProductCrawler] Completed: ${totalUpserted} products, ${totalSnapshots} snapshots, ${missingMarked} marked missing`);

    return {
      success: true,
      dispensaryId: dispensary.id,
      // NOTE(review): in two-mode crawls this sum counts a product returned by both
      // modes twice; kept as-is because consumers of productsFound are not visible here.
      productsFound: modeAProducts + modeBProducts,
      productsUpserted: totalUpserted,
      snapshotsCreated: totalSnapshots,
      modeAProducts,
      modeBProducts,
      missingProductsMarked: missingMarked,
durationMs: Date.now() - startTime, + }; + } catch (error: any) { + console.error(`[ProductCrawler] Failed to crawl ${dispensary.name}:`, error.message); + return { + success: false, + dispensaryId: dispensary.id, + productsFound: 0, + productsUpserted: 0, + snapshotsCreated: 0, + errorMessage: error.message, + durationMs: Date.now() - startTime, + }; + } +} + +/** + * Crawl all Arizona dispensaries + * Dispensaries are crawled one at a time in id order with a 2 second pause after each one + * A failed crawl is recorded in its CrawlResult and does not stop the loop + * Returns one CrawlResult per dispensary, in crawl order + */ +export async function crawlAllArizonaDispensaries( + pricingType: 'rec' | 'med' = 'rec' +): Promise { + const results: CrawlResult[] = []; + + // Get all AZ dispensaries with platform IDs + // NOTE(review): these rows come straight from SELECT * (snake_case columns) while crawlDispensaryProducts reads dispensary.platformDispensaryId - confirm the rows are mapped to camelCase somewhere + const { rows: dispensaries } = await query( + ` + SELECT * FROM dispensaries + WHERE state = 'AZ' AND platform = 'dutchie' AND platform_dispensary_id IS NOT NULL + ORDER BY id + ` + ); + + console.log(`[ProductCrawler] Starting crawl of ${dispensaries.length} dispensaries...`); + + for (const dispensary of dispensaries) { + const result = await crawlDispensaryProducts(dispensary, pricingType); + results.push(result); + + // Delay between dispensaries (also applied after the last one) + await new Promise((r) => setTimeout(r, 2000)); + } + + const successful = results.filter((r) => r.success).length; + const totalProducts = results.reduce((sum, r) => sum + r.productsUpserted, 0); + const totalSnapshots = results.reduce((sum, r) => sum + r.snapshotsCreated, 0); + + console.log(`[ProductCrawler] Completed: ${successful}/${dispensaries.length} stores, ${totalProducts} products, ${totalSnapshots} snapshots`); + + return results; +} diff --git a/backend/src/dutchie-az/services/scheduler.ts b/backend/src/dutchie-az/services/scheduler.ts new file mode 100644 index 00000000..3bb7f8bb --- /dev/null +++ b/backend/src/dutchie-az/services/scheduler.ts @@ -0,0 +1,763 @@ +/** + * Dutchie AZ Scheduler Service + * + * Handles scheduled crawling with JITTER - no fixed intervals! + * Each job re-schedules itself with a NEW random offset after each run. + * This makes timing "wander" around the clock, avoiding detectable patterns.
+ * + * Jitter Logic: + * nextRunAt = lastRunAt + baseIntervalMinutes + random(-jitterMinutes, +jitterMinutes) + * + * Example: 4-hour base with ±30min jitter = runs anywhere from 3h30m to 4h30m apart + */ + +import { query, getClient } from '../db/connection'; +import { crawlDispensaryProducts, CrawlResult } from './product-crawler'; +import { JobSchedule, JobStatus, Dispensary } from '../types'; + +// Scheduler poll interval (how often we check for due jobs) +const SCHEDULER_POLL_INTERVAL_MS = 60 * 1000; // 1 minute + +// Track running state +let isSchedulerRunning = false; +let schedulerInterval: NodeJS.Timeout | null = null; + +// ============================================================ +// JITTER CALCULATION +// ============================================================ + +/** + * Generate a random jitter value in minutes + * Returns a value between -jitterMinutes and +jitterMinutes + */ +function getRandomJitterMinutes(jitterMinutes: number): number { + // random() returns [0, 1), we want [-jitter, +jitter] + return (Math.random() * 2 - 1) * jitterMinutes; +} + +/** + * Calculate next run time with jitter + * nextRunAt = baseTime + baseIntervalMinutes + random(-jitter, +jitter) + */ +function calculateNextRunAt( + baseTime: Date, + baseIntervalMinutes: number, + jitterMinutes: number +): Date { + const jitter = getRandomJitterMinutes(jitterMinutes); + const totalMinutes = baseIntervalMinutes + jitter; + const totalMs = totalMinutes * 60 * 1000; + return new Date(baseTime.getTime() + totalMs); +} + +// ============================================================ +// DATABASE OPERATIONS +// ============================================================ + +/** + * Get all job schedules + */ +export async function getAllSchedules(): Promise { + const { rows } = await query(` + SELECT + id, job_name, description, enabled, + base_interval_minutes, jitter_minutes, + last_run_at, last_status, last_error_message, last_duration_ms, + next_run_at, job_config, 
created_at, updated_at + FROM job_schedules + ORDER BY job_name + `); + + return rows.map(row => ({ + id: row.id, + jobName: row.job_name, + description: row.description, + enabled: row.enabled, + baseIntervalMinutes: row.base_interval_minutes, + jitterMinutes: row.jitter_minutes, + lastRunAt: row.last_run_at, + lastStatus: row.last_status, + lastErrorMessage: row.last_error_message, + lastDurationMs: row.last_duration_ms, + nextRunAt: row.next_run_at, + jobConfig: row.job_config, + createdAt: row.created_at, + updatedAt: row.updated_at, + })); +} + +/** + * Get a single schedule by ID + * Returns null when no row has the given id + */ +export async function getScheduleById(id: number): Promise { + const { rows } = await query( + `SELECT * FROM job_schedules WHERE id = $1`, + [id] + ); + + if (rows.length === 0) return null; + + const row = rows[0]; + return { + id: row.id, + jobName: row.job_name, + description: row.description, + enabled: row.enabled, + baseIntervalMinutes: row.base_interval_minutes, + jitterMinutes: row.jitter_minutes, + lastRunAt: row.last_run_at, + lastStatus: row.last_status, + lastErrorMessage: row.last_error_message, + lastDurationMs: row.last_duration_ms, + nextRunAt: row.next_run_at, + jobConfig: row.job_config, + createdAt: row.created_at, + updatedAt: row.updated_at, + }; +} + +/** + * Create a new schedule + * startImmediately sets next_run_at to the current time; otherwise the first run is one jittered interval from now + * enabled defaults to true; description and jobConfig are stored as NULL when omitted + * Returns the inserted row mapped to a JobSchedule + */ +export async function createSchedule(schedule: { + jobName: string; + description?: string; + enabled?: boolean; + baseIntervalMinutes: number; + jitterMinutes: number; + jobConfig?: Record; + startImmediately?: boolean; +}): Promise { + // Calculate initial nextRunAt + const nextRunAt = schedule.startImmediately + ?
new Date() // Start immediately + : calculateNextRunAt(new Date(), schedule.baseIntervalMinutes, schedule.jitterMinutes); + + const { rows } = await query( + ` + INSERT INTO job_schedules ( + job_name, description, enabled, + base_interval_minutes, jitter_minutes, + next_run_at, job_config + ) VALUES ($1, $2, $3, $4, $5, $6, $7) + RETURNING * + `, + [ + schedule.jobName, + schedule.description || null, + schedule.enabled ?? true, + schedule.baseIntervalMinutes, + schedule.jitterMinutes, + nextRunAt, + schedule.jobConfig ? JSON.stringify(schedule.jobConfig) : null, + ] + ); + + const row = rows[0]; + console.log(`[Scheduler] Created schedule "${schedule.jobName}" - next run at ${nextRunAt.toISOString()}`); + + return { + id: row.id, + jobName: row.job_name, + description: row.description, + enabled: row.enabled, + baseIntervalMinutes: row.base_interval_minutes, + jitterMinutes: row.jitter_minutes, + lastRunAt: row.last_run_at, + lastStatus: row.last_status, + lastErrorMessage: row.last_error_message, + lastDurationMs: row.last_duration_ms, + nextRunAt: row.next_run_at, + jobConfig: row.job_config, + createdAt: row.created_at, + updatedAt: row.updated_at, + }; +} + +/** + * Update a schedule + * Only fields that are not undefined are written; when nothing is supplied the current row is returned unchanged + * Returns null when no schedule has the given id + * next_run_at is not touched here, so a changed interval or jitter takes effect when the next run completes + */ +export async function updateSchedule( + id: number, + updates: { + description?: string; + enabled?: boolean; + baseIntervalMinutes?: number; + jitterMinutes?: number; + jobConfig?: Record; + } +): Promise { + const setClauses: string[] = []; + const params: any[] = []; + let paramIndex = 1; + + if (updates.description !== undefined) { + setClauses.push(`description = $${paramIndex++}`); + params.push(updates.description); + } + if (updates.enabled !== undefined) { + setClauses.push(`enabled = $${paramIndex++}`); + params.push(updates.enabled); + } + if (updates.baseIntervalMinutes !== undefined) { + setClauses.push(`base_interval_minutes = $${paramIndex++}`); + params.push(updates.baseIntervalMinutes); + } + if (updates.jitterMinutes !== undefined) { +
setClauses.push(`jitter_minutes = $${paramIndex++}`); + params.push(updates.jitterMinutes); + } + if (updates.jobConfig !== undefined) { + setClauses.push(`job_config = $${paramIndex++}`); + params.push(JSON.stringify(updates.jobConfig)); + } + + if (setClauses.length === 0) { + return getScheduleById(id); + } + + setClauses.push(`updated_at = NOW()`); + params.push(id); + + const { rows } = await query( + `UPDATE job_schedules SET ${setClauses.join(', ')} WHERE id = $${paramIndex} RETURNING *`, + params + ); + + if (rows.length === 0) return null; + + const row = rows[0]; + return { + id: row.id, + jobName: row.job_name, + description: row.description, + enabled: row.enabled, + baseIntervalMinutes: row.base_interval_minutes, + jitterMinutes: row.jitter_minutes, + lastRunAt: row.last_run_at, + lastStatus: row.last_status, + lastErrorMessage: row.last_error_message, + lastDurationMs: row.last_duration_ms, + nextRunAt: row.next_run_at, + jobConfig: row.job_config, + createdAt: row.created_at, + updatedAt: row.updated_at, + }; +} + +/** + * Delete a schedule + * Returns true when a row was deleted, false when no schedule had the given id + */ +export async function deleteSchedule(id: number): Promise { + const result = await query(`DELETE FROM job_schedules WHERE id = $1`, [id]); + return (result.rowCount || 0) > 0; +} + +/** + * Mark a schedule as running + * Only last_status (and updated_at) change; checkAndRunDueJobs skips schedules whose last_status is 'running' + */ +async function markScheduleRunning(id: number): Promise { + await query( + `UPDATE job_schedules SET last_status = 'running', updated_at = NOW() WHERE id = $1`, + [id] + ); +} + +/** + * Update schedule after job completion with NEW jittered next_run_at + * The interval is measured from the completion time (now), which is also stored as last_run_at + * Does nothing when the schedule no longer exists + */ +async function updateScheduleAfterRun( + id: number, + status: JobStatus, + durationMs: number, + errorMessage?: string +): Promise { + // Get current schedule to calculate new nextRunAt + const schedule = await getScheduleById(id); + if (!schedule) return; + + const now = new Date(); + const newNextRunAt = calculateNextRunAt( + now, + schedule.baseIntervalMinutes, + schedule.jitterMinutes + ); + + console.log(`[Scheduler] Schedule
"${schedule.jobName}" completed (${status}). Next run: ${newNextRunAt.toISOString()}`); + + await query( + ` + UPDATE job_schedules SET + last_run_at = $2, + last_status = $3, + last_error_message = $4, + last_duration_ms = $5, + next_run_at = $6, + updated_at = NOW() + WHERE id = $1 + `, + [id, now, status, errorMessage || null, durationMs, newNextRunAt] + ); +} + +/** + * Create a job run log entry + * started_at is set to NOW(); returns the id of the inserted row + */ +async function createRunLog( + scheduleId: number, + jobName: string, + status: 'pending' | 'running' +): Promise { + const { rows } = await query<{ id: number }>( + ` + INSERT INTO job_run_logs (schedule_id, job_name, status, started_at) + VALUES ($1, $2, $3, NOW()) + RETURNING id + `, + [scheduleId, jobName, status] + ); + return rows[0].id; +} + +/** + * Update a job run log entry + * Sets completed_at to NOW(); missing item counts are stored as 0, a missing errorMessage or metadata as NULL + */ +async function updateRunLog( + runLogId: number, + status: 'success' | 'error' | 'partial', + results: { + durationMs: number; + errorMessage?: string; + itemsProcessed?: number; + itemsSucceeded?: number; + itemsFailed?: number; + metadata?: any; + } +): Promise { + await query( + ` + UPDATE job_run_logs SET + status = $2, + completed_at = NOW(), + duration_ms = $3, + error_message = $4, + items_processed = $5, + items_succeeded = $6, + items_failed = $7, + metadata = $8 + WHERE id = $1 + `, + [ + runLogId, + status, + results.durationMs, + results.errorMessage || null, + results.itemsProcessed || 0, + results.itemsSucceeded || 0, + results.itemsFailed || 0, + results.metadata ?
JSON.stringify(results.metadata) : null, + ] + ); +} + +/** + * Get job run logs + * Optional scheduleId / jobName filters are ANDed together; rows are returned newest first (created_at DESC) + * limit defaults to 50 and offset to 0 + * total is the number of rows matching the filters, ignoring limit and offset + */ +export async function getRunLogs(options: { + scheduleId?: number; + jobName?: string; + limit?: number; + offset?: number; +}): Promise<{ logs: any[]; total: number }> { + const { scheduleId, jobName, limit = 50, offset = 0 } = options; + + let whereClause = 'WHERE 1=1'; + const params: any[] = []; + let paramIndex = 1; + + if (scheduleId) { + whereClause += ` AND schedule_id = $${paramIndex++}`; + params.push(scheduleId); + } + if (jobName) { + whereClause += ` AND job_name = $${paramIndex++}`; + params.push(jobName); + } + + params.push(limit, offset); + + const { rows } = await query( + ` + SELECT * FROM job_run_logs + ${whereClause} + ORDER BY created_at DESC + LIMIT $${paramIndex} OFFSET $${paramIndex + 1} + `, + params + ); + + const { rows: countRows } = await query( + `SELECT COUNT(*) as total FROM job_run_logs ${whereClause}`, + params.slice(0, -2) + ); + + return { + logs: rows, + total: parseInt(countRows[0]?.total || '0', 10), + }; +} + +// ============================================================ +// JOB EXECUTION +// ============================================================ + +/** + * Execute a job based on its name + * A missing jobConfig is treated as an empty object + * Throws for a jobName that has no case in the switch below + */ +async function executeJob(schedule: JobSchedule): Promise<{ + status: JobStatus; + itemsProcessed: number; + itemsSucceeded: number; + itemsFailed: number; + errorMessage?: string; + metadata?: any; +}> { + const config = schedule.jobConfig || {}; + + switch (schedule.jobName) { + case 'dutchie_az_product_crawl': + return executeProductCrawl(config); + case 'dutchie_az_discovery': + return executeDiscovery(config); + default: + throw new Error(`Unknown job type: ${schedule.jobName}`); + } +} + +/** + * Execute the AZ Dutchie product crawl job + * config.pricingType defaults to 'rec'; config.useBothModes is treated as true unless it is exactly false + */ +async function executeProductCrawl(config: Record): Promise<{ + status: JobStatus; + itemsProcessed: number; + itemsSucceeded: number; + itemsFailed: number; + errorMessage?: string; + metadata?:
any; +}> { + const pricingType = config.pricingType || 'rec'; + const useBothModes = config.useBothModes !== false; + + // Get all dispensaries with platform IDs, least recently crawled first (never-crawled ones lead) + // NOTE(review): these rows come straight from SELECT * (snake_case columns) while crawlDispensaryProducts reads dispensary.platformDispensaryId - confirm the rows are mapped to camelCase somewhere + const { rows: dispensaries } = await query( + ` + SELECT * FROM dispensaries + WHERE state = 'AZ' AND platform = 'dutchie' AND platform_dispensary_id IS NOT NULL + ORDER BY last_crawled_at ASC NULLS FIRST + ` + ); + + if (dispensaries.length === 0) { + return { + status: 'success', + itemsProcessed: 0, + itemsSucceeded: 0, + itemsFailed: 0, + metadata: { message: 'No dispensaries to crawl' }, + }; + } + + console.log(`[Scheduler] Crawling ${dispensaries.length} dispensaries...`); + + let succeeded = 0; + let failed = 0; + let totalProducts = 0; + let totalSnapshots = 0; + const errors: string[] = []; + + for (const dispensary of dispensaries) { + try { + const result = await crawlDispensaryProducts(dispensary, pricingType, { useBothModes }); + + if (result.success) { + succeeded++; + totalProducts += result.productsUpserted; + totalSnapshots += result.snapshotsCreated; + } else { + failed++; + if (result.errorMessage) { + errors.push(`${dispensary.name}: ${result.errorMessage}`); + } + } + + // Delay between dispensaries (5s; skipped when the crawl call throws, still applied after the last one) + await new Promise(r => setTimeout(r, 5000)); + } catch (error: any) { + failed++; + errors.push(`${dispensary.name}: ${error.message}`); + } + } + + // 'success' = no failures, 'error' = no successes, 'partial' = some of each + const status: JobStatus = failed === 0 ? 'success' : succeeded === 0 ? 'error' : 'partial'; + + return { + status, + itemsProcessed: dispensaries.length, + itemsSucceeded: succeeded, + itemsFailed: failed, + errorMessage: errors.length > 0 ?
errors.slice(0, 5).join('; ') : undefined,
    metadata: {
      totalProducts,
      totalSnapshots,
      pricingType,
      useBothModes,
    },
  };
}

/**
 * Execute the AZ Dutchie discovery job (placeholder)
 * Currently performs no work and always reports success with zero items
 */
async function executeDiscovery(_config: Record<string, any>): Promise<{
  status: JobStatus;
  itemsProcessed: number;
  itemsSucceeded: number;
  itemsFailed: number;
  errorMessage?: string;
  metadata?: any;
}> {
  // Placeholder - implement discovery logic
  return {
    status: 'success',
    itemsProcessed: 0,
    itemsSucceeded: 0,
    itemsFailed: 0,
    metadata: { message: 'Discovery not yet implemented' },
  };
}

// ============================================================
// SCHEDULER RUNNER
// ============================================================

// True while a checkAndRunDueJobs pass is in flight. The poll timer fires every
// minute but a pass awaits each job to completion, so passes could otherwise overlap.
let isCheckingDueJobs = false;

/**
 * Check for due jobs and run them
 *
 * Due = enabled, next_run_at <= NOW() and last_status not 'running'. Due jobs are run
 * one after another, earliest next_run_at first.
 *
 * A call made while a previous pass is still running returns immediately. Without
 * this, a second pass could start a due job that the first pass still holds in its
 * already-fetched list, and that job would then be executed twice.
 *
 * Never rejects: errors are logged and swallowed so the polling timer keeps going.
 */
async function checkAndRunDueJobs(): Promise<void> {
  if (isCheckingDueJobs) {
    return;
  }
  isCheckingDueJobs = true;

  try {
    // Get enabled schedules where nextRunAt <= now
    const { rows } = await query(
      `
      SELECT * FROM job_schedules
      WHERE enabled = true
        AND next_run_at IS NOT NULL
        AND next_run_at <= NOW()
        AND (last_status IS NULL OR last_status != 'running')
      ORDER BY next_run_at ASC
      `
    );

    if (rows.length === 0) return;

    console.log(`[Scheduler] Found ${rows.length} due job(s)`);

    for (const row of rows) {
      const schedule: JobSchedule = {
        id: row.id,
        jobName: row.job_name,
        description: row.description,
        enabled: row.enabled,
        baseIntervalMinutes: row.base_interval_minutes,
        jitterMinutes: row.jitter_minutes,
        lastRunAt: row.last_run_at,
        lastStatus: row.last_status,
        lastErrorMessage: row.last_error_message,
        lastDurationMs: row.last_duration_ms,
        nextRunAt: row.next_run_at,
        jobConfig: row.job_config,
        createdAt: row.created_at,
        updatedAt: row.updated_at,
      };

      await runScheduledJob(schedule);
    }
  } catch (error) {
    console.error('[Scheduler] Error checking for due jobs:', error);
  } finally {
    // Always release the guard, including on the early return above
    isCheckingDueJobs = false;
  }
}

/**
 * Run a single scheduled job
 */
async function runScheduledJob(schedule:
JobSchedule): Promise<void> {
  // Flow: mark the schedule 'running', write a run log row, execute the job, then
  // record the outcome on both the run log and the schedule. Recording the outcome
  // on the schedule also assigns a fresh jittered next_run_at.
  const startTime = Date.now();

  console.log(`[Scheduler] Starting job "${schedule.jobName}"...`);

  // Mark as running
  await markScheduleRunning(schedule.id);

  // Declared outside the try so the catch block knows whether a run log row exists
  let runLogId: number | null = null;

  try {
    // Create run log entry. Done inside the try: once the schedule is marked
    // 'running', any failure must still reach the catch block, otherwise the schedule
    // keeps last_status = 'running' and the poller skips it from then on.
    runLogId = await createRunLog(schedule.id, schedule.jobName, 'running');

    // Execute the job
    const result = await executeJob(schedule);

    const durationMs = Date.now() - startTime;

    // Determine final status (exclude 'running' and null)
    const finalStatus: 'success' | 'error' | 'partial' =
      result.status === 'running' || result.status === null
        ? 'success'
        : result.status;

    // Update run log
    await updateRunLog(runLogId, finalStatus, {
      durationMs,
      errorMessage: result.errorMessage,
      itemsProcessed: result.itemsProcessed,
      itemsSucceeded: result.itemsSucceeded,
      itemsFailed: result.itemsFailed,
      metadata: result.metadata,
    });

    // Update schedule with NEW jittered next_run_at.
    // Persist finalStatus, not the raw result.status: storing 'running' here would
    // make the poller treat the finished job as still in progress.
    await updateScheduleAfterRun(
      schedule.id,
      finalStatus,
      durationMs,
      result.errorMessage
    );

    console.log(`[Scheduler] Job "${schedule.jobName}" completed in ${Math.round(durationMs / 1000)}s (${finalStatus})`);

  } catch (error: any) {
    const durationMs = Date.now() - startTime;

    console.error(`[Scheduler] Job "${schedule.jobName}" failed:`, error.message);

    // Update run log with error. Best effort: a failure here must not prevent the
    // schedule update below, which is what takes the schedule out of 'running'.
    if (runLogId !== null) {
      try {
        await updateRunLog(runLogId, 'error', {
          durationMs,
          errorMessage: error.message,
          itemsProcessed: 0,
          itemsSucceeded: 0,
          itemsFailed: 0,
        });
      } catch (logError: any) {
        console.error(`[Scheduler] Failed to update run log ${runLogId}:`, logError.message);
      }
    }

    // Update schedule with NEW jittered next_run_at
    await updateScheduleAfterRun(schedule.id, 'error', durationMs, error.message);
  }
}

// ============================================================
// PUBLIC API
// ============================================================

/**
 * Start the scheduler
 * Calling it while the scheduler is already running only logs a message
 */
export function startScheduler(): void {
  if (isSchedulerRunning) {
    console.log('[Scheduler] Scheduler is already running');
    return;
  }

isSchedulerRunning = true; + console.log(`[Scheduler] Starting scheduler (polling every ${SCHEDULER_POLL_INTERVAL_MS / 1000}s)...`); + + // Immediately check for due jobs (not awaited; checkAndRunDueJobs catches and logs its own errors) + checkAndRunDueJobs(); + + // Set up interval to check for due jobs + schedulerInterval = setInterval(checkAndRunDueJobs, SCHEDULER_POLL_INTERVAL_MS); +} + +/** + * Stop the scheduler + * Clears the polling timer only; a job that is already executing is not interrupted + */ +export function stopScheduler(): void { + if (!isSchedulerRunning) { + console.log('[Scheduler] Scheduler is not running'); + return; + } + + isSchedulerRunning = false; + + if (schedulerInterval) { + clearInterval(schedulerInterval); + schedulerInterval = null; + } + + console.log('[Scheduler] Scheduler stopped'); +} + +/** + * Get scheduler status + * Reports whether the polling timer is active and the poll interval in milliseconds + */ +export function getSchedulerStatus(): { + running: boolean; + pollIntervalMs: number; +} { + return { + running: isSchedulerRunning, + pollIntervalMs: SCHEDULER_POLL_INTERVAL_MS, + }; +} + +/** + * Trigger immediate execution of a schedule + * Awaits the whole job before returning + * Refuses (success=false) when the schedule does not exist or its last_status is 'running' + * The enabled flag and next_run_at are not checked here + */ +export async function triggerScheduleNow(scheduleId: number): Promise<{ + success: boolean; + message: string; +}> { + const schedule = await getScheduleById(scheduleId); + if (!schedule) { + return { success: false, message: 'Schedule not found' }; + } + + if (schedule.lastStatus === 'running') { + return { success: false, message: 'Job is already running' }; + } + + // Run the job + await runScheduledJob(schedule); + + return { success: true, message: 'Job triggered successfully' }; +} + +/** + * Initialize default schedules if they don't exist + * Only the 'dutchie_az_product_crawl' schedule is created here + */ +export async function initializeDefaultSchedules(): Promise { + const schedules = await getAllSchedules(); + + // Check if product crawl schedule exists + const productCrawlExists = schedules.some(s => s.jobName === 'dutchie_az_product_crawl'); + if (!productCrawlExists) { + await createSchedule({ + jobName: 'dutchie_az_product_crawl', + description: 'Crawl all AZ Dutchie dispensary products', + enabled: true, + baseIntervalMinutes: 240, // 4 hours + jitterMinutes: 30,
// ±30 minutes + jobConfig: { pricingType: 'rec', useBothModes: true }, + startImmediately: false, + }); + console.log('[Scheduler] Created default product crawl schedule'); + } +} + +// Re-export for backward compatibility +export { crawlDispensaryProducts as crawlSingleDispensary } from './product-crawler'; + +export async function triggerImmediateCrawl(): Promise<{ success: boolean; message: string }> { + const schedules = await getAllSchedules(); + const productCrawl = schedules.find(s => s.jobName === 'dutchie_az_product_crawl'); + if (productCrawl) { + return triggerScheduleNow(productCrawl.id); + } + return { success: false, message: 'Product crawl schedule not found' }; +} diff --git a/backend/src/dutchie-az/types/index.ts b/backend/src/dutchie-az/types/index.ts new file mode 100644 index 00000000..9e135af4 --- /dev/null +++ b/backend/src/dutchie-az/types/index.ts @@ -0,0 +1,667 @@ +/** + * Dutchie AZ Data Types + * + * Complete TypeScript interfaces for the isolated Dutchie Arizona data pipeline. + * These types map directly to Dutchie's GraphQL FilteredProducts response. 
+ */ + +// ============================================================ +// GRAPHQL RESPONSE TYPES (from Dutchie API) +// ============================================================ + +/** + * Raw Dutchie brand object from GraphQL + */ +export interface DutchieBrand { + id: string; + _id?: string; + name: string; + parentBrandId?: string; + imageUrl?: string; + description?: string; + __typename?: string; +} + +/** + * Raw Dutchie image object from GraphQL + */ +export interface DutchieImage { + url: string; + description?: string; + active?: boolean; + __typename?: string; +} + +/** + * POSMetaData.children - option-level inventory/pricing + */ +export interface DutchiePOSChild { + activeBatchTags?: any; + canonicalBrandId?: string; + canonicalBrandName?: string; + canonicalCategory?: string; + canonicalCategoryId?: string; + canonicalEffectivePotencyMg?: number; + canonicalID?: string; + canonicalPackageId?: string; + canonicalImgUrl?: string; + canonicalLabResultUrl?: string; + canonicalName?: string; + canonicalSKU?: string; + canonicalProductTags?: string[]; + canonicalStrainId?: string; + canonicalVendorId?: string; + kioskQuantityAvailable?: number; + medPrice?: number; + option?: string; + packageQuantity?: number; + price?: number; + quantity?: number; + quantityAvailable?: number; + recEquivalent?: number; + recPrice?: number; + standardEquivalent?: number; + __typename?: string; +} + +/** + * POSMetaData object from GraphQL + */ +export interface DutchiePOSMetaData { + activeBatchTags?: any; + canonicalBrandId?: string; + canonicalBrandName?: string; + canonicalCategory?: string; + canonicalCategoryId?: string; + canonicalID?: string; + canonicalPackageId?: string; + canonicalImgUrl?: string; + canonicalLabResultUrl?: string; + canonicalName?: string; + canonicalProductTags?: string[]; + canonicalSKU?: string; + canonicalStrainId?: string; + canonicalVendorId?: string; + children?: DutchiePOSChild[]; + integrationID?: string; + __typename?: string; +} + 
+/** + * THC/CBD Content structure + */ +export interface DutchiePotencyContent { + unit?: string; + range?: number[]; +} + +/** + * CannabinoidV2 structure + */ +export interface DutchieCannabinoidV2 { + value: number; + unit: string; + cannabinoid: { + name: string; + }; +} + +/** + * Special data structure + */ +export interface DutchieSpecialData { + saleSpecials?: Array<{ + specialId: string; + specialName: string; + discount: number; + percentDiscount: boolean; + dollarDiscount: boolean; + specialType: string; + }>; + bogoSpecials?: any; +} + +/** + * Complete raw product from Dutchie GraphQL FilteredProducts + */ +export interface DutchieRawProduct { + _id: string; + id?: string; + AdditionalOptions?: any; + duplicatedProductId?: string; + libraryProductId?: string; + libraryProductScore?: number; + + // Brand + brand?: DutchieBrand; + brandId?: string; + brandName?: string; + brandLogo?: string; + + // Potency + CBD?: number; + CBDContent?: DutchiePotencyContent; + THC?: number; + THCContent?: DutchiePotencyContent; + cannabinoidsV2?: DutchieCannabinoidV2[]; + + // Flags + certificateOfAnalysisEnabled?: boolean; + collectionCardBadge?: string; + comingSoon?: boolean; + featured?: boolean; + medicalOnly?: boolean; + recOnly?: boolean; + nonArmsLength?: boolean; + vapeTaxApplicable?: boolean; + useBetterPotencyTaxes?: boolean; + + // Timestamps + createdAt?: string; + updatedAt?: string; + + // Dispensary + DispensaryID: string; + enterpriseProductId?: string; + + // Images + Image?: string; + images?: DutchieImage[]; + + // Measurements + measurements?: { + netWeight?: { + unit: string; + values: number[]; + }; + volume?: any; + }; + weight?: number | string; + + // Product identity + Name: string; + cName: string; + pastCNames?: string[]; + + // Options + Options?: string[]; + rawOptions?: string[]; + limitsPerCustomer?: any; + manualInventory?: boolean; + + // POS data + POSMetaData?: DutchiePOSMetaData; + + // Pricing + Prices?: number[]; + recPrices?: 
/**
 * StockStatus - product-level availability derived from POSMetaData.children
 * - 'in_stock': at least one option reports a quantity > 0
 * - 'out_of_stock': every option reports a quantity that is not > 0
 * - 'unknown': no children, or some option carries no quantity data
 * - 'missing_from_feed': product was not present in the latest crawl feed
 */
export type StockStatus = 'in_stock' | 'out_of_stock' | 'unknown' | 'missing_from_feed';

/**
 * CrawlMode - defines how products are fetched from Dutchie
 * - 'mode_a': UI parity - Status: 'Active', threshold removal ON
 * - 'mode_b': MAX COVERAGE - No Status filter, bypass thresholds
 *
 * NOTE(review): the pipeline README describes the Status filter of the two
 * modes the other way round - confirm against graphql-client.ts.
 */
export type CrawlMode = 'mode_a' | 'mode_b';

/**
 * Stock status of a single option (child).
 */
export type OptionStockStatus = 'in_stock' | 'out_of_stock' | 'unknown';

/**
 * Pick the quantity figure to use for one option.
 *
 * The first numeric field wins, in this order:
 * quantityAvailable, kioskQuantityAvailable, quantity.
 *
 * @returns The chosen quantity, or null when none of the fields is a number.
 */
export function getOptionQuantity(child: DutchiePOSChild): number | null {
  const candidates = [
    child.quantityAvailable,
    child.kioskQuantityAvailable,
    child.quantity,
  ];

  for (const candidate of candidates) {
    if (typeof candidate === 'number') {
      return candidate;
    }
  }

  // No quantity data available
  return null;
}

/**
 * Classify a single option from its quantity.
 *
 * @returns 'unknown' without quantity data, 'in_stock' when the quantity is
 *          positive, 'out_of_stock' otherwise.
 */
export function deriveOptionStockStatus(child: DutchiePOSChild): OptionStockStatus {
  const available = getOptionQuantity(child);

  if (available === null) {
    return 'unknown';
  }
  if (available > 0) {
    return 'in_stock';
  }
  return 'out_of_stock';
}

/**
 * Roll the per-option statuses up into one product-level status.
 *
 * - any option in stock            -> 'in_stock'
 * - every option out of stock      -> 'out_of_stock'
 * - anything else (incl. no data)  -> 'unknown'
 *
 * Threshold flags (isBelowThreshold, etc.) are deliberately not consulted:
 * they signal low stock only and never override the quantity-based result.
 */
export function deriveStockStatus(product: DutchieRawProduct): StockStatus {
  const options = product.POSMetaData?.children;

  // No children data - unknown
  if (!options || options.length === 0) {
    return 'unknown';
  }

  let sawUnknownOption = false;
  for (const option of options) {
    const optionStatus = deriveOptionStockStatus(option);
    if (optionStatus === 'in_stock') {
      // A single sellable option is enough.
      return 'in_stock';
    }
    if (optionStatus !== 'out_of_stock') {
      sawUnknownOption = true;
    }
  }

  // Mix of out_of_stock and unknown -> unknown; all out_of_stock -> out_of_stock
  return sawUnknownOption ? 'unknown' : 'out_of_stock';
}

/**
 * Sum the per-option quantities of a product.
 *
 * @returns null when there are no children or no child has any quantity data
 *          (inventory unknown - NOT zero); otherwise the sum, where options
 *          without data contribute nothing.
 */
export function calculateTotalQuantity(product: DutchieRawProduct): number | null {
  const options = product.POSMetaData?.children;
  if (!options || options.length === 0) {
    return null;
  }

  let total = 0;
  let sawQuantity = false;
  for (const option of options) {
    const optionQty = getOptionQuantity(option);
    if (optionQty !== null) {
      sawQuantity = true;
      total += optionQty;
    }
  }

  return sawQuantity ? total : null;
}

/**
 * Sum kioskQuantityAvailable across all options of a product.
 *
 * @returns null when there are no children or no child has a numeric
 *          kioskQuantityAvailable; otherwise the sum.
 */
export function calculateTotalKioskQuantity(product: DutchieRawProduct): number | null {
  const options = product.POSMetaData?.children;
  if (!options || options.length === 0) {
    return null;
  }

  let kioskTotal = 0;
  let sawKioskQuantity = false;
  for (const option of options) {
    const kioskQty = option.kioskQuantityAvailable;
    if (typeof kioskQty === 'number') {
      sawKioskQuantity = true;
    }
    kioskTotal += kioskQty ?? 0;
  }

  return sawKioskQuantity ? kioskTotal : null;
}
brandLogoUrl?: string; + + // Classification + type?: string; + subcategory?: string; + strainType?: string; + provider?: string; + + // Potency + thc?: number; + thcContent?: number; + cbd?: number; + cbdContent?: number; + cannabinoidsV2?: DutchieCannabinoidV2[]; + effects?: Record; + + // Status / flags + status?: string; + medicalOnly: boolean; + recOnly: boolean; + featured: boolean; + comingSoon: boolean; + certificateOfAnalysisEnabled: boolean; + + isBelowThreshold: boolean; + isBelowKioskThreshold: boolean; + optionsBelowThreshold: boolean; + optionsBelowKioskThreshold: boolean; + + // Derived stock status (from POSMetaData.children quantityAvailable) + stockStatus: StockStatus; + totalQuantityAvailable?: number | null; // null = unknown (no children), 0 = all OOS + + // Images + primaryImageUrl?: string; + images?: DutchieImage[]; + + // Misc + measurements?: any; + weight?: string; + pastCNames?: string[]; + + createdAtDutchie?: Date; + updatedAtDutchie?: Date; + + latestRawPayload?: any; // Full product node from last crawl + + createdAt: Date; + updatedAt: Date; +} + +/** + * DutchieProductOptionSnapshot - child-level option data from POSMetaData.children + */ +export interface DutchieProductOptionSnapshot { + optionId: string; // canonicalID or canonicalPackageId or canonicalSKU + canonicalId?: string; + canonicalPackageId?: string; + canonicalSKU?: string; + canonicalName?: string; + + canonicalCategory?: string; + canonicalCategoryId?: string; + canonicalBrandId?: string; + canonicalBrandName?: string; + canonicalStrainId?: string; + canonicalVendorId?: string; + + optionLabel?: string; // from option field + packageQuantity?: number; + recEquivalent?: number; + standardEquivalent?: number; + + priceCents?: number; // price * 100 + recPriceCents?: number; // recPrice * 100 + medPriceCents?: number; // medPrice * 100 + + quantity?: number; + quantityAvailable?: number; + kioskQuantityAvailable?: number; + + activeBatchTags?: any; + canonicalImgUrl?: 
string; + canonicalLabResultUrl?: string; + canonicalEffectivePotencyMg?: number; + + rawChildPayload?: any; // Full POSMetaData.children node +} + +/** + * DutchieProductSnapshot - per crawl, includes options[] + */ +export interface DutchieProductSnapshot { + id: number; + dutchieProductId: number; + dispensaryId: number; + platformDispensaryId: string; + externalProductId: string; + pricingType: 'rec' | 'med' | 'unknown'; + crawlMode: CrawlMode; // Which crawl mode captured this snapshot + + status?: string; + featured: boolean; + special: boolean; + medicalOnly: boolean; + recOnly: boolean; + + // Flag indicating if product was present in feed (false = missing_from_feed snapshot) + isPresentInFeed: boolean; + + // Derived stock status for this snapshot + stockStatus: StockStatus; + + // Price summary (aggregated from children, in cents) + recMinPriceCents?: number; + recMaxPriceCents?: number; + recMinSpecialPriceCents?: number; + medMinPriceCents?: number; + medMaxPriceCents?: number; + medMinSpecialPriceCents?: number; + wholesaleMinPriceCents?: number; + + // Inventory summary (aggregated from POSMetaData.children) + totalQuantityAvailable?: number | null; // null = unknown (no children), 0 = all OOS + totalKioskQuantityAvailable?: number | null; + manualInventory: boolean; + isBelowThreshold: boolean; + isBelowKioskThreshold: boolean; + + // Option-level data + options: DutchieProductOptionSnapshot[]; + + // Full raw product node at this crawl time + rawPayload: any; + + crawledAt: Date; + createdAt: Date; + updatedAt: Date; +} + +/** + * CrawlJob - tracks crawl execution status + */ +export interface CrawlJob { + id: number; + jobType: 'discovery' | 'product_crawl' | 'resolve_ids'; + dispensaryId?: number; + status: 'pending' | 'running' | 'completed' | 'failed'; + startedAt?: Date; + completedAt?: Date; + errorMessage?: string; + productsFound?: number; + snapshotsCreated?: number; + metadata?: any; + createdAt: Date; + updatedAt: Date; +} + +/** + * 
JobSchedule - recurring job configuration with jitter support + * Times "wander" around the clock due to random jitter after each run + */ +export type JobStatus = 'success' | 'error' | 'partial' | 'running' | null; + +export interface JobSchedule { + id: number; + jobName: string; + description?: string; + enabled: boolean; + + // Timing configuration + baseIntervalMinutes: number; // e.g., 240 (4 hours) + jitterMinutes: number; // e.g., 30 (±30 minutes) + + // Last run tracking + lastRunAt?: Date; + lastStatus?: JobStatus; + lastErrorMessage?: string; + lastDurationMs?: number; + + // Next run (calculated with jitter) + nextRunAt?: Date; + + // Job-specific config + jobConfig?: Record; + + createdAt: Date; + updatedAt: Date; +} + +/** + * JobRunLog - history of job executions + */ +export interface JobRunLog { + id: number; + scheduleId: number; + jobName: string; + status: 'pending' | 'running' | 'success' | 'error' | 'partial'; + startedAt?: Date; + completedAt?: Date; + durationMs?: number; + errorMessage?: string; + + // Results summary + itemsProcessed?: number; + itemsSucceeded?: number; + itemsFailed?: number; + + metadata?: any; + createdAt: Date; +} + +// ============================================================ +// GRAPHQL OPERATION TYPES +// ============================================================ + +export interface FilteredProductsVariables { + includeEnterpriseSpecials: boolean; + productsFilter: { + dispensaryId: string; + pricingType: 'rec' | 'med'; + strainTypes?: string[]; + subcategories?: string[]; + Status?: string; + types?: string[]; + useCache?: boolean; + isDefaultSort?: boolean; + sortBy?: string; + sortDirection?: number; + bypassOnlineThresholds?: boolean; + isKioskMenu?: boolean; + removeProductsBelowOptionThresholds?: boolean; + }; + page: number; + perPage: number; +} + +export interface GetAddressBasedDispensaryDataVariables { + input: { + dispensaryId: string; // The slug like "AZ-Deeply-Rooted" + }; +} + +export interface 
ConsumerDispensariesVariables { + filter: { + lat: number; + lng: number; + radius: number; // in meters or km + isDelivery?: boolean; + searchText?: string; + }; +} + +// ============================================================ +// API RESPONSE TYPES +// ============================================================ + +export interface DashboardStats { + dispensaryCount: number; + productCount: number; + snapshotCount24h: number; + lastCrawlTime?: Date; + failedJobCount: number; + brandCount: number; + categoryCount: number; +} + +export interface CategorySummary { + type: string; + subcategory: string; + productCount: number; + dispensaryCount: number; + avgPrice?: number; +} + +export interface BrandSummary { + brandName: string; + brandId?: string; + brandLogoUrl?: string; + productCount: number; + dispensaryCount: number; +} diff --git a/backend/src/index.ts b/backend/src/index.ts index ddcdfc01..d1b387c8 100755 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -56,6 +56,8 @@ import parallelScrapeRoutes from './routes/parallel-scrape'; import scheduleRoutes from './routes/schedule'; import crawlerSandboxRoutes from './routes/crawler-sandbox'; import versionRoutes from './routes/version'; +import publicApiRoutes from './routes/public-api'; +import { dutchieAZRouter } from './dutchie-az'; import { trackApiUsage, checkRateLimit } from './middleware/apiTokenTracker'; import { startCrawlScheduler } from './services/crawl-scheduler'; import { validateWordPressPermissions } from './middleware/wordpressPermissions'; @@ -86,6 +88,11 @@ app.use('/api/parallel-scrape', parallelScrapeRoutes); app.use('/api/schedule', scheduleRoutes); app.use('/api/crawler-sandbox', crawlerSandboxRoutes); app.use('/api/version', versionRoutes); +app.use('/api/dutchie-az', dutchieAZRouter); + +// Public API v1 - External consumer endpoints (WordPress, etc.) 
+// Uses dutchie_az data pipeline with per-dispensary API key auth +app.use('/api/v1', publicApiRoutes); async function startServer() { try { diff --git a/backend/src/routes/api-permissions.ts b/backend/src/routes/api-permissions.ts index b01944ca..11aadcde 100644 --- a/backend/src/routes/api-permissions.ts +++ b/backend/src/routes/api-permissions.ts @@ -27,18 +27,18 @@ router.get('/', requireRole('superadmin', 'admin'), async (req, res) => { } }); -// Get all stores for dropdown (must be before /:id to avoid route conflict) -router.get('/stores', requireRole('superadmin', 'admin'), async (req, res) => { +// Get all dispensaries for dropdown (must be before /:id to avoid route conflict) +router.get('/dispensaries', requireRole('superadmin', 'admin'), async (req, res) => { try { const result = await pool.query(` SELECT id, name - FROM stores + FROM dispensaries ORDER BY name `); - res.json({ stores: result.rows }); + res.json({ dispensaries: result.rows }); } catch (error) { - console.error('Error fetching stores:', error); - res.status(500).json({ error: 'Failed to fetch stores' }); + console.error('Error fetching dispensaries:', error); + res.status(500).json({ error: 'Failed to fetch dispensaries' }); } }); @@ -67,22 +67,22 @@ router.get('/:id', requireRole('superadmin', 'admin'), async (req, res) => { // Create new API permission router.post('/', requireRole('superadmin', 'admin'), async (req, res) => { try { - const { user_name, allowed_ips, allowed_domains, store_id } = req.body; + const { user_name, allowed_ips, allowed_domains, dispensary_id } = req.body; if (!user_name) { return res.status(400).json({ error: 'User name is required' }); } - if (!store_id) { - return res.status(400).json({ error: 'Store is required' }); + if (!dispensary_id) { + return res.status(400).json({ error: 'Dispensary is required' }); } - // Get store name for display - const storeResult = await pool.query('SELECT name FROM stores WHERE id = $1', [store_id]); - if 
(storeResult.rows.length === 0) { - return res.status(400).json({ error: 'Invalid store ID' }); + // Get dispensary name for display + const dispensaryResult = await pool.query('SELECT name FROM dispensaries WHERE id = $1', [dispensary_id]); + if (dispensaryResult.rows.length === 0) { + return res.status(400).json({ error: 'Invalid dispensary ID' }); } - const storeName = storeResult.rows[0].name; + const dispensaryName = dispensaryResult.rows[0].name; const apiKey = generateApiKey(); @@ -93,8 +93,8 @@ router.post('/', requireRole('superadmin', 'admin'), async (req, res) => { allowed_ips, allowed_domains, is_active, - store_id, - store_name + dispensary_id, + dispensary_name ) VALUES ($1, $2, $3, $4, 1, $5, $6) RETURNING * @@ -103,8 +103,8 @@ router.post('/', requireRole('superadmin', 'admin'), async (req, res) => { apiKey, allowed_ips || null, allowed_domains || null, - store_id, - storeName + dispensary_id, + dispensaryName ]); res.status(201).json({ diff --git a/backend/src/routes/public-api.ts b/backend/src/routes/public-api.ts new file mode 100644 index 00000000..344406eb --- /dev/null +++ b/backend/src/routes/public-api.ts @@ -0,0 +1,750 @@ +/** + * Public API Routes for External Consumers (WordPress, etc.) + * + * These routes use the dutchie_az data pipeline and are protected by API key auth. + * Designed for Deeply Rooted and other WordPress sites consuming menu data. 
/**
 * Validates if an IP address matches any of the allowed IP patterns.
 *
 * Entries containing '/' are treated as CIDR ranges, everything else as a
 * single address. Blank and unparsable entries are skipped (the latter with a
 * warning).
 *
 * @param clientIp   - Address of the caller.
 * @param allowedIps - Allow-list entries (single addresses or CIDR ranges).
 * @returns true when the client matches at least one entry; false otherwise,
 *          including when the client address itself cannot be parsed.
 */
function isIpAllowed(clientIp: string, allowedIps: string[]): boolean {
  try {
    const clientAddr = ipaddr.process(clientIp);

    for (const allowedIp of allowedIps) {
      const trimmed = allowedIp.trim();
      if (!trimmed) continue;

      if (trimmed.includes('/')) {
        try {
          const range = ipaddr.parseCIDR(trimmed);
          // match() throws when the address families differ; a valid IPv6
          // range simply cannot contain an IPv4 client (and vice versa), so
          // skip it instead of reporting the entry as invalid.
          if (clientAddr.kind() === range[0].kind() && clientAddr.match(range)) {
            return true;
          }
        } catch (e) {
          console.warn(`Invalid CIDR notation: ${trimmed}`);
          continue;
        }
      } else {
        try {
          const allowedAddr = ipaddr.process(trimmed);
          if (clientAddr.toString() === allowedAddr.toString()) {
            return true;
          }
        } catch (e) {
          console.warn(`Invalid IP address: ${trimmed}`);
          continue;
        }
      }
    }

    return false;
  } catch (error) {
    console.error('Error processing client IP:', error);
    return false;
  }
}

/**
 * Validates if a domain matches any of the allowed domain patterns.
 *
 * Only the hostname of `origin` is compared. An entry of the form
 * `*.example.com` matches `example.com` itself and any of its subdomains;
 * any other entry must equal the hostname. Entries are trimmed and compared
 * case-insensitively, because `URL.hostname` is already lower-cased.
 *
 * @param origin         - Origin or referer URL sent by the client.
 * @param allowedDomains - Allow-list entries.
 * @returns true on a match; false otherwise, including when `origin` is not a
 *          parsable URL.
 */
function isDomainAllowed(origin: string, allowedDomains: string[]): boolean {
  let hostname: string;
  try {
    hostname = new URL(origin).hostname;
  } catch (error) {
    console.error('Error processing domain:', error);
    return false;
  }

  return allowedDomains.some((allowedDomain) => {
    const pattern = allowedDomain.trim().toLowerCase();
    if (!pattern) return false;

    if (pattern.startsWith('*.')) {
      const baseDomain = pattern.substring(2);
      // A bare "*." carries no domain and must not match anything.
      if (!baseDomain) return false;
      return hostname === baseDomain || hostname.endsWith('.' + baseDomain);
    }

    return hostname === pattern;
  });
}

/**
 * Middleware to validate API key and resolve dispensary -> dutchie_az store mapping.
 *
 * Steps:
 *  1. Require the X-API-Key header and look up an active permission row.
 *  2. Enforce the IP allow-list, when the permission has one.
 *  3. Enforce the domain allow-list, when the permission has one AND the
 *     request carries an Origin/Referer header.
 *  4. Resolve the matching dutchie_az dispensary by name (best effort; the
 *     request continues without `dutchie_az_store_id` when nothing matches).
 *  5. Record `last_used_at` in the background and attach the permission to
 *     the request.
 *
 * Responds 401 (missing/invalid key), 403 (IP or domain rejected) or 500
 * (lookup failure) instead of calling `next()`.
 */
async function validatePublicApiKey(
  req: PublicApiRequest,
  res: Response,
  next: NextFunction
) {
  // Header values are typed string | string[]; narrow instead of casting.
  const rawApiKey = req.headers['x-api-key'];
  const apiKey = Array.isArray(rawApiKey) ? rawApiKey[0] : rawApiKey;

  if (!apiKey) {
    return res.status(401).json({
      error: 'Missing API key',
      message: 'Provide your API key in the X-API-Key header'
    });
  }

  try {
    // Query WordPress permissions table with dispensary info
    const result = await pool.query(`
      SELECT
        p.id,
        p.user_name,
        p.api_key,
        p.allowed_ips,
        p.allowed_domains,
        p.is_active,
        p.dispensary_id,
        p.dispensary_name
      FROM wp_dutchie_api_permissions p
      WHERE p.api_key = $1 AND p.is_active = 1
    `, [apiKey]);

    if (result.rows.length === 0) {
      return res.status(401).json({
        error: 'Invalid API key'
      });
    }

    const permission = result.rows[0];

    // Validate IP if configured
    // NOTE(review): X-Forwarded-For / X-Real-IP are client-controlled unless a
    // trusted proxy overwrites them - confirm the deployment guarantees that,
    // otherwise the IP allow-list can be bypassed.
    const clientIp = (req.headers['x-forwarded-for'] as string)?.split(',')[0].trim() ||
                     (req.headers['x-real-ip'] as string) ||
                     req.ip ||
                     req.socket.remoteAddress ||
                     '';

    if (permission.allowed_ips) {
      const allowedIps = permission.allowed_ips.split('\n').filter((ip: string) => ip.trim());

      if (allowedIps.length > 0 && !isIpAllowed(clientIp, allowedIps)) {
        return res.status(403).json({
          error: 'IP address not allowed',
          client_ip: clientIp
        });
      }
    }

    // Validate domain if configured
    const origin = req.get('origin') || req.get('referer') || '';

    // Requests without Origin/Referer (e.g. server-to-server) skip this check.
    if (permission.allowed_domains && origin) {
      const allowedDomains = permission.allowed_domains.split('\n').filter((d: string) => d.trim());

      if (allowedDomains.length > 0 && !isDomainAllowed(origin, allowedDomains)) {
        return res.status(403).json({
          error: 'Domain not allowed',
          origin: origin
        });
      }
    }

    // Resolve the dutchie_az store for this dispensary
    // Match by dispensary name (from main DB) to dutchie_az.dispensaries.name
    // A blank name would turn the prefix match below into LIKE '%', which
    // matches every store, so the lookup is skipped in that case.
    const hasDispensaryName =
      typeof permission.dispensary_name === 'string' &&
      permission.dispensary_name.trim() !== '';

    if (hasDispensaryName) {
      const storeResult = await dutchieAzQuery<{ id: number }>(`
        SELECT id FROM dispensaries
        WHERE LOWER(TRIM(name)) = LOWER(TRIM($1))
           OR LOWER(TRIM(name)) LIKE LOWER(TRIM($1)) || '%'
           OR LOWER(TRIM($1)) LIKE LOWER(TRIM(name)) || '%'
        ORDER BY
          CASE WHEN LOWER(TRIM(name)) = LOWER(TRIM($1)) THEN 0 ELSE 1 END,
          id
        LIMIT 1
      `, [permission.dispensary_name]);

      if (storeResult.rows.length > 0) {
        permission.dutchie_az_store_id = storeResult.rows[0].id;
      }
    }

    // Update last_used_at timestamp (async, don't wait)
    pool.query(`
      UPDATE wp_dutchie_api_permissions
      SET last_used_at = CURRENT_TIMESTAMP
      WHERE id = $1
    `, [permission.id]).catch((err: Error) => {
      console.error('Error updating last_used_at:', err);
    });

    req.apiPermission = permission;
    next();
  } catch (error) {
    console.error('Public API validation error:', error);
    return res.status(500).json({
      error: 'Internal server error during API validation'
    });
  }
}
/**
 * GET /api/v1/products
 * Get products for the authenticated dispensary, each joined with its most
 * recent snapshot for pricing and inventory.
 *
 * Query params:
 * - category: Filter by product type (case-insensitive exact match)
 * - brand: Filter by brand name (case-insensitive substring match)
 * - in_stock_only: 'true' or '1' to return only in-stock products
 * - limit: Max products to return (default: 100, max: 500; values that are
 *          missing, non-numeric or < 1 fall back to the default)
 * - offset: Pagination offset (default: 0; negative or non-numeric values are
 *           treated as 0)
 *
 * Responds 503 when the API key has no dutchie_az store mapping.
 */
router.get('/products', async (req: PublicApiRequest, res: Response) => {
  try {
    const permission = req.apiPermission!;

    // Check if we have a dutchie_az store mapping
    if (!permission.dutchie_az_store_id) {
      return res.status(503).json({
        error: 'No menu data available',
        message: `Menu data for ${permission.dispensary_name} is not yet available. The dispensary may not be set up in the new data pipeline.`,
        dispensary_name: permission.dispensary_name
      });
    }

    const {
      category,
      brand,
      in_stock_only = 'false',
      limit = '100',
      offset = '0'
    } = req.query;

    // Build query
    let whereClause = 'WHERE p.dispensary_id = $1';
    const params: any[] = [permission.dutchie_az_store_id];
    let paramIndex = 2;

    // Filter by stock status if requested
    if (in_stock_only === 'true' || in_stock_only === '1') {
      whereClause += ` AND p.stock_status = 'in_stock'`;
    }

    // Filter by category (maps to 'type' in dutchie_az)
    if (category) {
      whereClause += ` AND LOWER(p.type) = LOWER($${paramIndex})`;
      params.push(category);
      paramIndex++;
    }

    // Filter by brand
    if (brand) {
      whereClause += ` AND LOWER(p.brand_name) LIKE LOWER($${paramIndex})`;
      params.push(`%${brand}%`);
      paramIndex++;
    }

    // Enforce limits. PostgreSQL rejects negative LIMIT/OFFSET values, so
    // out-of-range input is clamped here instead of surfacing as a 500.
    const requestedLimit = parseInt(limit as string, 10);
    const limitNum = Number.isFinite(requestedLimit) && requestedLimit > 0
      ? Math.min(requestedLimit, 500)
      : 100;
    const requestedOffset = parseInt(offset as string, 10);
    const offsetNum = Number.isFinite(requestedOffset) && requestedOffset > 0
      ? requestedOffset
      : 0;

    // The count query takes the filter parameters only (no LIMIT/OFFSET).
    const filterParams = [...params];
    params.push(limitNum, offsetNum);

    // Query products with latest snapshot data
    const { rows: products } = await dutchieAzQuery<any>(`
      SELECT
        p.id,
        p.external_product_id as dutchie_id,
        p.name,
        p.brand_name as brand,
        p.type as category,
        p.subcategory,
        p.strain_type,
        p.stock_status,
        p.thc,
        p.cbd,
        p.primary_image_url as image_url,
        p.images,
        p.effects,
        p.created_at,
        p.updated_at,
        -- Latest snapshot data for pricing
        s.rec_min_price_cents,
        s.rec_max_price_cents,
        s.rec_min_special_price_cents,
        s.med_min_price_cents,
        s.med_max_price_cents,
        s.med_min_special_price_cents,
        s.total_quantity_available,
        s.options,
        s.special,
        s.crawled_at as snapshot_at
      FROM dutchie_products p
      LEFT JOIN LATERAL (
        SELECT * FROM dutchie_product_snapshots
        WHERE dutchie_product_id = p.id
        ORDER BY crawled_at DESC
        LIMIT 1
      ) s ON true
      ${whereClause}
      ORDER BY p.name ASC
      LIMIT $${paramIndex} OFFSET $${paramIndex + 1}
    `, params);

    // Get total count for pagination
    const { rows: countRows } = await dutchieAzQuery<any>(`
      SELECT COUNT(*) as total FROM dutchie_products p ${whereClause}
    `, filterParams);
    const total = parseInt(countRows[0]?.total || '0', 10);

    // Transform products to backward-compatible format
    const transformedProducts = products.map((p) => {
      // Extract first image URL from images array
      let imageUrl = p.image_url;
      if (!imageUrl && p.images && Array.isArray(p.images) && p.images.length > 0) {
        const firstImage = p.images[0];
        imageUrl = typeof firstImage === 'string' ? firstImage : firstImage?.url;
      }

      // Convert prices from cents to dollars
      const regularPrice = p.rec_min_price_cents
        ? (p.rec_min_price_cents / 100).toFixed(2)
        : null;
      const salePrice = p.rec_min_special_price_cents
        ? (p.rec_min_special_price_cents / 100).toFixed(2)
        : null;

      return {
        id: p.id,
        dutchie_id: p.dutchie_id,
        name: p.name,
        brand: p.brand || null,
        category: p.category || null,
        subcategory: p.subcategory || null,
        strain_type: p.strain_type || null,
        description: null, // Not stored in dutchie_products, would need snapshot
        regular_price: regularPrice,
        sale_price: salePrice,
        thc_percentage: p.thc ? parseFloat(p.thc) : null,
        cbd_percentage: p.cbd ? parseFloat(p.cbd) : null,
        image_url: imageUrl || null,
        in_stock: p.stock_status === 'in_stock',
        on_special: p.special || false,
        effects: p.effects || [],
        options: p.options || [],
        // Unknown inventory (null) is reported as 0 in this legacy format.
        quantity_available: p.total_quantity_available || 0,
        created_at: p.created_at,
        updated_at: p.updated_at,
        snapshot_at: p.snapshot_at
      };
    });

    res.json({
      success: true,
      dispensary: permission.dispensary_name,
      products: transformedProducts,
      pagination: {
        total,
        limit: limitNum,
        offset: offsetNum,
        has_more: offsetNum + products.length < total
      }
    });
  } catch (error: any) {
    console.error('Public API products error:', error);
    res.status(500).json({
      error: 'Failed to fetch products',
      message: error.message
    });
  }
});

/**
 * GET /api/v1/products/:id
 * Get a single product by ID, joined with its most recent snapshot.
 *
 * Responds 400 when :id is not a positive integer, 503 when the API key has
 * no dutchie_az store mapping, and 404 when the product does not exist for
 * the authenticated dispensary.
 */
router.get('/products/:id', async (req: PublicApiRequest, res: Response) => {
  try {
    const permission = req.apiPermission!;
    const { id } = req.params;

    // Reject malformed IDs up front; passing them to the query would fail the
    // cast in PostgreSQL and surface as a 500.
    const productId = Number(id);
    if (!Number.isSafeInteger(productId) || productId <= 0) {
      return res.status(400).json({
        error: 'Invalid product ID'
      });
    }

    if (!permission.dutchie_az_store_id) {
      return res.status(503).json({
        error: 'No menu data available',
        message: `Menu data for ${permission.dispensary_name} is not yet available.`
      });
    }

    // Get product with latest snapshot
    const { rows: products } = await dutchieAzQuery<any>(`
      SELECT
        p.*,
        s.rec_min_price_cents,
        s.rec_max_price_cents,
        s.rec_min_special_price_cents,
        s.med_min_price_cents,
        s.med_max_price_cents,
        s.total_quantity_available,
        s.options,
        s.special,
        s.crawled_at as snapshot_at
      FROM dutchie_products p
      LEFT JOIN LATERAL (
        SELECT * FROM dutchie_product_snapshots
        WHERE dutchie_product_id = p.id
        ORDER BY crawled_at DESC
        LIMIT 1
      ) s ON true
      WHERE p.id = $1 AND p.dispensary_id = $2
    `, [productId, permission.dutchie_az_store_id]);

    if (products.length === 0) {
      return res.status(404).json({
        error: 'Product not found'
      });
    }

    const p = products[0];

    // Extract first image URL
    let imageUrl = p.primary_image_url;
    if (!imageUrl && p.images && Array.isArray(p.images) && p.images.length > 0) {
      const firstImage = p.images[0];
      imageUrl = typeof firstImage === 'string' ? firstImage : firstImage?.url;
    }

    res.json({
      success: true,
      product: {
        id: p.id,
        dutchie_id: p.external_product_id,
        name: p.name,
        brand: p.brand_name || null,
        category: p.type || null,
        subcategory: p.subcategory || null,
        strain_type: p.strain_type || null,
        regular_price: p.rec_min_price_cents ? (p.rec_min_price_cents / 100).toFixed(2) : null,
        sale_price: p.rec_min_special_price_cents ? (p.rec_min_special_price_cents / 100).toFixed(2) : null,
        thc_percentage: p.thc ? parseFloat(p.thc) : null,
        cbd_percentage: p.cbd ? parseFloat(p.cbd) : null,
        image_url: imageUrl || null,
        images: p.images || [],
        in_stock: p.stock_status === 'in_stock',
        on_special: p.special || false,
        effects: p.effects || [],
        options: p.options || [],
        // Unknown inventory (null) is reported as 0 in this legacy format.
        quantity_available: p.total_quantity_available || 0,
        created_at: p.created_at,
        updated_at: p.updated_at,
        snapshot_at: p.snapshot_at
      }
    });
  } catch (error: any) {
    console.error('Public API product detail error:', error);
    res.status(500).json({
      error: 'Failed to fetch product',
      message: error.message
    });
  }
});
NULL + GROUP BY type, subcategory + ORDER BY type, subcategory + `, [permission.dutchie_az_store_id]); + + res.json({ + success: true, + dispensary: permission.dispensary_name, + categories + }); + } catch (error: any) { + console.error('Public API categories error:', error); + res.status(500).json({ + error: 'Failed to fetch categories', + message: error.message + }); + } +}); + +/** + * GET /api/v1/brands + * Get all brands for the authenticated dispensary + */ +router.get('/brands', async (req: PublicApiRequest, res: Response) => { + try { + const permission = req.apiPermission!; + + if (!permission.dutchie_az_store_id) { + return res.status(503).json({ + error: 'No menu data available', + message: `Menu data for ${permission.dispensary_name} is not yet available.` + }); + } + + const { rows: brands } = await dutchieAzQuery(` + SELECT + brand_name as brand, + COUNT(*) as product_count, + COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock_count + FROM dutchie_products + WHERE dispensary_id = $1 AND brand_name IS NOT NULL + GROUP BY brand_name + ORDER BY product_count DESC + `, [permission.dutchie_az_store_id]); + + res.json({ + success: true, + dispensary: permission.dispensary_name, + brands + }); + } catch (error: any) { + console.error('Public API brands error:', error); + res.status(500).json({ + error: 'Failed to fetch brands', + message: error.message + }); + } +}); + +/** + * GET /api/v1/specials + * Get products on special/sale for the authenticated dispensary + */ +router.get('/specials', async (req: PublicApiRequest, res: Response) => { + try { + const permission = req.apiPermission!; + + if (!permission.dutchie_az_store_id) { + return res.status(503).json({ + error: 'No menu data available', + message: `Menu data for ${permission.dispensary_name} is not yet available.` + }); + } + + const { limit = '100', offset = '0' } = req.query; + const limitNum = Math.min(parseInt(limit as string, 10) || 100, 500); + const offsetNum = parseInt(offset as 
string, 10) || 0; + + // Get products with special pricing from latest snapshot + const { rows: products } = await dutchieAzQuery(` + SELECT + p.id, + p.external_product_id as dutchie_id, + p.name, + p.brand_name as brand, + p.type as category, + p.subcategory, + p.strain_type, + p.stock_status, + p.primary_image_url as image_url, + s.rec_min_price_cents, + s.rec_min_special_price_cents, + s.special, + s.options, + p.updated_at, + s.crawled_at as snapshot_at + FROM dutchie_products p + INNER JOIN LATERAL ( + SELECT * FROM dutchie_product_snapshots + WHERE dutchie_product_id = p.id + ORDER BY crawled_at DESC + LIMIT 1 + ) s ON true + WHERE p.dispensary_id = $1 + AND s.special = true + AND p.stock_status = 'in_stock' + ORDER BY p.name ASC + LIMIT $2 OFFSET $3 + `, [permission.dutchie_az_store_id, limitNum, offsetNum]); + + // Get total count + const { rows: countRows } = await dutchieAzQuery(` + SELECT COUNT(*) as total + FROM dutchie_products p + INNER JOIN LATERAL ( + SELECT special FROM dutchie_product_snapshots + WHERE dutchie_product_id = p.id + ORDER BY crawled_at DESC + LIMIT 1 + ) s ON true + WHERE p.dispensary_id = $1 + AND s.special = true + AND p.stock_status = 'in_stock' + `, [permission.dutchie_az_store_id]); + + const transformedProducts = products.map((p) => ({ + id: p.id, + dutchie_id: p.dutchie_id, + name: p.name, + brand: p.brand || null, + category: p.category || null, + strain_type: p.strain_type || null, + regular_price: p.rec_min_price_cents ? (p.rec_min_price_cents / 100).toFixed(2) : null, + sale_price: p.rec_min_special_price_cents ? 
(p.rec_min_special_price_cents / 100).toFixed(2) : null, + image_url: p.image_url || null, + in_stock: p.stock_status === 'in_stock', + options: p.options || [], + updated_at: p.updated_at, + snapshot_at: p.snapshot_at + })); + + res.json({ + success: true, + dispensary: permission.dispensary_name, + specials: transformedProducts, + pagination: { + total: parseInt(countRows[0]?.total || '0', 10), + limit: limitNum, + offset: offsetNum, + has_more: offsetNum + products.length < parseInt(countRows[0]?.total || '0', 10) + } + }); + } catch (error: any) { + console.error('Public API specials error:', error); + res.status(500).json({ + error: 'Failed to fetch specials', + message: error.message + }); + } +}); + +/** + * GET /api/v1/menu + * Get complete menu summary for the authenticated dispensary + */ +router.get('/menu', async (req: PublicApiRequest, res: Response) => { + try { + const permission = req.apiPermission!; + + if (!permission.dutchie_az_store_id) { + return res.status(503).json({ + error: 'No menu data available', + message: `Menu data for ${permission.dispensary_name} is not yet available.` + }); + } + + // Get counts by category + const { rows: categoryCounts } = await dutchieAzQuery(` + SELECT + type as category, + COUNT(*) as total, + COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock + FROM dutchie_products + WHERE dispensary_id = $1 AND type IS NOT NULL + GROUP BY type + ORDER BY total DESC + `, [permission.dutchie_az_store_id]); + + // Get overall stats + const { rows: stats } = await dutchieAzQuery(` + SELECT + COUNT(*) as total_products, + COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock_count, + COUNT(DISTINCT brand_name) as brand_count, + COUNT(DISTINCT type) as category_count, + MAX(updated_at) as last_updated + FROM dutchie_products + WHERE dispensary_id = $1 + `, [permission.dutchie_az_store_id]); + + // Get specials count + const { rows: specialsCount } = await dutchieAzQuery(` + SELECT COUNT(*) as count + FROM 
dutchie_products p + INNER JOIN LATERAL ( + SELECT special FROM dutchie_product_snapshots + WHERE dutchie_product_id = p.id + ORDER BY crawled_at DESC + LIMIT 1 + ) s ON true + WHERE p.dispensary_id = $1 + AND s.special = true + AND p.stock_status = 'in_stock' + `, [permission.dutchie_az_store_id]); + + const summary = stats[0] || {}; + + res.json({ + success: true, + dispensary: permission.dispensary_name, + menu: { + total_products: parseInt(summary.total_products || '0', 10), + in_stock_count: parseInt(summary.in_stock_count || '0', 10), + brand_count: parseInt(summary.brand_count || '0', 10), + category_count: parseInt(summary.category_count || '0', 10), + specials_count: parseInt(specialsCount[0]?.count || '0', 10), + last_updated: summary.last_updated, + categories: categoryCounts.map((c) => ({ + name: c.category, + total: parseInt(c.total, 10), + in_stock: parseInt(c.in_stock, 10) + })) + } + }); + } catch (error: any) { + console.error('Public API menu error:', error); + res.status(500).json({ + error: 'Failed to fetch menu summary', + message: error.message + }); + } +}); + +export default router; diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index eeea8e50..4c50568d 100755 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -20,6 +20,9 @@ import { ScraperSchedule } from './pages/ScraperSchedule'; import { ScraperTools } from './pages/ScraperTools'; import { ChangeApproval } from './pages/ChangeApproval'; import { ApiPermissions } from './pages/ApiPermissions'; +import { DutchieAZSchedule } from './pages/DutchieAZSchedule'; +import { DutchieAZStores } from './pages/DutchieAZStores'; +import { DutchieAZStoreDetail } from './pages/DutchieAZStoreDetail'; import { PrivateRoute } from './components/PrivateRoute'; export default function App() { @@ -46,6 +49,9 @@ export default function App() { } /> } /> } /> + } /> + } /> + } /> } /> } /> diff --git a/frontend/src/components/Layout.tsx b/frontend/src/components/Layout.tsx index 
729fdf1f..1e52d805 100755 --- a/frontend/src/components/Layout.tsx +++ b/frontend/src/components/Layout.tsx @@ -13,6 +13,7 @@ import { Wrench, Activity, Clock, + Calendar, Shield, FileText, Settings, @@ -162,6 +163,21 @@ export function Layout({ children }: LayoutProps) { /> + + } + label="AZ Stores" + isActive={isActive('/dutchie-az', false)} + /> + } + label="AZ Schedule" + isActive={isActive('/dutchie-az-schedule')} + /> + + ('/api/api-permissions'); } - async getApiPermissionStores() { - return this.request<{ stores: Array<{ id: number; name: string }> }>('/api/api-permissions/stores'); + async getApiPermissionDispensaries() { + return this.request<{ dispensaries: Array<{ id: number; name: string }> }>('/api/api-permissions/dispensaries'); } - async createApiPermission(data: { user_name: string; store_id: number; allowed_ips?: string; allowed_domains?: string }) { + async createApiPermission(data: { user_name: string; dispensary_id: number; allowed_ips?: string; allowed_domains?: string }) { return this.request<{ permission: any; message: string }>('/api/api-permissions', { method: 'POST', body: JSON.stringify(data), @@ -525,6 +525,313 @@ class ApiClient { image_tag: string; }>('/api/version'); } + + // ============================================================ + // DUTCHIE AZ API + // ============================================================ + + // Dutchie AZ Dashboard + async getDutchieAZDashboard() { + return this.request<{ + dispensaryCount: number; + productCount: number; + snapshotCount24h: number; + lastCrawlTime: string | null; + failedJobCount: number; + brandCount: number; + categoryCount: number; + }>('/api/dutchie-az/dashboard'); + } + + // Dutchie AZ Schedules (CRUD) + async getDutchieAZSchedules() { + return this.request<{ + schedules: Array<{ + id: number; + jobName: string; + description: string | null; + enabled: boolean; + baseIntervalMinutes: number; + jitterMinutes: number; + lastRunAt: string | null; + lastStatus: string | null; + 
lastErrorMessage: string | null; + lastDurationMs: number | null; + nextRunAt: string | null; + jobConfig: Record | null; + createdAt: string; + updatedAt: string; + }>; + }>('/api/dutchie-az/admin/schedules'); + } + + async getDutchieAZSchedule(id: number) { + return this.request<{ + id: number; + jobName: string; + description: string | null; + enabled: boolean; + baseIntervalMinutes: number; + jitterMinutes: number; + lastRunAt: string | null; + lastStatus: string | null; + lastErrorMessage: string | null; + lastDurationMs: number | null; + nextRunAt: string | null; + jobConfig: Record | null; + createdAt: string; + updatedAt: string; + }>(`/api/dutchie-az/admin/schedules/${id}`); + } + + async createDutchieAZSchedule(data: { + jobName: string; + description?: string; + enabled?: boolean; + baseIntervalMinutes: number; + jitterMinutes: number; + jobConfig?: Record; + startImmediately?: boolean; + }) { + return this.request('/api/dutchie-az/admin/schedules', { + method: 'POST', + body: JSON.stringify(data), + }); + } + + async updateDutchieAZSchedule(id: number, data: { + description?: string; + enabled?: boolean; + baseIntervalMinutes?: number; + jitterMinutes?: number; + jobConfig?: Record; + }) { + return this.request(`/api/dutchie-az/admin/schedules/${id}`, { + method: 'PUT', + body: JSON.stringify(data), + }); + } + + async deleteDutchieAZSchedule(id: number) { + return this.request<{ success: boolean; message: string }>(`/api/dutchie-az/admin/schedules/${id}`, { + method: 'DELETE', + }); + } + + async triggerDutchieAZSchedule(id: number) { + return this.request<{ success: boolean; message: string }>(`/api/dutchie-az/admin/schedules/${id}/trigger`, { + method: 'POST', + }); + } + + async initDutchieAZSchedules() { + return this.request<{ success: boolean; schedules: any[] }>('/api/dutchie-az/admin/schedules/init', { + method: 'POST', + }); + } + + // Dutchie AZ Run Logs + async getDutchieAZScheduleLogs(scheduleId: number, limit?: number, offset?: number) { + 
const params = new URLSearchParams(); + if (limit) params.append('limit', limit.toString()); + if (offset) params.append('offset', offset.toString()); + const queryString = params.toString() ? `?${params.toString()}` : ''; + return this.request<{ logs: any[]; total: number }>(`/api/dutchie-az/admin/schedules/${scheduleId}/logs${queryString}`); + } + + async getDutchieAZRunLogs(options?: { scheduleId?: number; jobName?: string; limit?: number; offset?: number }) { + const params = new URLSearchParams(); + if (options?.scheduleId) params.append('scheduleId', options.scheduleId.toString()); + if (options?.jobName) params.append('jobName', options.jobName); + if (options?.limit) params.append('limit', options.limit.toString()); + if (options?.offset) params.append('offset', options.offset.toString()); + const queryString = params.toString() ? `?${params.toString()}` : ''; + return this.request<{ logs: any[]; total: number }>(`/api/dutchie-az/admin/run-logs${queryString}`); + } + + // Dutchie AZ Scheduler Control + async getDutchieAZSchedulerStatus() { + return this.request<{ running: boolean; pollIntervalMs: number }>('/api/dutchie-az/admin/scheduler/status'); + } + + async startDutchieAZScheduler() { + return this.request<{ success: boolean; message: string }>('/api/dutchie-az/admin/scheduler/start', { + method: 'POST', + }); + } + + async stopDutchieAZScheduler() { + return this.request<{ success: boolean; message: string }>('/api/dutchie-az/admin/scheduler/stop', { + method: 'POST', + }); + } + + async triggerDutchieAZImmediateCrawl() { + return this.request<{ success: boolean; message: string }>('/api/dutchie-az/admin/scheduler/trigger', { + method: 'POST', + }); + } + + // Dutchie AZ Stores + async getDutchieAZStores(params?: { city?: string; hasPlatformId?: boolean; limit?: number; offset?: number }) { + const searchParams = new URLSearchParams(); + if (params?.city) searchParams.append('city', params.city); + if (params?.hasPlatformId !== undefined) 
searchParams.append('hasPlatformId', String(params.hasPlatformId)); + if (params?.limit) searchParams.append('limit', params.limit.toString()); + if (params?.offset) searchParams.append('offset', params.offset.toString()); + const queryString = searchParams.toString() ? `?${searchParams.toString()}` : ''; + return this.request<{ stores: any[]; total: number }>(`/api/dutchie-az/stores${queryString}`); + } + + async getDutchieAZStore(id: number) { + return this.request(`/api/dutchie-az/stores/${id}`); + } + + async getDutchieAZStoreSummary(id: number) { + return this.request<{ + dispensary: any; + totalProducts: number; + inStockCount: number; + outOfStockCount: number; + unknownStockCount: number; + missingFromFeedCount: number; + categories: Array<{ type: string; subcategory: string; product_count: number }>; + brands: Array<{ brand_name: string; product_count: number }>; + brandCount: number; + categoryCount: number; + lastCrawl: any | null; + }>(`/api/dutchie-az/stores/${id}/summary`); + } + + async getDutchieAZStoreProducts(id: number, params?: { + stockStatus?: string; + type?: string; + subcategory?: string; + brandName?: string; + search?: string; + limit?: number; + offset?: number; + }) { + const searchParams = new URLSearchParams(); + if (params?.stockStatus) searchParams.append('stockStatus', params.stockStatus); + if (params?.type) searchParams.append('type', params.type); + if (params?.subcategory) searchParams.append('subcategory', params.subcategory); + if (params?.brandName) searchParams.append('brandName', params.brandName); + if (params?.search) searchParams.append('search', params.search); + if (params?.limit) searchParams.append('limit', params.limit.toString()); + if (params?.offset) searchParams.append('offset', params.offset.toString()); + const queryString = searchParams.toString() ? 
`?${searchParams.toString()}` : ''; + return this.request<{ + products: Array<{ + id: number; + external_id: string; + name: string; + slug: string; + brand: string; + type: string; + subcategory: string; + strain_type: string; + stock_status: string; + in_stock: boolean; + missing_from_feed: boolean; + regular_price: number | null; + sale_price: number | null; + med_price: number | null; + med_sale_price: number | null; + thc_percentage: number | null; + cbd_percentage: number | null; + image_url: string | null; + description: string | null; + options: any | null; + total_quantity: number | null; + first_seen_at: string; + last_seen_at: string; + updated_at: string; + snapshot_at: string | null; + }>; + total: number; + limit: number; + offset: number; + }>(`/api/dutchie-az/stores/${id}/products${queryString}`); + } + + async getDutchieAZStoreBrands(id: number) { + return this.request<{ + brands: Array<{ brand: string; product_count: number }>; + }>(`/api/dutchie-az/stores/${id}/brands`); + } + + async getDutchieAZStoreCategories(id: number) { + return this.request<{ + categories: Array<{ type: string; subcategory: string; product_count: number }>; + }>(`/api/dutchie-az/stores/${id}/categories`); + } + + // Dutchie AZ Debug + async getDutchieAZDebugSummary() { + return this.request<{ + tableCounts: { + dispensary_count: string; + dispensaries_with_platform_id: string; + product_count: string; + snapshot_count: string; + job_count: string; + completed_jobs: string; + failed_jobs: string; + }; + stockDistribution: Array<{ stock_status: string; count: string }>; + productsByDispensary: Array<{ + id: number; + name: string; + slug: string; + platform_dispensary_id: string; + product_count: string; + last_product_update: string; + }>; + recentSnapshots: Array<{ + id: number; + dutchie_product_id: number; + product_name: string; + dispensary_name: string; + crawled_at: string; + }>; + }>('/api/dutchie-az/debug/summary'); + } + + async getDutchieAZDebugStore(id: number) 
{ + return this.request<{ + dispensary: any; + productStats: { + total_products: string; + in_stock: string; + out_of_stock: string; + unknown: string; + missing_from_feed: string; + earliest_product: string; + latest_product: string; + last_update: string; + }; + snapshotStats: { + total_snapshots: string; + earliest_snapshot: string; + latest_snapshot: string; + products_with_snapshots: string; + }; + recentJobs: any[]; + sampleProducts: { + inStock: any[]; + outOfStock: any[]; + }; + categories: Array<{ type: string; subcategory: string; count: string }>; + }>(`/api/dutchie-az/debug/store/${id}`); + } + + async triggerDutchieAZCrawl(id: number, options?: { pricingType?: string; useBothModes?: boolean }) { + return this.request(`/api/dutchie-az/admin/crawl/${id}`, { + method: 'POST', + body: JSON.stringify(options || {}), + }); + } } export const api = new ApiClient(API_URL); diff --git a/frontend/src/pages/ApiPermissions.tsx b/frontend/src/pages/ApiPermissions.tsx index a4e4670d..7a43de10 100644 --- a/frontend/src/pages/ApiPermissions.tsx +++ b/frontend/src/pages/ApiPermissions.tsx @@ -12,23 +12,23 @@ interface ApiPermission { is_active: number; created_at: string; last_used_at: string | null; - store_id: number | null; - store_name: string | null; + dispensary_id: number | null; + dispensary_name: string | null; } -interface Store { +interface Dispensary { id: number; name: string; } export function ApiPermissions() { const [permissions, setPermissions] = useState([]); - const [stores, setStores] = useState([]); + const [dispensaries, setDispensaries] = useState([]); const [loading, setLoading] = useState(true); const [showAddForm, setShowAddForm] = useState(false); const [newPermission, setNewPermission] = useState({ user_name: '', - store_id: '', + dispensary_id: '', allowed_ips: '', allowed_domains: '', }); @@ -36,15 +36,15 @@ export function ApiPermissions() { useEffect(() => { loadPermissions(); - loadStores(); + loadDispensaries(); }, []); - const 
loadStores = async () => { + const loadDispensaries = async () => { try { - const data = await api.getApiPermissionStores(); - setStores(data.stores); + const data = await api.getApiPermissionDispensaries(); + setDispensaries(data.dispensaries); } catch (error: any) { - console.error('Failed to load stores:', error); + console.error('Failed to load dispensaries:', error); } }; @@ -68,18 +68,18 @@ export function ApiPermissions() { return; } - if (!newPermission.store_id) { - setNotification({ message: 'Store is required', type: 'error' }); + if (!newPermission.dispensary_id) { + setNotification({ message: 'Dispensary is required', type: 'error' }); return; } try { const result = await api.createApiPermission({ ...newPermission, - store_id: parseInt(newPermission.store_id), + dispensary_id: parseInt(newPermission.dispensary_id), }); setNotification({ message: result.message, type: 'success' }); - setNewPermission({ user_name: '', store_id: '', allowed_ips: '', allowed_domains: '' }); + setNewPermission({ user_name: '', dispensary_id: '', allowed_ips: '', allowed_domains: '' }); setShowAddForm(false); loadPermissions(); } catch (error: any) { @@ -182,22 +182,22 @@ export function ApiPermissions() {
-

The store this API token can access

+

The dispensary this API token can access

@@ -261,7 +261,7 @@ export function ApiPermissions() { User Name - Store + Dispensary API Key @@ -290,7 +290,7 @@ export function ApiPermissions() {
{perm.user_name}
-
{perm.store_name || No store}
+
{perm.dispensary_name || No dispensary}
diff --git a/frontend/src/pages/DutchieAZSchedule.tsx b/frontend/src/pages/DutchieAZSchedule.tsx new file mode 100644 index 00000000..8b3726ad --- /dev/null +++ b/frontend/src/pages/DutchieAZSchedule.tsx @@ -0,0 +1,697 @@ +import { useEffect, useState } from 'react'; +import { Layout } from '../components/Layout'; +import { api } from '../lib/api'; + +interface JobSchedule { + id: number; + jobName: string; + description: string | null; + enabled: boolean; + baseIntervalMinutes: number; + jitterMinutes: number; + lastRunAt: string | null; + lastStatus: string | null; + lastErrorMessage: string | null; + lastDurationMs: number | null; + nextRunAt: string | null; + jobConfig: Record | null; + createdAt: string; + updatedAt: string; +} + +interface RunLog { + id: number; + schedule_id: number; + job_name: string; + status: string; + started_at: string | null; + completed_at: string | null; + duration_ms: number | null; + error_message: string | null; + items_processed: number | null; + items_succeeded: number | null; + items_failed: number | null; + metadata: any; + created_at: string; +} + +export function DutchieAZSchedule() { + const [schedules, setSchedules] = useState([]); + const [runLogs, setRunLogs] = useState([]); + const [schedulerStatus, setSchedulerStatus] = useState<{ running: boolean; pollIntervalMs: number } | null>(null); + const [loading, setLoading] = useState(true); + const [autoRefresh, setAutoRefresh] = useState(true); + const [activeTab, setActiveTab] = useState<'schedules' | 'logs'>('schedules'); + const [editingSchedule, setEditingSchedule] = useState(null); + const [showCreateModal, setShowCreateModal] = useState(false); + + useEffect(() => { + loadData(); + + if (autoRefresh) { + const interval = setInterval(loadData, 10000); + return () => clearInterval(interval); + } + }, [autoRefresh]); + + const loadData = async () => { + try { + const [schedulesData, logsData, statusData] = await Promise.all([ + api.getDutchieAZSchedules(), + 
api.getDutchieAZRunLogs({ limit: 50 }), + api.getDutchieAZSchedulerStatus(), + ]); + + setSchedules(schedulesData.schedules || []); + setRunLogs(logsData.logs || []); + setSchedulerStatus(statusData); + } catch (error) { + console.error('Failed to load schedule data:', error); + } finally { + setLoading(false); + } + }; + + const handleToggleScheduler = async () => { + try { + if (schedulerStatus?.running) { + await api.stopDutchieAZScheduler(); + } else { + await api.startDutchieAZScheduler(); + } + await loadData(); + } catch (error) { + console.error('Failed to toggle scheduler:', error); + } + }; + + const handleInitSchedules = async () => { + try { + await api.initDutchieAZSchedules(); + await loadData(); + } catch (error) { + console.error('Failed to initialize schedules:', error); + } + }; + + const handleTriggerSchedule = async (id: number) => { + try { + await api.triggerDutchieAZSchedule(id); + await loadData(); + } catch (error) { + console.error('Failed to trigger schedule:', error); + } + }; + + const handleToggleEnabled = async (schedule: JobSchedule) => { + try { + await api.updateDutchieAZSchedule(schedule.id, { enabled: !schedule.enabled }); + await loadData(); + } catch (error) { + console.error('Failed to toggle schedule:', error); + } + }; + + const handleUpdateSchedule = async (id: number, updates: Partial) => { + try { + await api.updateDutchieAZSchedule(id, updates); + setEditingSchedule(null); + await loadData(); + } catch (error) { + console.error('Failed to update schedule:', error); + } + }; + + const handleDeleteSchedule = async (id: number) => { + if (!confirm('Are you sure you want to delete this schedule?')) return; + try { + await api.deleteDutchieAZSchedule(id); + await loadData(); + } catch (error) { + console.error('Failed to delete schedule:', error); + } + }; + + const formatTimeAgo = (dateString: string | null) => { + if (!dateString) return 'Never'; + const date = new Date(dateString); + const now = new Date(); + const diffMs 
= now.getTime() - date.getTime(); + const diffMins = Math.floor(diffMs / 60000); + const diffHours = Math.floor(diffMins / 60); + const diffDays = Math.floor(diffHours / 24); + + if (diffMins < 1) return 'Just now'; + if (diffMins < 60) return `${diffMins}m ago`; + if (diffHours < 24) return `${diffHours}h ago`; + return `${diffDays}d ago`; + }; + + const formatTimeUntil = (dateString: string | null) => { + if (!dateString) return 'Not scheduled'; + const date = new Date(dateString); + const now = new Date(); + const diffMs = date.getTime() - now.getTime(); + + if (diffMs < 0) return 'Overdue'; + + const diffMins = Math.floor(diffMs / 60000); + const diffHours = Math.floor(diffMins / 60); + + if (diffMins < 60) return `${diffMins}m`; + return `${diffHours}h ${diffMins % 60}m`; + }; + + const formatDuration = (ms: number | null) => { + if (!ms) return '-'; + if (ms < 1000) return `${ms}ms`; + const seconds = Math.floor(ms / 1000); + const minutes = Math.floor(seconds / 60); + if (minutes < 1) return `${seconds}s`; + return `${minutes}m ${seconds % 60}s`; + }; + + const formatInterval = (baseMinutes: number, jitterMinutes: number) => { + const hours = Math.floor(baseMinutes / 60); + const mins = baseMinutes % 60; + const jitterHours = Math.floor(jitterMinutes / 60); + const jitterMins = jitterMinutes % 60; + + let base = hours > 0 ? `${hours}h` : ''; + if (mins > 0) base += `${mins}m`; + + let jitter = jitterHours > 0 ? `${jitterHours}h` : ''; + if (jitterMins > 0) jitter += `${jitterMins}m`; + + return `${base} +/- ${jitter}`; + }; + + const getStatusColor = (status: string | null) => { + switch (status) { + case 'success': return { bg: '#d1fae5', color: '#065f46' }; + case 'running': return { bg: '#dbeafe', color: '#1e40af' }; + case 'error': return { bg: '#fee2e2', color: '#991b1b' }; + case 'partial': return { bg: '#fef3c7', color: '#92400e' }; + default: return { bg: '#f3f4f6', color: '#374151' }; + } + }; + + return ( + +
+
+
+

Dutchie AZ Schedule

+

+ Jittered scheduling for Arizona Dutchie product crawls +

+
+
+ +
+
+ + {/* Scheduler Status Card */} +
+
+
+
Scheduler Status
+
+ + + {schedulerStatus?.running ? 'Running' : 'Stopped'} + +
+
+
+
Poll Interval
+
+ {schedulerStatus ? `${schedulerStatus.pollIntervalMs / 1000}s` : '-'} +
+
+
+
Active Schedules
+
+ {schedules.filter(s => s.enabled).length} / {schedules.length} +
+
+
+
+ + {schedules.length === 0 && ( + + )} +
+
+ + {/* Tabs */} +
+ + +
+ + {activeTab === 'schedules' && ( +
+ {schedules.length === 0 ? ( +
+ No schedules configured. Click "Initialize Default Schedules" to create the default crawl schedule. +
+ ) : ( + + + + + + + + + + + + + + {schedules.map((schedule) => ( + + + + + + + + + + ))} + +
Job NameEnabledInterval (Jitter)Last RunNext RunLast StatusActions
+
{schedule.jobName}
+ {schedule.description && ( +
+ {schedule.description} +
+ )} + {schedule.jobConfig && ( +
+ Config: {JSON.stringify(schedule.jobConfig)} +
+ )} +
+ + +
+ {formatInterval(schedule.baseIntervalMinutes, schedule.jitterMinutes)} +
+
+
{formatTimeAgo(schedule.lastRunAt)}
+ {schedule.lastDurationMs && ( +
+ Duration: {formatDuration(schedule.lastDurationMs)} +
+ )} +
+
+ {formatTimeUntil(schedule.nextRunAt)} +
+ {schedule.nextRunAt && ( +
+ {new Date(schedule.nextRunAt).toLocaleString()} +
+ )} +
+ {schedule.lastStatus ? ( +
+ + {schedule.lastStatus} + + {schedule.lastErrorMessage && ( + + )} +
+ ) : ( + Never run + )} +
+
+ + +
+
+ )} +
+ )} + + {activeTab === 'logs' && ( +
+ {runLogs.length === 0 ? ( +
+ No run logs yet. Logs will appear here after jobs execute. +
+ ) : ( + + + + + + + + + + + + + + {runLogs.map((log) => ( + + + + + + + + + + ))} + +
JobStatusStartedDurationProcessedSucceededFailed
+
{log.job_name}
+
Run #{log.id}
+
+ + {log.status} + + {log.error_message && ( + + )} + +
{log.started_at ? new Date(log.started_at).toLocaleString() : '-'}
+
{formatTimeAgo(log.started_at)}
+
+ {formatDuration(log.duration_ms)} + + {log.items_processed ?? '-'} + + {log.items_succeeded ?? '-'} + + {log.items_failed ?? '-'} +
+ )} +
+ )} + + {/* Edit Modal */} + {editingSchedule && ( +
+
+

Edit Schedule: {editingSchedule.jobName}

+ +
+ + setEditingSchedule({ ...editingSchedule, description: e.target.value })} + style={{ + width: '100%', + padding: '10px', + borderRadius: '6px', + border: '1px solid #ddd', + fontSize: '14px' + }} + /> +
+ +
+
+ + setEditingSchedule({ ...editingSchedule, baseIntervalMinutes: parseInt(e.target.value) || 240 })} + style={{ + width: '100%', + padding: '10px', + borderRadius: '6px', + border: '1px solid #ddd', + fontSize: '14px' + }} + /> +
+ = {Math.floor(editingSchedule.baseIntervalMinutes / 60)}h {editingSchedule.baseIntervalMinutes % 60}m +
+
+
+ + setEditingSchedule({ ...editingSchedule, jitterMinutes: parseInt(e.target.value) || 30 })} + style={{ + width: '100%', + padding: '10px', + borderRadius: '6px', + border: '1px solid #ddd', + fontSize: '14px' + }} + /> +
+ +/- {editingSchedule.jitterMinutes}m random offset +
+
+
+ +
+ Effective range: {Math.floor((editingSchedule.baseIntervalMinutes - editingSchedule.jitterMinutes) / 60)}h {(editingSchedule.baseIntervalMinutes - editingSchedule.jitterMinutes) % 60}m + {' to '} + {Math.floor((editingSchedule.baseIntervalMinutes + editingSchedule.jitterMinutes) / 60)}h {(editingSchedule.baseIntervalMinutes + editingSchedule.jitterMinutes) % 60}m +
+ +
+ + +
+
+
+ )} +
+
+ ); +} diff --git a/frontend/src/pages/DutchieAZStoreDetail.tsx b/frontend/src/pages/DutchieAZStoreDetail.tsx new file mode 100644 index 00000000..22ce0d2d --- /dev/null +++ b/frontend/src/pages/DutchieAZStoreDetail.tsx @@ -0,0 +1,620 @@ +import { useEffect, useState } from 'react'; +import { useParams, useNavigate } from 'react-router-dom'; +import { Layout } from '../components/Layout'; +import { api } from '../lib/api'; +import { + Building2, + Phone, + MapPin, + ExternalLink, + ArrowLeft, + Package, + Tag, + RefreshCw, + ChevronDown, + Clock, + CheckCircle, + XCircle, + AlertCircle +} from 'lucide-react'; + +export function DutchieAZStoreDetail() { + const { id } = useParams(); + const navigate = useNavigate(); + const [summary, setSummary] = useState(null); + const [products, setProducts] = useState([]); + const [loading, setLoading] = useState(true); + const [productsLoading, setProductsLoading] = useState(false); + const [activeTab, setActiveTab] = useState<'products' | 'brands' | 'categories'>('products'); + const [showUpdateDropdown, setShowUpdateDropdown] = useState(false); + const [isUpdating, setIsUpdating] = useState(false); + const [searchQuery, setSearchQuery] = useState(''); + const [currentPage, setCurrentPage] = useState(1); + const [totalProducts, setTotalProducts] = useState(0); + const [itemsPerPage] = useState(25); + const [stockFilter, setStockFilter] = useState(''); + + const formatDate = (dateStr: string) => { + if (!dateStr) return 'Never'; + const date = new Date(dateStr); + const now = new Date(); + const diffMs = now.getTime() - date.getTime(); + const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24)); + + if (diffDays === 0) return 'Today'; + if (diffDays === 1) return 'Yesterday'; + if (diffDays < 7) return `${diffDays} days ago`; + return date.toLocaleDateString(); + }; + + useEffect(() => { + if (id) { + loadStoreSummary(); + } + }, [id]); + + useEffect(() => { + if (id && activeTab === 'products') { + loadProducts(); + } + }, 
[id, currentPage, searchQuery, stockFilter, activeTab]); + + // Reset to page 1 when filters change + useEffect(() => { + setCurrentPage(1); + }, [searchQuery, stockFilter]); + + const loadStoreSummary = async () => { + setLoading(true); + try { + const data = await api.getDutchieAZStoreSummary(parseInt(id!, 10)); + setSummary(data); + } catch (error) { + console.error('Failed to load store summary:', error); + } finally { + setLoading(false); + } + }; + + const loadProducts = async () => { + if (!id) return; + setProductsLoading(true); + try { + const data = await api.getDutchieAZStoreProducts(parseInt(id, 10), { + search: searchQuery || undefined, + stockStatus: stockFilter || undefined, + limit: itemsPerPage, + offset: (currentPage - 1) * itemsPerPage, + }); + setProducts(data.products); + setTotalProducts(data.total); + } catch (error) { + console.error('Failed to load products:', error); + } finally { + setProductsLoading(false); + } + }; + + const handleCrawl = async () => { + setShowUpdateDropdown(false); + setIsUpdating(true); + try { + await api.triggerDutchieAZCrawl(parseInt(id!, 10)); + alert('Crawl started! Refresh the page in a few minutes to see updated data.'); + } catch (error) { + console.error('Failed to trigger crawl:', error); + alert('Failed to start crawl. Please try again.'); + } finally { + setIsUpdating(false); + } + }; + + const totalPages = Math.ceil(totalProducts / itemsPerPage); + + if (loading) { + return ( + +
+
+

Loading store...

+
+
+ ); + } + + if (!summary) { + return ( + +
+

Store not found

+
+
+ ); + } + + const { dispensary, brands, categories, lastCrawl } = summary; + + return ( + +
+ {/* Header */} +
+ + + {/* Update Button */} +
+ + + {showUpdateDropdown && !isUpdating && ( +
+ +
+ )} +
+
+ + {/* Store Header */} +
+
+
+
+ +
+
+

+ {dispensary.dba_name || dispensary.name} +

+ {dispensary.company_name && ( +

{dispensary.company_name}

+ )} +

+ Platform ID: {dispensary.platform_dispensary_id || 'Not resolved'} +

+
+
+
+ +
+ Last Crawl: + + {lastCrawl?.completed_at + ? new Date(lastCrawl.completed_at).toLocaleDateString('en-US', { + year: 'numeric', + month: 'short', + day: 'numeric', + hour: '2-digit', + minute: '2-digit' + }) + : 'Never'} + + {lastCrawl?.status && ( + + {lastCrawl.status} + + )} +
+
+
+
+ {dispensary.address && ( +
+ + + {dispensary.address}, {dispensary.city}, {dispensary.state} {dispensary.zip} + +
+ )} + {dispensary.phone && ( +
+ + {dispensary.phone} +
+ )} + {dispensary.website && ( + + + Website + + )} +
+
+ + {/* Dashboard Metrics */} +
+ + + + + + + + + +
+ + {/* Content Tabs */} +
+
+
+ + + +
+
+ +
+ {activeTab === 'products' && ( +
+ {/* Search and Filter */} +
+ setSearchQuery(e.target.value)} + className="input input-bordered input-sm flex-1" + /> + + {(searchQuery || stockFilter) && ( + + )} +
+ {totalProducts} products +
+
+ + {productsLoading ? ( +
+
+

Loading products...

+
+ ) : products.length === 0 ? ( +

No products found

+ ) : ( + <> +
+ + + + + + + + + + + + + + + + {products.map((product) => ( + + + + + + + + + + + + ))} + +
ImageProduct NameBrandTypePriceTHC %StockQtyLast Updated
+ {product.image_url ? ( + {product.name} e.currentTarget.style.display = 'none'} + /> + ) : '-'} + +
{product.name}
+
+
{product.brand || '-'}
+
+ {product.type || '-'} + {product.subcategory && ( + {product.subcategory} + )} + + {product.sale_price ? ( +
+ ${product.sale_price} + ${product.regular_price} +
+ ) : product.regular_price ? ( + `$${product.regular_price}` + ) : '-'} +
+ {product.thc_percentage ? ( + {product.thc_percentage}% + ) : '-'} + + {product.stock_status === 'in_stock' ? ( + In Stock + ) : product.stock_status === 'out_of_stock' ? ( + Out + ) : ( + Unknown + )} + + {product.total_quantity != null ? product.total_quantity : '-'} + + {product.updated_at ? formatDate(product.updated_at) : '-'} +
+
+ + {/* Pagination */} + {totalPages > 1 && ( +
+ + +
+ {Array.from({ length: Math.min(5, totalPages) }, (_, i) => { + let page: number; + if (totalPages <= 5) { + page = i + 1; + } else if (currentPage <= 3) { + page = i + 1; + } else if (currentPage >= totalPages - 2) { + page = totalPages - 4 + i; + } else { + page = currentPage - 2 + i; + } + return ( + + ); + })} +
+ + +
+ )} + + )} +
+ )} + + {activeTab === 'brands' && ( +
+ {brands.length === 0 ? ( +

No brands found

+ ) : ( +
+ {brands.map((brand: any) => ( + + ))} +
+ )} +
+ )} + + {activeTab === 'categories' && ( +
+ {categories.length === 0 ? ( +

No categories found

+ ) : ( +
+ {categories.map((cat: any, idx: number) => ( +
+

{cat.type}

+ {cat.subcategory && ( +

{cat.subcategory}

+ )} +

+ {cat.product_count} product{cat.product_count !== 1 ? 's' : ''} +

+
+ ))} +
+ )} +
+ )} +
+
+
+
+ ); +} diff --git a/frontend/src/pages/DutchieAZStores.tsx b/frontend/src/pages/DutchieAZStores.tsx new file mode 100644 index 00000000..1efe29ed --- /dev/null +++ b/frontend/src/pages/DutchieAZStores.tsx @@ -0,0 +1,194 @@ +import { useEffect, useState } from 'react'; +import { useNavigate } from 'react-router-dom'; +import { Layout } from '../components/Layout'; +import { api } from '../lib/api'; +import { + Building2, + MapPin, + Package, + RefreshCw, + CheckCircle, + XCircle +} from 'lucide-react'; + +export function DutchieAZStores() { + const navigate = useNavigate(); + const [stores, setStores] = useState([]); + const [loading, setLoading] = useState(true); + const [dashboard, setDashboard] = useState(null); + + useEffect(() => { + loadData(); + }, []); + + const loadData = async () => { + setLoading(true); + try { + const [storesData, dashboardData] = await Promise.all([ + api.getDutchieAZStores({ limit: 100 }), + api.getDutchieAZDashboard(), + ]); + setStores(storesData.stores); + setDashboard(dashboardData); + } catch (error) { + console.error('Failed to load data:', error); + } finally { + setLoading(false); + } + }; + + if (loading) { + return ( + +
+
+

Loading stores...

+
+
+ ); + } + + return ( + +
+ {/* Header */} +
+
+

Dutchie AZ Stores

+

+ Arizona dispensaries using the Dutchie platform - data from the new pipeline +

+
+ +
+ + {/* Dashboard Stats */} + {dashboard && ( +
+
+
+
+ +
+
+

Dispensaries

+

{dashboard.dispensaryCount}

+
+
+
+ +
+
+
+ +
+
+

Total Products

+

{dashboard.productCount.toLocaleString()}

+
+
+
+ +
+
+
+ +
+
+

Brands

+

{dashboard.brandCount}

+
+
+
+ +
+
+
+ +
+
+

Failed Jobs (24h)

+

{dashboard.failedJobCount}

+
+
+
+
+ )} + + {/* Stores List */} +
+
+

All Stores ({stores.length})

+
+
+ + + + + + + + + + + + {stores.map((store) => ( + + + + + + + + ))} + +
NameCityPlatform IDStatusActions
+
+
+ +
+
+

{store.dba_name || store.name}

+ {store.company_name && store.company_name !== store.name && ( +

{store.company_name}

+ )} +
+
+
+
+ + {store.city}, {store.state} +
+
+ {store.platform_dispensary_id ? ( + {store.platform_dispensary_id} + ) : ( + Not Resolved + )} + + {store.platform_dispensary_id ? ( + Ready + ) : ( + Pending + )} + + +
+
+
+
+
+ ); +}