From 2f483b308487d0ce2c13ed42d2bb8b8a4dc075ca Mon Sep 17 00:00:00 2001 From: Kelly Date: Tue, 9 Dec 2025 00:05:34 -0700 Subject: [PATCH] feat: SEO template library, discovery pipeline, and orchestrator enhancements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## SEO Template Library - Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration) - Add Template Library tab in SEO Orchestrator with accordion-based editors - Add template preview, validation, and variable injection engine - Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate ## Discovery Pipeline - Add promotion.ts for discovery location validation and promotion - Add discover-all-states.ts script for multi-state discovery - Add promotion log migration (067) - Enhance discovery routes and types ## Orchestrator & Admin - Add crawl_enabled filter to stores page - Add API permissions page - Add job queue management - Add price analytics routes - Add markets and intelligence routes - Enhance dashboard and worker monitoring ## Infrastructure - Add migrations for worker definitions, SEO settings, field alignment - Add canonical pipeline for scraper v2 - Update hydration and sync orchestrator - Enhance multi-state query service 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- CLAUDE.md | 106 +- backend/migrations/051_worker_definitions.sql | 119 ++ backend/migrations/052_seo_settings.sql | 49 + .../066_dutchie_field_alignment.sql | 140 +++ backend/migrations/067_promotion_log.sql | 24 + .../migrations/068_crawler_status_alerts.sql | 95 ++ backend/migrations/069_six_stage_status.sql | 163 +++ backend/migrations/070_product_variants.sql | 239 ++++ .../071_harmonize_store_products.sql | 53 + backend/migrations/072_product_views.sql | 74 ++ backend/src/discovery/discovery-crawler.ts | 63 +- backend/src/discovery/index.ts | 13 +- 
backend/src/discovery/location-discovery.ts | 199 +-- backend/src/discovery/promotion.ts | 579 +++++++++ backend/src/discovery/routes.ts | 170 ++- backend/src/discovery/types.ts | 39 + backend/src/hydration/canonical-upsert.ts | 237 +++- backend/src/hydration/incremental-sync.ts | 151 ++- backend/src/hydration/worker.ts | 3 +- backend/src/index.ts | 40 +- backend/src/multi-state/routes.ts | 24 +- .../src/multi-state/state-query-service.ts | 513 ++++++-- backend/src/platforms/dutchie/client.ts | 5 +- backend/src/routes/admin-debug.ts | 168 +++ backend/src/routes/analytics.ts | 12 +- backend/src/routes/api-permissions.ts | 2 +- backend/src/routes/campaigns.ts | 18 +- backend/src/routes/categories.ts | 8 +- backend/src/routes/click-analytics.ts | 20 +- backend/src/routes/dashboard.ts | 24 +- backend/src/routes/dispensaries.ts | 110 +- backend/src/routes/intelligence.ts | 254 ++++ backend/src/routes/job-queue.ts | 467 +++++++ backend/src/routes/markets.ts | 667 ++++++++++ backend/src/routes/orchestrator-admin.ts | 474 ++++++- backend/src/routes/pipeline.ts | 1084 +++++++++++++++++ backend/src/routes/price-analytics.ts | 472 +++++++ backend/src/routes/products.ts | 249 +++- backend/src/routes/public-api.ts | 196 +-- backend/src/routes/scraper-monitor.ts | 6 +- backend/src/routes/seo.ts | 341 +++++- backend/src/routes/settings.ts | 54 + backend/src/routes/stores.ts | 104 +- backend/src/routes/workers.ts | 424 ++++++- backend/src/scraper-v2/canonical-pipeline.ts | 353 ++++++ backend/src/scraper-v2/engine.ts | 4 + backend/src/scraper-v2/index.ts | 5 + backend/src/scripts/discover-all-states.ts | 385 ++++++ backend/src/scripts/estimate-bandwidth.ts | 173 +++ backend/src/scripts/retry-platform-ids.ts | 137 +++ backend/src/scripts/run-discovery.ts | 22 +- .../src/scripts/test-crawl-to-canonical.ts | 271 +++++ backend/src/seo/settings.ts | 521 ++++++++ backend/src/seo/template-engine.ts | 369 ++++++ backend/src/services/LegalStateService.ts | 6 +- 
.../src/system/services/sync-orchestrator.ts | 70 +- cannaiq/dist/index.html | 2 +- cannaiq/src/App.tsx | 11 + cannaiq/src/components/Layout.tsx | 8 +- cannaiq/src/components/StateSelector.tsx | 15 +- .../src/components/StoreOrchestratorPanel.tsx | 43 +- cannaiq/src/lib/api.ts | 194 ++- cannaiq/src/pages/AISettings.tsx | 350 ++++++ cannaiq/src/pages/ApiPermissions.tsx | 564 ++++++--- cannaiq/src/pages/CrossStateCompare.tsx | 185 ++- cannaiq/src/pages/Dashboard.tsx | 126 +- cannaiq/src/pages/DiscoveryLocations.tsx | 279 +++++ cannaiq/src/pages/DispensaryDetail.tsx | 103 +- cannaiq/src/pages/JobQueue.tsx | 727 +++++++++++ cannaiq/src/pages/NationalDashboard.tsx | 79 +- cannaiq/src/pages/OrchestratorBrands.tsx | 324 ++++- cannaiq/src/pages/OrchestratorDashboard.tsx | 179 +-- cannaiq/src/pages/OrchestratorProducts.tsx | 583 ++++++++- cannaiq/src/pages/OrchestratorStores.tsx | 176 ++- cannaiq/src/pages/PriceCompare.tsx | 407 +++++++ cannaiq/src/pages/ProductDetail.tsx | 299 ++++- cannaiq/src/pages/Specials.tsx | 379 ++++++ cannaiq/src/pages/StateDetail.tsx | 383 ++++++ cannaiq/src/pages/StateHeatmap.tsx | 8 +- cannaiq/src/pages/WorkersDashboard.tsx | 523 +++++++- .../src/pages/admin/seo/SeoOrchestrator.tsx | 787 +++++++++++- cannaiq/vite.config.ts | 4 +- docs/DUTCHIE_CRAWL_WORKFLOW.md | 671 ++++++++++ 83 files changed, 16700 insertions(+), 1277 deletions(-) create mode 100644 backend/migrations/051_worker_definitions.sql create mode 100644 backend/migrations/052_seo_settings.sql create mode 100644 backend/migrations/066_dutchie_field_alignment.sql create mode 100644 backend/migrations/067_promotion_log.sql create mode 100644 backend/migrations/068_crawler_status_alerts.sql create mode 100644 backend/migrations/069_six_stage_status.sql create mode 100644 backend/migrations/070_product_variants.sql create mode 100644 backend/migrations/071_harmonize_store_products.sql create mode 100644 backend/migrations/072_product_views.sql create mode 100644 
backend/src/discovery/promotion.ts create mode 100644 backend/src/routes/admin-debug.ts create mode 100644 backend/src/routes/intelligence.ts create mode 100644 backend/src/routes/job-queue.ts create mode 100644 backend/src/routes/markets.ts create mode 100644 backend/src/routes/pipeline.ts create mode 100644 backend/src/routes/price-analytics.ts create mode 100644 backend/src/scraper-v2/canonical-pipeline.ts create mode 100644 backend/src/scripts/discover-all-states.ts create mode 100644 backend/src/scripts/estimate-bandwidth.ts create mode 100644 backend/src/scripts/retry-platform-ids.ts create mode 100644 backend/src/scripts/test-crawl-to-canonical.ts create mode 100644 backend/src/seo/settings.ts create mode 100644 backend/src/seo/template-engine.ts create mode 100644 cannaiq/src/pages/AISettings.tsx create mode 100644 cannaiq/src/pages/DiscoveryLocations.tsx create mode 100644 cannaiq/src/pages/JobQueue.tsx create mode 100644 cannaiq/src/pages/PriceCompare.tsx create mode 100644 cannaiq/src/pages/Specials.tsx create mode 100644 cannaiq/src/pages/StateDetail.tsx create mode 100644 docs/DUTCHIE_CRAWL_WORKFLOW.md diff --git a/CLAUDE.md b/CLAUDE.md index b88ec83e..c890fb3b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -489,12 +489,78 @@ import { saveImage, getImageUrl } from '../utils/storage-adapter'; ## UI ANONYMIZATION RULES -- No vendor names in forward-facing URLs: use `/api/az/...`, `/az`, `/az-schedule` +- No vendor names in forward-facing URLs - No "dutchie", "treez", "jane", "weedmaps", "leafly" visible in consumer UIs - Internal admin tools may show provider names for debugging --- +## DUTCHIE DISCOVERY PIPELINE (Added 2025-01) + +### Overview +Automated discovery of Dutchie-powered dispensaries across all US states. + +### Flow +``` +1. getAllCitiesByState GraphQL → Get all cities for a state +2. ConsumerDispensaries GraphQL → Get stores for each city +3. Upsert to dutchie_discovery_locations (keyed by platform_location_id) +4. 
AUTO-VALIDATE: Check required fields +5. AUTO-PROMOTE: Create/update dispensaries with crawl_enabled=true +6. Log all actions to dutchie_promotion_log +``` + +### Tables +| Table | Purpose | +|-------|---------| +| `dutchie_discovery_cities` | Cities known to have dispensaries | +| `dutchie_discovery_locations` | Raw discovered store data | +| `dispensaries` | Canonical stores (promoted from discovery) | +| `dutchie_promotion_log` | Audit trail for validation/promotion | + +### Files +| File | Purpose | +|------|---------| +| `src/discovery/discovery-crawler.ts` | Main orchestrator | +| `src/discovery/location-discovery.ts` | GraphQL fetching | +| `src/discovery/promotion.ts` | Validation & promotion logic | +| `src/scripts/run-discovery.ts` | CLI interface | +| `migrations/067_promotion_log.sql` | Audit log table | + +### GraphQL Hashes (in `src/platforms/dutchie/client.ts`) +| Query | Hash | +|-------|------| +| `GetAllCitiesByState` | `ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6` | +| `ConsumerDispensaries` | `0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b` | + +### Usage +```bash +# Discover all stores in a state +npx tsx src/scripts/run-discovery.ts discover:state AZ +npx tsx src/scripts/run-discovery.ts discover:state CA + +# Check stats +npx tsx src/scripts/run-discovery.ts stats +``` + +### Validation Rules +A discovery location must have: +- `platform_location_id` (MongoDB ObjectId, 24 hex chars) +- `name` +- `city` +- `state_code` +- `platform_menu_url` + +Invalid records are marked `status='rejected'` with errors logged. 
+ +### Key Design Decisions +- `platform_location_id` MUST be MongoDB ObjectId (not slug) +- Old geo-based discovery stored slugs → deleted as garbage data +- Rate limit: 2 seconds between city requests to avoid API throttling +- Promotion is idempotent via `ON CONFLICT (platform_dispensary_id)` + +--- + ## FUTURE TODO / PENDING FEATURES - [ ] Orchestrator observability dashboard @@ -639,16 +705,19 @@ export default defineConfig({ - **DB**: Use the single CannaiQ database via `CANNAIQ_DB_*` env vars. No hardcoded names. - **Images**: No MinIO. Save to local /images/products//-.webp (and brands); preserve original URL; serve via backend static. -- **Dutchie GraphQL**: Endpoint https://dutchie.com/api-3/graphql. Variables must use productsFilter.dispensaryId (platform_dispensary_id). Mode A: Status="Active". Mode B: Status=null/activeOnly:false. +- **Dutchie GraphQL**: Endpoint https://dutchie.com/api-3/graphql. Variables must use productsFilter.dispensaryId (platform_dispensary_id). **CRITICAL: Use `Status: 'Active'`, NOT `null`** (null returns 0 products). - **cName/slug**: Derive cName from each store's menu_url (/embedded-menu/ or /dispensary/). No hardcoded defaults. -- **Dual-mode always**: useBothModes:true to get pricing (Mode A) + full coverage (Mode B). - **Batch DB writes**: Chunk products/snapshots/missing (100–200) to avoid OOM. -- **OOS/missing**: Include inactive/OOS in Mode B. Union A+B, dedupe by external_product_id+dispensary_id. -- **API/Frontend**: Use /api/az/... endpoints (stores/products/brands/categories/summary/dashboard). +- **API/Frontend**: Use `/api/stores`, `/api/products`, `/api/workers`, `/api/pipeline` endpoints. - **Scheduling**: Crawl only menu_type='dutchie' AND platform_dispensary_id IS NOT NULL. 4-hour crawl with jitter. -- **Monitor**: /scraper-monitor (and /az-schedule) should show active/recent jobs from job_run_logs/crawl_jobs. +- **THC/CBD values**: Clamp to ≤100 - some products report milligrams as percentages. 
+- **Column names**: Use `name_raw`, `brand_name_raw`, `category_raw`, `subcategory_raw` (NOT `name`, `brand_name`, etc.) + +- **Monitor**: `/api/workers` shows active/recent jobs from job queue. - **No slug guessing**: Never use defaults. Always derive per store from menu_url and resolve platform IDs per location. +**📖 Full Documentation: See `docs/DUTCHIE_CRAWL_WORKFLOW.md` for complete pipeline documentation.** + --- ### Detailed Rules @@ -691,7 +760,7 @@ export default defineConfig({ - Use dutchie GraphQL pipeline only for `menu_type='dutchie'`. 6) **Frontend** - - Forward-facing URLs: `/api/az`, `/az`, `/az-schedule`; no vendor names. + - Forward-facing URLs should not contain vendor names. - `/scraper-schedule`: add filters/search, keep as master view for all schedules; reflect platform ID/menu_type status and controls. 7) **No slug guessing** @@ -740,18 +809,21 @@ export default defineConfig({ 16) **API Route Semantics** - **Route Groups:** - - `/api/admin/...` = Admin/operator actions (crawl triggers, health checks) - - `/api/az/...` = Arizona data slice (stores, products, metrics) + **Route Groups (as registered in `src/index.ts`):** + - `/api/stores` = Store/dispensary CRUD and listing + - `/api/products` = Product listing and details + - `/api/workers` = Job queue monitoring (replaces legacy `/api/dutchie-az/...`) + - `/api/pipeline` = Crawl pipeline triggers + - `/api/admin/orchestrator` = Orchestrator admin actions + - `/api/discovery` = Platform discovery (Dutchie, etc.) - `/api/v1/...` = Public API for external consumers (WordPress, etc.) - **Crawl Trigger (CANONICAL):** - ``` - POST /api/admin/crawl/:dispensaryId - ``` + **Crawl Trigger:** + Check `/api/pipeline` or `/api/admin/orchestrator` routes for crawl triggers. + The legacy `POST /api/admin/crawl/:dispensaryId` does NOT exist. 
17) **Monitoring and logging** - - /scraper-monitor (and /az-schedule) should show active/recent jobs from job_run_logs/crawl_jobs + - `/api/workers` shows active/recent jobs from job queue - Auto-refresh every 30 seconds - System Logs page should show real log data, not just startup messages @@ -783,8 +855,8 @@ export default defineConfig({ - **Job schedules** (managed in `job_schedules` table): - `dutchie_az_menu_detection`: Runs daily with 60-min jitter - `dutchie_az_product_crawl`: Runs every 4 hours with 30-min jitter - - **Trigger schedules**: `curl -X POST /api/az/admin/schedules/{id}/trigger` - - **Check schedule status**: `curl /api/az/admin/schedules` + - **Monitor jobs**: `GET /api/workers` + - **Trigger crawls**: Check `/api/pipeline` routes 21) **Frontend Architecture - AVOID OVER-ENGINEERING** diff --git a/backend/migrations/051_worker_definitions.sql b/backend/migrations/051_worker_definitions.sql new file mode 100644 index 00000000..e1dea87a --- /dev/null +++ b/backend/migrations/051_worker_definitions.sql @@ -0,0 +1,119 @@ +-- Migration 051: Worker Definitions +-- Creates a dedicated workers table for named workers with roles and assignments + +-- Workers table - defines named workers with roles +CREATE TABLE IF NOT EXISTS workers ( + id SERIAL PRIMARY KEY, + name VARCHAR(100) NOT NULL UNIQUE, + role VARCHAR(100) NOT NULL, + description TEXT, + enabled BOOLEAN DEFAULT TRUE, + + -- Schedule configuration (for dedicated crawl workers) + schedule_type VARCHAR(50) DEFAULT 'interval', -- 'interval', 'cron', 'manual' + interval_minutes INTEGER DEFAULT 240, + cron_expression VARCHAR(100), -- e.g., '0 */4 * * *' + jitter_minutes INTEGER DEFAULT 30, + + -- Assignment scope + assignment_type VARCHAR(50) DEFAULT 'all', -- 'all', 'state', 'dispensary', 'chain' + assigned_state_codes TEXT[], -- e.g., ['AZ', 'CA'] + assigned_dispensary_ids INTEGER[], + assigned_chain_ids INTEGER[], + + -- Job configuration + job_type VARCHAR(50) NOT NULL DEFAULT 
'dutchie_product_crawl', + job_config JSONB DEFAULT '{}', + priority INTEGER DEFAULT 0, + max_concurrent INTEGER DEFAULT 1, + + -- Status tracking + status VARCHAR(50) DEFAULT 'idle', -- 'idle', 'running', 'paused', 'error' + last_run_at TIMESTAMPTZ, + last_status VARCHAR(50), + last_error TEXT, + last_duration_ms INTEGER, + next_run_at TIMESTAMPTZ, + current_job_id INTEGER, + + -- Metrics + total_runs INTEGER DEFAULT 0, + successful_runs INTEGER DEFAULT 0, + failed_runs INTEGER DEFAULT 0, + avg_duration_ms INTEGER, + + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW() +); + +-- Worker run history +CREATE TABLE IF NOT EXISTS worker_runs ( + id SERIAL PRIMARY KEY, + worker_id INTEGER NOT NULL REFERENCES workers(id) ON DELETE CASCADE, + started_at TIMESTAMPTZ DEFAULT NOW(), + completed_at TIMESTAMPTZ, + status VARCHAR(50) DEFAULT 'running', -- 'running', 'success', 'error', 'cancelled' + duration_ms INTEGER, + + -- What was processed + jobs_created INTEGER DEFAULT 0, + jobs_completed INTEGER DEFAULT 0, + jobs_failed INTEGER DEFAULT 0, + dispensaries_crawled INTEGER DEFAULT 0, + products_found INTEGER DEFAULT 0, + + error_message TEXT, + metadata JSONB DEFAULT '{}', + + created_at TIMESTAMPTZ DEFAULT NOW() +); + +-- Index for efficient lookups +CREATE INDEX IF NOT EXISTS idx_workers_enabled ON workers(enabled) WHERE enabled = TRUE; +CREATE INDEX IF NOT EXISTS idx_workers_next_run ON workers(next_run_at) WHERE enabled = TRUE; +CREATE INDEX IF NOT EXISTS idx_workers_status ON workers(status); +CREATE INDEX IF NOT EXISTS idx_worker_runs_worker_id ON worker_runs(worker_id); +CREATE INDEX IF NOT EXISTS idx_worker_runs_started_at ON worker_runs(started_at DESC); + +-- Add worker_id to dispensary_crawl_jobs if not exists +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'dispensary_crawl_jobs' AND column_name = 'assigned_worker_id' + ) THEN + ALTER TABLE dispensary_crawl_jobs ADD COLUMN 
assigned_worker_id INTEGER REFERENCES workers(id); + END IF; +END $$; + +-- Migrate existing job_schedules workers to new workers table +INSERT INTO workers (name, role, description, enabled, interval_minutes, jitter_minutes, job_type, job_config, last_run_at, last_status, last_error, last_duration_ms, next_run_at) +SELECT + worker_name, + worker_role, + description, + enabled, + base_interval_minutes, + jitter_minutes, + job_name, + job_config, + last_run_at, + last_status, + last_error_message, + last_duration_ms, + next_run_at +FROM job_schedules +WHERE worker_name IS NOT NULL +ON CONFLICT (name) DO UPDATE SET + updated_at = NOW(); + +-- Available worker roles (reference) +COMMENT ON TABLE workers IS 'Named workers with specific roles and assignments. Roles include: +- product_sync: Crawls products from dispensary menus +- store_discovery: Discovers new dispensary locations +- entry_point_finder: Detects menu providers and resolves platform IDs +- analytics_refresh: Refreshes materialized views and analytics +- price_monitor: Monitors price changes and triggers alerts +- inventory_sync: Syncs inventory levels +- image_processor: Downloads and processes product images +- data_validator: Validates data integrity'; diff --git a/backend/migrations/052_seo_settings.sql b/backend/migrations/052_seo_settings.sql new file mode 100644 index 00000000..d3b375d1 --- /dev/null +++ b/backend/migrations/052_seo_settings.sql @@ -0,0 +1,49 @@ +-- Migration 052: SEO Settings Table +-- Key/value store for SEO Orchestrator configuration + +CREATE TABLE IF NOT EXISTS seo_settings ( + id SERIAL PRIMARY KEY, + key TEXT UNIQUE NOT NULL, + value JSONB NOT NULL, + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() +); + +-- Create index on key for fast lookups +CREATE INDEX IF NOT EXISTS idx_seo_settings_key ON seo_settings(key); + +-- Seed with default settings +INSERT INTO seo_settings (key, value) VALUES + -- Section 1: Global Content Generation Settings + 
('primary_prompt_template', '"You are a cannabis industry content expert. Generate SEO-optimized content for {{page_type}} pages about {{subject}}. Focus on: {{focus_areas}}. Maintain a {{tone}} tone and keep content {{length}}."'), + ('regeneration_prompt_template', '"Regenerate the following SEO content with fresh perspectives. Original topic: {{subject}}. Improve upon: {{improvement_areas}}. Maintain compliance with cannabis industry standards."'), + ('default_content_length', '"medium"'), + ('tone_voice', '"informational"'), + + -- Section 2: Automatic Refresh Rules + ('auto_refresh_interval', '"weekly"'), + ('trigger_pct_product_change', 'true'), + ('trigger_pct_brand_change', 'true'), + ('trigger_new_stores', 'true'), + ('trigger_market_shift', 'false'), + ('webhook_url', '""'), + ('notify_on_trigger', 'false'), + + -- Section 3: Page-Level Defaults + ('default_title_template', '"{{state_name}} Dispensaries | Find Cannabis Near You | CannaiQ"'), + ('default_meta_description_template', '"Discover the best dispensaries in {{state_name}}. 
Browse {{dispensary_count}}+ licensed retailers, compare prices, and find cannabis products near you."'), + ('default_slug_template', '"dispensaries-{{state_code_lower}}"'), + ('default_og_image_template', '"/images/seo/og-{{state_code_lower}}.jpg"'), + ('enable_ai_images', 'false'), + + -- Section 4: Crawl / Dataset Configuration + ('primary_data_provider', '"cannaiq"'), + ('fallback_data_provider', '"dutchie"'), + ('min_data_freshness_hours', '24'), + ('stale_data_behavior', '"allow_with_warning"') +ON CONFLICT (key) DO NOTHING; + +-- Record migration +INSERT INTO schema_migrations (version, name, applied_at) +VALUES ('052', 'seo_settings', NOW()) +ON CONFLICT (version) DO NOTHING; diff --git a/backend/migrations/066_dutchie_field_alignment.sql b/backend/migrations/066_dutchie_field_alignment.sql new file mode 100644 index 00000000..da40109a --- /dev/null +++ b/backend/migrations/066_dutchie_field_alignment.sql @@ -0,0 +1,140 @@ +-- Migration 066: Align dispensaries and discovery_locations tables with Dutchie field names +-- Uses snake_case convention (Postgres standard) mapped from Dutchie's camelCase +-- +-- Changes: +-- 1. dispensaries: rename address→address1, zip→zipcode, remove company_name +-- 2. dispensaries: add missing Dutchie fields +-- 3. 
dutchie_discovery_locations: add missing Dutchie fields + +-- ============================================================================ +-- DISPENSARIES TABLE +-- ============================================================================ + +-- Rename address to address1 (matches Dutchie's address1); guarded so a re-run after the rename is a no-op instead of an error +DO $$ BEGIN IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'dispensaries' AND column_name = 'address') THEN ALTER TABLE dispensaries RENAME COLUMN address TO address1; END IF; END $$; + +-- Rename zip to zipcode (matches Dutchie's zip, but we use zipcode for clarity); guarded like the address rename above +DO $$ BEGIN IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'dispensaries' AND column_name = 'zip') THEN ALTER TABLE dispensaries RENAME COLUMN zip TO zipcode; END IF; END $$; + +-- Drop company_name (redundant with name) +ALTER TABLE dispensaries DROP COLUMN IF EXISTS company_name; + +-- Add address2 +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS address2 VARCHAR(255); + +-- Add country +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States'; + +-- Add timezone +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS timezone VARCHAR(50); + +-- Add email +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS email VARCHAR(255); + +-- Add description +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS description TEXT; + +-- Add logo_image (Dutchie: logoImage) +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS logo_image TEXT; + +-- Add banner_image (Dutchie: bannerImage) +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS banner_image TEXT; + +-- Add offer_pickup (Dutchie: offerPickup) +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_pickup BOOLEAN DEFAULT TRUE; + +-- Add offer_delivery (Dutchie: offerDelivery) +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_delivery BOOLEAN DEFAULT FALSE; + +-- Add offer_curbside_pickup (Dutchie: offerCurbsidePickup) +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_curbside_pickup BOOLEAN DEFAULT FALSE; + +-- Add is_medical (Dutchie: isMedical) +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_medical BOOLEAN DEFAULT FALSE; + +-- Add is_recreational (Dutchie: isRecreational) +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_recreational 
BOOLEAN DEFAULT FALSE; + +-- Add chain_slug (Dutchie: chain) +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255); + +-- Add enterprise_id (Dutchie: retailer.enterpriseId) +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100); + +-- Add status (Dutchie: status - open/closed) +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS status VARCHAR(50); + +-- Add c_name (Dutchie: cName - the URL slug used in embedded menus) +ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS c_name VARCHAR(255); + +-- ============================================================================ +-- DUTCHIE_DISCOVERY_LOCATIONS TABLE +-- ============================================================================ + +-- Add phone +ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS phone VARCHAR(50); + +-- Add website (Dutchie: embedBackUrl) +ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS website TEXT; + +-- Add email +ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS email VARCHAR(255); + +-- Add description +ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS description TEXT; + +-- Add logo_image +ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS logo_image TEXT; + +-- Add banner_image +ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS banner_image TEXT; + +-- Add chain_slug +ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255); + +-- Add enterprise_id +ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100); + +-- Add c_name +ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS c_name VARCHAR(255); + +-- Add country +ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States'; + +-- Add store status +ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS store_status VARCHAR(50); + +-- 
============================================================================ +-- INDEXES +-- ============================================================================ + +-- Index for chain lookups +CREATE INDEX IF NOT EXISTS idx_dispensaries_chain_slug ON dispensaries(chain_slug) WHERE chain_slug IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_discovery_locations_chain_slug ON dutchie_discovery_locations(chain_slug) WHERE chain_slug IS NOT NULL; + +-- Index for enterprise lookups (for multi-location chains) +CREATE INDEX IF NOT EXISTS idx_dispensaries_enterprise_id ON dispensaries(enterprise_id) WHERE enterprise_id IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_discovery_locations_enterprise_id ON dutchie_discovery_locations(enterprise_id) WHERE enterprise_id IS NOT NULL; + +-- Index for c_name lookups +CREATE INDEX IF NOT EXISTS idx_dispensaries_c_name ON dispensaries(c_name) WHERE c_name IS NOT NULL; + +-- ============================================================================ +-- COMMENTS +-- ============================================================================ + +COMMENT ON COLUMN dispensaries.address1 IS 'Street address line 1 (Dutchie: address1)'; +COMMENT ON COLUMN dispensaries.address2 IS 'Street address line 2 (Dutchie: address2)'; +COMMENT ON COLUMN dispensaries.zipcode IS 'ZIP/postal code (Dutchie: zip)'; +COMMENT ON COLUMN dispensaries.c_name IS 'Dutchie URL slug for embedded menus (Dutchie: cName)'; +COMMENT ON COLUMN dispensaries.chain_slug IS 'Chain identifier slug (Dutchie: chain)'; +COMMENT ON COLUMN dispensaries.enterprise_id IS 'Parent enterprise UUID (Dutchie: retailer.enterpriseId)'; +COMMENT ON COLUMN dispensaries.logo_image IS 'Logo image URL (Dutchie: logoImage)'; +COMMENT ON COLUMN dispensaries.banner_image IS 'Banner image URL (Dutchie: bannerImage)'; +COMMENT ON COLUMN dispensaries.offer_pickup IS 'Offers in-store pickup (Dutchie: offerPickup)'; +COMMENT ON COLUMN dispensaries.offer_delivery IS 'Offers delivery (Dutchie: 
offerDelivery)'; +COMMENT ON COLUMN dispensaries.offer_curbside_pickup IS 'Offers curbside pickup (Dutchie: offerCurbsidePickup)'; +COMMENT ON COLUMN dispensaries.is_medical IS 'Licensed for medical sales (Dutchie: isMedical)'; +COMMENT ON COLUMN dispensaries.is_recreational IS 'Licensed for recreational sales (Dutchie: isRecreational)'; + +SELECT 'Migration 066 completed: Dutchie field alignment' as status; diff --git a/backend/migrations/067_promotion_log.sql b/backend/migrations/067_promotion_log.sql new file mode 100644 index 00000000..8938e810 --- /dev/null +++ b/backend/migrations/067_promotion_log.sql @@ -0,0 +1,24 @@ +-- Promotion log table for tracking discovery → dispensary promotions +-- Tracks validation and promotion actions for audit/review + +CREATE TABLE IF NOT EXISTS dutchie_promotion_log ( + id SERIAL PRIMARY KEY, + discovery_id INTEGER REFERENCES dutchie_discovery_locations(id) ON DELETE SET NULL, + dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE SET NULL, + action VARCHAR(50) NOT NULL, -- 'validated', 'rejected', 'promoted_create', 'promoted_update', 'skipped' + state_code VARCHAR(10), + store_name VARCHAR(255), + validation_errors TEXT[], -- Array of error messages if rejected + field_changes JSONB, -- Before/after snapshot of changed fields + triggered_by VARCHAR(100) DEFAULT 'auto', -- 'auto', 'manual', 'api' + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +-- Indexes for efficient querying +CREATE INDEX IF NOT EXISTS idx_promotion_log_discovery_id ON dutchie_promotion_log(discovery_id); +CREATE INDEX IF NOT EXISTS idx_promotion_log_dispensary_id ON dutchie_promotion_log(dispensary_id); +CREATE INDEX IF NOT EXISTS idx_promotion_log_action ON dutchie_promotion_log(action); +CREATE INDEX IF NOT EXISTS idx_promotion_log_state_code ON dutchie_promotion_log(state_code); +CREATE INDEX IF NOT EXISTS idx_promotion_log_created_at ON dutchie_promotion_log(created_at DESC); + +COMMENT ON TABLE dutchie_promotion_log IS 
'Audit log for discovery location validation and promotion to dispensaries'; diff --git a/backend/migrations/068_crawler_status_alerts.sql b/backend/migrations/068_crawler_status_alerts.sql new file mode 100644 index 00000000..87df48b2 --- /dev/null +++ b/backend/migrations/068_crawler_status_alerts.sql @@ -0,0 +1,95 @@ +-- Migration 068: Crawler Status Alerts +-- Creates crawler_status_alerts table for dashboard notifications and status change logging + +-- ============================================================ +-- STATUS ALERTS TABLE +-- ============================================================ + +CREATE TABLE IF NOT EXISTS crawler_status_alerts ( + id SERIAL PRIMARY KEY, + + -- References + dispensary_id INTEGER REFERENCES dispensaries(id), + profile_id INTEGER REFERENCES dispensary_crawler_profiles(id), + + -- Alert info + alert_type VARCHAR(50) NOT NULL, -- 'status_change', 'crawl_error', 'validation_failed', 'promoted', 'demoted' + severity VARCHAR(20) DEFAULT 'info', -- 'info', 'warning', 'error', 'critical' + + -- Status transition + previous_status VARCHAR(50), + new_status VARCHAR(50), + + -- Context + message TEXT, + error_details JSONB, + metadata JSONB, -- Additional context (product counts, error codes, etc.) 
+ + -- Tracking + acknowledged BOOLEAN DEFAULT FALSE, + acknowledged_at TIMESTAMP WITH TIME ZONE, + acknowledged_by VARCHAR(100), + + -- Timestamps + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +-- Indexes for common queries +CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_dispensary ON crawler_status_alerts(dispensary_id); +CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_type ON crawler_status_alerts(alert_type); +CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_severity ON crawler_status_alerts(severity); +CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_unack ON crawler_status_alerts(acknowledged) WHERE acknowledged = FALSE; +CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_created ON crawler_status_alerts(created_at DESC); + +-- ============================================================ +-- STATUS DEFINITIONS (for reference/validation) +-- ============================================================ + +COMMENT ON TABLE crawler_status_alerts IS 'Crawler status change notifications for dashboard alerting'; +COMMENT ON COLUMN crawler_status_alerts.alert_type IS 'Type: status_change, crawl_error, validation_failed, promoted, demoted'; +COMMENT ON COLUMN crawler_status_alerts.severity IS 'Severity: info, warning, error, critical'; +COMMENT ON COLUMN crawler_status_alerts.previous_status IS 'Previous crawler status before change'; +COMMENT ON COLUMN crawler_status_alerts.new_status IS 'New crawler status after change'; + +-- ============================================================ +-- STATUS TRACKING ON PROFILES +-- ============================================================ + +-- Add columns for status tracking if not exists +DO $$ +BEGIN + -- Consecutive success count for auto-promotion + IF NOT EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'consecutive_successes') THEN + ALTER TABLE dispensary_crawler_profiles ADD COLUMN 
consecutive_successes INTEGER DEFAULT 0; + END IF; + + -- Consecutive failure count for auto-demotion + IF NOT EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'consecutive_failures') THEN + ALTER TABLE dispensary_crawler_profiles ADD COLUMN consecutive_failures INTEGER DEFAULT 0; + END IF; + + -- Last status change timestamp + IF NOT EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'status_changed_at') THEN + ALTER TABLE dispensary_crawler_profiles ADD COLUMN status_changed_at TIMESTAMP WITH TIME ZONE; + END IF; + + -- Status change reason + IF NOT EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'status_reason') THEN + ALTER TABLE dispensary_crawler_profiles ADD COLUMN status_reason TEXT; + END IF; +END $$; + +-- ============================================================ +-- VALID STATUS VALUES +-- ============================================================ +-- Status values for dispensary_crawler_profiles.status: +-- 'sandbox' - Newly created, being validated +-- 'production' - Healthy, actively crawled +-- 'needs_manual' - Requires human intervention +-- 'failing' - Multiple consecutive failures +-- 'disabled' - Manually disabled +-- 'legacy' - No profile, uses default method (virtual status) diff --git a/backend/migrations/069_six_stage_status.sql b/backend/migrations/069_six_stage_status.sql new file mode 100644 index 00000000..0a0e02da --- /dev/null +++ b/backend/migrations/069_six_stage_status.sql @@ -0,0 +1,163 @@ +-- Migration 069: Seven-Stage Status System +-- +-- Implements explicit 7-stage pipeline for store lifecycle: +-- 1. discovered - Found via Dutchie API, raw data +-- 2. validated - Passed field checks, ready for promotion +-- 3. promoted - In dispensaries table, has crawler profile +-- 4. sandbox - First crawl attempted, testing +-- 5. 
hydrating - Products are being loaded/updated +-- 6. production - Healthy, scheduled crawls via Horizon +-- 7. failing - Crawl errors, needs attention + +-- ============================================================ +-- STAGE ENUM TYPE +-- ============================================================ + +DO $$ +BEGIN + -- Create enum if not exists + IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'store_stage') THEN + CREATE TYPE store_stage AS ENUM ( + 'discovered', + 'validated', + 'promoted', + 'sandbox', + 'hydrating', + 'production', + 'failing' + ); + END IF; +END $$; + +-- ============================================================ +-- UPDATE DISCOVERY LOCATIONS TABLE +-- ============================================================ + +-- Add stage column to discovery locations (replaces status) +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_name = 'dutchie_discovery_locations' AND column_name = 'stage') THEN + ALTER TABLE dutchie_discovery_locations ADD COLUMN stage VARCHAR(20) DEFAULT 'discovered'; + END IF; +END $$; + +-- Migrate existing status values to stage (ADD COLUMN ... 
DEFAULT fills pre-existing rows with 'discovered', so rows still at the default must be matched too or nothing is migrated) +UPDATE dutchie_discovery_locations +SET stage = CASE + WHEN status = 'discovered' THEN 'discovered' + WHEN status = 'verified' THEN 'validated' + WHEN status = 'rejected' THEN 'failing' + WHEN status = 'merged' THEN 'validated' + ELSE 'discovered' +END +WHERE stage IS NULL OR stage = '' OR stage = 'discovered'; + +-- ============================================================ +-- UPDATE CRAWLER PROFILES TABLE +-- ============================================================ + +-- Map legacy status values onto the new stage names (assumes the status column already exists); rows already holding a valid stage are skipped so a re-run cannot clobber them +UPDATE dispensary_crawler_profiles +SET status = CASE + WHEN status = 'sandbox' THEN 'sandbox' + WHEN status = 'production' THEN 'production' + WHEN status = 'needs_manual' THEN 'failing' + WHEN status = 'failing' THEN 'failing' + WHEN status = 'disabled' THEN 'failing' + WHEN status IS NULL THEN 'promoted' + ELSE 'promoted' +END WHERE status IS NULL OR status NOT IN ('promoted', 'sandbox', 'hydrating', 'production', 'failing'); + +-- 
============================================================ +-- ADD STAGE TRACKING TO DISPENSARIES +-- ============================================================ + +DO $$ +BEGIN + -- Add stage column to dispensaries for quick filtering + IF NOT EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_name = 'dispensaries' AND column_name = 'stage') THEN + ALTER TABLE dispensaries ADD COLUMN stage VARCHAR(20) DEFAULT 'promoted'; + END IF; + + -- Add stage_changed_at for tracking + IF NOT EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_name = 'dispensaries' AND column_name = 'stage_changed_at') THEN + ALTER TABLE dispensaries ADD COLUMN stage_changed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP; + END IF; + + -- Add first_crawl_at to track sandbox -> production transition + IF NOT EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_name = 'dispensaries' AND column_name = 'first_crawl_at') THEN + ALTER TABLE dispensaries ADD COLUMN first_crawl_at TIMESTAMP WITH TIME ZONE; + END IF; + + -- Add last_successful_crawl_at + IF NOT EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_name = 'dispensaries' AND column_name = 'last_successful_crawl_at') THEN + ALTER TABLE dispensaries ADD COLUMN last_successful_crawl_at TIMESTAMP WITH TIME ZONE; + END IF; +END $$; + +-- Set initial stage for existing dispensaries from their most recently updated enabled crawler profile (rows still at the column default 'promoted' are included, otherwise the backfill matches nothing) +UPDATE dispensaries d +SET stage = COALESCE( + (SELECT dcp.status FROM dispensary_crawler_profiles dcp + WHERE dcp.dispensary_id = d.id AND dcp.enabled = true + ORDER BY dcp.updated_at DESC LIMIT 1), + 'promoted' +) +WHERE d.stage IS NULL OR d.stage = '' OR d.stage = 'promoted'; + +-- ============================================================ +-- INDEXES FOR STAGE-BASED QUERIES +-- ============================================================ + 
+CREATE INDEX IF NOT EXISTS idx_dispensaries_stage ON dispensaries(stage); +CREATE INDEX IF NOT EXISTS idx_dispensaries_stage_state ON dispensaries(stage, 
state); +CREATE INDEX IF NOT EXISTS idx_discovery_locations_stage ON dutchie_discovery_locations(stage); +CREATE INDEX IF NOT EXISTS idx_crawler_profiles_status ON dispensary_crawler_profiles(status); + +-- ============================================================ +-- STAGE TRANSITION LOG +-- ============================================================ + +CREATE TABLE IF NOT EXISTS stage_transitions ( + id SERIAL PRIMARY KEY, + + -- What changed + entity_type VARCHAR(20) NOT NULL, -- 'discovery_location' or 'dispensary' + entity_id INTEGER NOT NULL, + + -- Stage change + from_stage VARCHAR(20), + to_stage VARCHAR(20) NOT NULL, + + -- Context + trigger_type VARCHAR(50) NOT NULL, -- 'api', 'scheduler', 'manual', 'auto' + trigger_endpoint VARCHAR(200), + + -- Outcome + success BOOLEAN DEFAULT TRUE, + error_message TEXT, + metadata JSONB, + + -- Timing + duration_ms INTEGER, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +CREATE INDEX IF NOT EXISTS idx_stage_transitions_entity ON stage_transitions(entity_type, entity_id); +CREATE INDEX IF NOT EXISTS idx_stage_transitions_to_stage ON stage_transitions(to_stage); +CREATE INDEX IF NOT EXISTS idx_stage_transitions_created ON stage_transitions(created_at DESC); + +-- ============================================================ +-- COMMENTS (stage list mirrors the store_stage enum values) +-- ============================================================ + +COMMENT ON TABLE stage_transitions IS 'Audit log for all stage transitions in the pipeline'; +COMMENT ON COLUMN dispensaries.stage IS 'Current pipeline stage: discovered, validated, promoted, sandbox, hydrating, production, failing'; +COMMENT ON COLUMN dispensaries.stage_changed_at IS 'When the stage was last changed'; +COMMENT ON COLUMN dispensaries.first_crawl_at IS 'When the first crawl was attempted (sandbox stage)'; +COMMENT ON COLUMN dispensaries.last_successful_crawl_at IS 'When the last successful crawl completed'; diff --git 
a/backend/migrations/070_product_variants.sql 
b/backend/migrations/070_product_variants.sql new file mode 100644 index 00000000..f560c24e --- /dev/null +++ b/backend/migrations/070_product_variants.sql @@ -0,0 +1,239 @@ +-- ============================================================================ +-- Migration 070: Product Variants Tables +-- ============================================================================ +-- +-- Purpose: Store variant-level pricing and inventory as first-class entities +-- to enable time-series analytics, price comparisons, and sale tracking. +-- +-- Enables queries like: +-- - Price history for a specific variant (1g Blue Dream over time) +-- - Sale frequency analysis (how often is this on special?) +-- - Cross-store price comparison (who has cheapest 1g flower?) +-- - Current specials across all stores +-- +-- RULES: +-- - STRICTLY ADDITIVE (no DROP, DELETE, TRUNCATE) +-- - All new tables use IF NOT EXISTS +-- - All indexes use IF NOT EXISTS +-- +-- ============================================================================ + +-- ============================================================================ +-- SECTION 1: PRODUCT_VARIANTS TABLE (Current State) +-- ============================================================================ +-- One row per product+option combination. Tracks current pricing/inventory. 
+ +CREATE TABLE IF NOT EXISTS product_variants ( + id SERIAL PRIMARY KEY, + store_product_id INTEGER NOT NULL REFERENCES store_products(id) ON DELETE CASCADE, + dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE, + + -- Variant identity (from Dutchie POSMetaData.children) + option VARCHAR(100) NOT NULL, -- "1g", "3.5g", "1/8oz", "100mg" + canonical_sku VARCHAR(100), -- Dutchie canonicalSKU + canonical_id VARCHAR(100), -- Dutchie canonicalID + canonical_name VARCHAR(500), -- Dutchie canonicalName + + -- Current pricing (in dollars, not cents) + price_rec NUMERIC(10,2), + price_med NUMERIC(10,2), + price_rec_special NUMERIC(10,2), + price_med_special NUMERIC(10,2), + + -- Current inventory + quantity INTEGER, + quantity_available INTEGER, + in_stock BOOLEAN DEFAULT TRUE, + + -- Special/sale status + is_on_special BOOLEAN DEFAULT FALSE, + + -- Weight/size parsing (for analytics) + weight_value NUMERIC(10,2), -- 1, 3.5, 28, etc. + weight_unit VARCHAR(20), -- g, oz, mg, ml, etc. 
+ + -- Timestamps + first_seen_at TIMESTAMPTZ DEFAULT NOW(), + last_seen_at TIMESTAMPTZ DEFAULT NOW(), + last_price_change_at TIMESTAMPTZ, + last_stock_change_at TIMESTAMPTZ, + + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + + UNIQUE(store_product_id, option) +); + +-- Indexes for common queries +CREATE INDEX IF NOT EXISTS idx_variants_store_product ON product_variants(store_product_id); +CREATE INDEX IF NOT EXISTS idx_variants_dispensary ON product_variants(dispensary_id); +CREATE INDEX IF NOT EXISTS idx_variants_option ON product_variants(option); +CREATE INDEX IF NOT EXISTS idx_variants_in_stock ON product_variants(dispensary_id, in_stock) WHERE in_stock = TRUE; +CREATE INDEX IF NOT EXISTS idx_variants_on_special ON product_variants(dispensary_id, is_on_special) WHERE is_on_special = TRUE; +CREATE INDEX IF NOT EXISTS idx_variants_canonical_sku ON product_variants(canonical_sku) WHERE canonical_sku IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_variants_price_rec ON product_variants(price_rec) WHERE price_rec IS NOT NULL; + +COMMENT ON TABLE product_variants IS 'Current state of each product variant (weight/size option). One row per product+option.'; +COMMENT ON COLUMN product_variants.option IS 'Weight/size option string from Dutchie (e.g., "1g", "3.5g", "1/8oz")'; +COMMENT ON COLUMN product_variants.canonical_sku IS 'Dutchie POS SKU for cross-store matching'; + + +-- ============================================================================ +-- SECTION 2: PRODUCT_VARIANT_SNAPSHOTS TABLE (Historical Data) +-- ============================================================================ +-- Time-series data for variant pricing. One row per variant per crawl. +-- CRITICAL: NEVER DELETE from this table. 
+ +CREATE TABLE IF NOT EXISTS product_variant_snapshots ( + id SERIAL PRIMARY KEY, + product_variant_id INTEGER NOT NULL REFERENCES product_variants(id) ON DELETE CASCADE, + store_product_id INTEGER REFERENCES store_products(id) ON DELETE SET NULL, + dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE, + crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL, + + -- Variant identity (denormalized for query performance) + option VARCHAR(100) NOT NULL, + + -- Pricing at time of capture + price_rec NUMERIC(10,2), + price_med NUMERIC(10,2), + price_rec_special NUMERIC(10,2), + price_med_special NUMERIC(10,2), + + -- Inventory at time of capture + quantity INTEGER, + in_stock BOOLEAN DEFAULT TRUE, + + -- Special status at time of capture + is_on_special BOOLEAN DEFAULT FALSE, + + -- Feed presence (FALSE = variant missing from crawl) + is_present_in_feed BOOLEAN DEFAULT TRUE, + + -- Capture timestamp + captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + + created_at TIMESTAMPTZ DEFAULT NOW() +); + +-- Indexes for time-series queries +CREATE INDEX IF NOT EXISTS idx_variant_snapshots_variant ON product_variant_snapshots(product_variant_id, captured_at DESC); +CREATE INDEX IF NOT EXISTS idx_variant_snapshots_dispensary ON product_variant_snapshots(dispensary_id, captured_at DESC); +CREATE INDEX IF NOT EXISTS idx_variant_snapshots_crawl ON product_variant_snapshots(crawl_run_id) WHERE crawl_run_id IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_variant_snapshots_captured ON product_variant_snapshots(captured_at DESC); +CREATE INDEX IF NOT EXISTS idx_variant_snapshots_special ON product_variant_snapshots(is_on_special, captured_at DESC) WHERE is_on_special = TRUE; +CREATE INDEX IF NOT EXISTS idx_variant_snapshots_option ON product_variant_snapshots(option, captured_at DESC); + +COMMENT ON TABLE product_variant_snapshots IS 'Historical variant pricing/inventory. One row per variant per crawl. 
NEVER DELETE.'; + + +-- ============================================================================ +-- SECTION 3: USEFUL VIEWS +-- ============================================================================ + +-- View: Current specials across all stores +CREATE OR REPLACE VIEW v_current_specials AS +SELECT + pv.id as variant_id, + sp.id as product_id, + sp.name_raw as product_name, + sp.brand_name_raw as brand_name, + sp.category_raw as category, + d.id as dispensary_id, + d.name as dispensary_name, + d.city, + d.state, + pv.option, + pv.price_rec, + pv.price_rec_special, + ROUND(((pv.price_rec - pv.price_rec_special) / NULLIF(pv.price_rec, 0)) * 100, 1) as discount_percent, + pv.quantity, + pv.in_stock, + pv.last_seen_at +FROM product_variants pv +JOIN store_products sp ON sp.id = pv.store_product_id +JOIN dispensaries d ON d.id = pv.dispensary_id +WHERE pv.is_on_special = TRUE + AND pv.in_stock = TRUE + AND pv.price_rec_special IS NOT NULL + AND pv.price_rec_special < pv.price_rec; + +COMMENT ON VIEW v_current_specials IS 'All products currently on special across all stores'; + + +-- View: Price comparison for a product across stores +CREATE OR REPLACE VIEW v_price_comparison AS +SELECT + sp.name_raw as product_name, + sp.brand_name_raw as brand_name, + sp.category_raw as category, + pv.option, + d.id as dispensary_id, + d.name as dispensary_name, + d.city, + pv.price_rec, + pv.price_rec_special, + pv.is_on_special, + pv.in_stock, + pv.quantity, + RANK() OVER (PARTITION BY sp.name_raw, pv.option ORDER BY COALESCE(pv.price_rec_special, pv.price_rec) ASC) as price_rank +FROM product_variants pv +JOIN store_products sp ON sp.id = pv.store_product_id +JOIN dispensaries d ON d.id = pv.dispensary_id +WHERE pv.in_stock = TRUE + AND (pv.price_rec IS NOT NULL OR pv.price_rec_special IS NOT NULL); + +COMMENT ON VIEW v_price_comparison IS 'Compare prices for same product across stores, ranked by price'; + + +-- View: Latest snapshot per variant +CREATE OR REPLACE VIEW 
v_latest_variant_snapshots AS +SELECT DISTINCT ON (product_variant_id) + pvs.* +FROM product_variant_snapshots pvs +ORDER BY product_variant_id, captured_at DESC; + + +-- ============================================================================ +-- SECTION 4: HELPER FUNCTION FOR SALE FREQUENCY +-- ============================================================================ + +-- Function to calculate sale frequency for a variant +CREATE OR REPLACE FUNCTION get_variant_sale_stats(p_variant_id INTEGER, p_days INTEGER DEFAULT 30) +RETURNS TABLE ( + total_snapshots BIGINT, + times_on_special BIGINT, + special_frequency_pct NUMERIC, + avg_discount_pct NUMERIC, + min_price NUMERIC, + max_price NUMERIC, + avg_price NUMERIC +) AS $$ +BEGIN + RETURN QUERY + SELECT + COUNT(*)::BIGINT as total_snapshots, + COUNT(*) FILTER (WHERE is_on_special)::BIGINT as times_on_special, + ROUND((COUNT(*) FILTER (WHERE is_on_special)::NUMERIC / NULLIF(COUNT(*), 0)) * 100, 1) as special_frequency_pct, + ROUND(AVG( + CASE WHEN is_on_special AND price_rec_special IS NOT NULL AND price_rec IS NOT NULL + THEN ((price_rec - price_rec_special) / NULLIF(price_rec, 0)) * 100 + END + ), 1) as avg_discount_pct, + MIN(COALESCE(price_rec_special, price_rec)) as min_price, + MAX(price_rec) as max_price, + ROUND(AVG(COALESCE(price_rec_special, price_rec)), 2) as avg_price + FROM product_variant_snapshots + WHERE product_variant_id = p_variant_id + AND captured_at >= NOW() - (p_days || ' days')::INTERVAL; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION get_variant_sale_stats IS 'Get sale frequency and price stats for a variant over N days'; + + +-- ============================================================================ +-- DONE +-- ============================================================================ + +SELECT 'Migration 070 completed. Product variants tables ready for time-series analytics.' 
AS status; diff --git a/backend/migrations/071_harmonize_store_products.sql b/backend/migrations/071_harmonize_store_products.sql new file mode 100644 index 00000000..da5a57af --- /dev/null +++ b/backend/migrations/071_harmonize_store_products.sql @@ -0,0 +1,53 @@ +-- Migration 071: Harmonize store_products with dutchie_products +-- Adds missing columns to store_products to consolidate on a single canonical table + +-- Product details +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS description TEXT; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS weight VARCHAR(50); +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS weights JSONB; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS measurements JSONB; + +-- Cannabinoid/terpene data +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS effects JSONB; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS terpenes JSONB; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cannabinoids_v2 JSONB; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS thc_content NUMERIC(10,4); +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cbd_content NUMERIC(10,4); + +-- Images +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS images JSONB; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS primary_image_url TEXT; + +-- Inventory +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS total_quantity_available INTEGER DEFAULT 0; + +-- Status/flags +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS status VARCHAR(50); +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS featured BOOLEAN DEFAULT FALSE; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS coming_soon BOOLEAN DEFAULT FALSE; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_lost BOOLEAN DEFAULT FALSE; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_lost_at TIMESTAMP WITH TIME ZONE; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_restored_at TIMESTAMP WITH TIME ZONE; + +-- Threshold flags (Dutchie-specific) 
+ALTER TABLE store_products ADD COLUMN IF NOT EXISTS is_below_threshold BOOLEAN DEFAULT FALSE; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS is_below_kiosk_threshold BOOLEAN DEFAULT FALSE; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS options_below_threshold BOOLEAN DEFAULT FALSE; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS options_below_kiosk_threshold BOOLEAN DEFAULT FALSE; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS certificate_of_analysis_enabled BOOLEAN DEFAULT FALSE; + +-- Platform metadata +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS external_product_id VARCHAR(100); +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS c_name VARCHAR(500); +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS past_c_names TEXT[]; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS latest_raw_payload JSONB; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS created_at_platform TIMESTAMP WITH TIME ZONE; +ALTER TABLE store_products ADD COLUMN IF NOT EXISTS updated_at_platform TIMESTAMP WITH TIME ZONE; + +-- Indexes for common queries +CREATE INDEX IF NOT EXISTS idx_store_products_external_id ON store_products(external_product_id); +CREATE INDEX IF NOT EXISTS idx_store_products_visibility_lost ON store_products(visibility_lost) WHERE visibility_lost = TRUE; +CREATE INDEX IF NOT EXISTS idx_store_products_status ON store_products(status); + +-- Add comment +COMMENT ON TABLE store_products IS 'Canonical product table - consolidated from dutchie_products'; diff --git a/backend/migrations/072_product_views.sql b/backend/migrations/072_product_views.sql new file mode 100644 index 00000000..e8f0653d --- /dev/null +++ b/backend/migrations/072_product_views.sql @@ -0,0 +1,74 @@ +-- Migration 072: Create compatibility views for store_products and store_product_snapshots +-- These views provide backward-compatible column names for API routes + +-- v_products view - aliases store_products columns to match legacy dutchie_products naming +CREATE OR 
REPLACE VIEW v_products AS +SELECT + id, + dispensary_id, + provider_product_id as external_product_id, + provider_product_id as dutchie_id, + name_raw as name, + brand_name_raw as brand_name, + category_raw as type, + subcategory_raw as subcategory, + strain_type, + thc_percent as thc, + cbd_percent as cbd, + stock_status, + is_in_stock, + stock_quantity, + image_url, + primary_image_url, + images, + effects, + description, + is_on_special, + featured, + medical_only, + rec_only, + external_product_id as external_id, + provider, + created_at, + updated_at +FROM store_products; + +-- v_product_snapshots view - aliases store_product_snapshots columns to match legacy naming +CREATE OR REPLACE VIEW v_product_snapshots AS +SELECT + id, + store_product_id, + dispensary_id, + provider, + provider_product_id, + crawl_run_id, + captured_at as crawled_at, + name_raw, + brand_name_raw, + category_raw, + subcategory_raw, + -- Convert price_rec (dollars) to rec_min_price_cents (cents) + CASE WHEN price_rec IS NOT NULL THEN (price_rec * 100)::integer END as rec_min_price_cents, + CASE WHEN price_rec IS NOT NULL THEN (price_rec * 100)::integer END as rec_max_price_cents, + CASE WHEN price_rec_special IS NOT NULL THEN (price_rec_special * 100)::integer END as rec_min_special_price_cents, + CASE WHEN price_med IS NOT NULL THEN (price_med * 100)::integer END as med_min_price_cents, + CASE WHEN price_med IS NOT NULL THEN (price_med * 100)::integer END as med_max_price_cents, + CASE WHEN price_med_special IS NOT NULL THEN (price_med_special * 100)::integer END as med_min_special_price_cents, + is_on_special as special, + discount_percent, + is_in_stock, + stock_quantity, + stock_status, + stock_quantity as total_quantity_available, + thc_percent, + cbd_percent, + image_url, + raw_data as options, + created_at +FROM store_product_snapshots; + +-- Add indexes for the views' underlying tables +CREATE INDEX IF NOT EXISTS idx_store_products_dispensary ON store_products(dispensary_id); 
+CREATE INDEX IF NOT EXISTS idx_store_products_stock ON store_products(stock_status); +CREATE INDEX IF NOT EXISTS idx_store_snapshots_product ON store_product_snapshots(store_product_id); +CREATE INDEX IF NOT EXISTS idx_store_snapshots_captured ON store_product_snapshots(captured_at DESC); diff --git a/backend/src/discovery/discovery-crawler.ts b/backend/src/discovery/discovery-crawler.ts index 4f6f9576..117bc759 100644 --- a/backend/src/discovery/discovery-crawler.ts +++ b/backend/src/discovery/discovery-crawler.ts @@ -3,14 +3,23 @@ * * Main orchestrator for the Dutchie store discovery pipeline. * - * Flow: - * 1. Discover cities from Dutchie (or use seeded cities) - * 2. For each city, discover store locations - * 3. Upsert all data to discovery tables - * 4. Admin verifies locations manually - * 5. Verified locations are promoted to canonical dispensaries + * AUTOMATED FLOW (as of 2025-01): + * 1. Fetch cities dynamically from Dutchie GraphQL (getAllCitiesByState) + * 2. For each city, discover store locations via ConsumerDispensaries query + * 3. Upsert locations to dutchie_discovery_locations (keyed by platform_location_id) + * 4. AUTO-VALIDATE: Check required fields (name, city, state, platform_menu_url, platform_location_id) + * 5. AUTO-PROMOTE: Valid locations are upserted to dispensaries table with crawl_enabled=true + * 6. All actions logged to dutchie_promotion_log for audit * - * This module does NOT create canonical dispensaries automatically. 
+ * Tables involved: + * - dutchie_discovery_cities: Known cities for each state + * - dutchie_discovery_locations: Raw discovered store data + * - dispensaries: Canonical store records (promoted from discovery) + * - dutchie_promotion_log: Audit trail for validation/promotion + * + * Usage: + * npx tsx src/scripts/run-discovery.ts discover:state AZ + * npx tsx src/scripts/run-discovery.ts discover:state CA */ import { Pool } from 'pg'; @@ -24,11 +33,12 @@ import { getCitiesToCrawl, getCityBySlug, seedKnownCities, - ARIZONA_CITIES, } from './city-discovery'; import { discoverLocationsForCity, + getCitiesForState, } from './location-discovery'; +import { promoteDiscoveredLocations } from './promotion'; // ============================================================ // FULL DISCOVERY @@ -162,6 +172,25 @@ export async function runFullDiscovery( console.log(`Errors: ${totalErrors}`); } + // Step 4: Auto-validate and promote discovered locations + if (!dryRun && totalLocationsUpserted > 0) { + console.log('\n[Discovery] Step 4: Auto-promoting discovered locations...'); + const promotionResult = await promoteDiscoveredLocations(stateCode, false); + console.log(`[Discovery] Promotion complete:`); + console.log(` Created: ${promotionResult.created} new dispensaries`); + console.log(` Updated: ${promotionResult.updated} existing dispensaries`); + console.log(` Rejected: ${promotionResult.rejected} (validation failed)`); + if (promotionResult.rejectedRecords.length > 0) { + console.log(` Rejection reasons:`); + promotionResult.rejectedRecords.slice(0, 5).forEach(r => { + console.log(` - ${r.name}: ${r.errors.join(', ')}`); + }); + if (promotionResult.rejectedRecords.length > 5) { + console.log(` ... 
and ${promotionResult.rejectedRecords.length - 5} more`); + } + } + } + return { cities: cityResult, locations: locationResults, @@ -235,11 +264,19 @@ export async function discoverState( console.log(`[Discovery] Discovering state: ${stateCode}`); - // Seed known cities for this state - if (stateCode === 'AZ') { - console.log('[Discovery] Seeding Arizona cities...'); - const seeded = await seedKnownCities(pool, ARIZONA_CITIES); - console.log(`[Discovery] Seeded ${seeded.created} new cities, ${seeded.updated} updated`); + // Dynamically fetch and seed cities for this state + console.log(`[Discovery] Fetching cities for ${stateCode} from Dutchie...`); + const cityNames = await getCitiesForState(stateCode); + if (cityNames.length > 0) { + const cities = cityNames.map(name => ({ + name, + slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''), + stateCode, + })); + const seeded = await seedKnownCities(pool, cities); + console.log(`[Discovery] Seeded ${seeded.created} new cities, ${seeded.updated} updated for ${stateCode}`); + } else { + console.log(`[Discovery] No cities found for ${stateCode}`); } // Run full discovery for this state diff --git a/backend/src/discovery/index.ts b/backend/src/discovery/index.ts index 6ab74bba..796e2226 100644 --- a/backend/src/discovery/index.ts +++ b/backend/src/discovery/index.ts @@ -13,7 +13,6 @@ export { getCitiesToCrawl, getCityBySlug, seedKnownCities, - ARIZONA_CITIES, } from './city-discovery'; // Location Discovery @@ -33,5 +32,17 @@ export { DiscoveryStats, } from './discovery-crawler'; +// Promotion +export { + validateForPromotion, + validateDiscoveredLocations, + promoteDiscoveredLocations, + promoteSingleLocation, + ValidationResult, + ValidationSummary, + PromotionResult, + PromotionSummary, +} from './promotion'; + // Routes export { createDiscoveryRoutes } from './routes'; diff --git a/backend/src/discovery/location-discovery.ts b/backend/src/discovery/location-discovery.ts index 8b69a5f2..cd551d56 
100644 --- a/backend/src/discovery/location-discovery.ts +++ b/backend/src/discovery/location-discovery.ts @@ -134,10 +134,10 @@ export interface StateWithCities { } /** - * Fetch all states with their cities from Dutchie's __NEXT_DATA__ + * Fetch all states with their cities via direct GraphQL query * - * This fetches a city page and extracts the statesWithDispensaries data - * which contains all states and their cities where Dutchie has dispensaries. + * Uses the getAllCitiesByState persisted query which returns all states + * and cities where Dutchie has dispensaries. */ export async function fetchStatesWithDispensaries( options: { verbose?: boolean } = {} @@ -147,84 +147,53 @@ export async function fetchStatesWithDispensaries( // Initialize proxy if USE_PROXY=true await initDiscoveryProxy(); - console.log('[LocationDiscovery] Fetching statesWithDispensaries from Dutchie...'); + console.log('[LocationDiscovery] Fetching statesWithDispensaries via GraphQL...'); - // Fetch any city page to get the __NEXT_DATA__ with statesWithDispensaries - // Using a known city that's likely to exist - const result = await fetchPage('/dispensaries/az/phoenix', { maxRetries: 3 }); + try { + // Use direct GraphQL query - much cleaner than scraping __NEXT_DATA__ + const result = await executeGraphQL( + 'getAllCitiesByState', + {}, // No variables needed + GRAPHQL_HASHES.GetAllCitiesByState, + { maxRetries: 3, retryOn403: true } + ); - if (!result || result.status !== 200) { - console.error('[LocationDiscovery] Failed to fetch city page'); - return []; - } - - const nextData = extractNextData(result.html); - if (!nextData) { - console.error('[LocationDiscovery] No __NEXT_DATA__ found'); - return []; - } - - // Extract statesWithDispensaries from Apollo state - const apolloState = nextData.props?.pageProps?.initialApolloState; - if (!apolloState) { - console.error('[LocationDiscovery] No initialApolloState found'); - return []; - } - - // Find ROOT_QUERY.statesWithDispensaries - const 
rootQuery = apolloState['ROOT_QUERY']; - if (!rootQuery) { - console.error('[LocationDiscovery] No ROOT_QUERY found'); - return []; - } - - // The statesWithDispensaries is at ROOT_QUERY.statesWithDispensaries - const statesRefs = rootQuery.statesWithDispensaries; - if (!Array.isArray(statesRefs)) { - console.error('[LocationDiscovery] statesWithDispensaries not found or not an array'); - return []; - } - - // Resolve the references to actual state data - const states: StateWithCities[] = []; - for (const ref of statesRefs) { - // ref might be { __ref: "StateWithDispensaries:0" } or direct object - let stateData: any; - - if (ref && ref.__ref) { - stateData = apolloState[ref.__ref]; - } else { - stateData = ref; + const statesData = result?.data?.statesWithDispensaries; + if (!Array.isArray(statesData)) { + console.error('[LocationDiscovery] statesWithDispensaries not found in response'); + return []; } - if (stateData && stateData.name) { - // Parse cities JSON array if it's a string - let cities = stateData.cities; - if (typeof cities === 'string') { - try { - cities = JSON.parse(cities); - } catch { - cities = []; - } + // Map to our StateWithCities format + const states: StateWithCities[] = []; + for (const state of statesData) { + if (state && state.name) { + // Filter out null cities + const cities = Array.isArray(state.cities) + ? state.cities.filter((c: string | null) => c !== null) + : []; + + states.push({ + name: state.name, + country: state.country || 'US', + cities, + }); } - - states.push({ - name: stateData.name, - country: stateData.country || 'US', - cities: Array.isArray(cities) ? 
cities : [], - }); } - } - if (verbose) { - console.log(`[LocationDiscovery] Found ${states.length} states`); - for (const state of states) { - console.log(` ${state.name}: ${state.cities.length} cities`); + if (verbose) { + console.log(`[LocationDiscovery] Found ${states.length} states`); + for (const state of states) { + console.log(` ${state.name}: ${state.cities.length} cities`); + } } - } - console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`); - return states; + console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`); + return states; + } catch (error: any) { + console.error(`[LocationDiscovery] Failed to fetch states: ${error.message}`); + return []; + } } /** @@ -751,31 +720,57 @@ async function scrapeLocationCards( /** * Normalize a raw location response to a consistent format. + * Maps Dutchie camelCase fields to our snake_case equivalents. */ function normalizeLocationResponse(raw: any): DutchieLocationResponse { const slug = raw.slug || raw.cName || raw.urlSlug || ''; const id = raw.id || raw._id || raw.dispensaryId || ''; + // Extract location data - GraphQL response nests address info in .location + const loc = raw.location || {}; + + // Extract coordinates from geometry.coordinates [longitude, latitude] + const coords = loc.geometry?.coordinates || []; + const longitude = coords[0] || raw.longitude || raw.lng || loc.longitude || loc.lng; + const latitude = coords[1] || raw.latitude || raw.lat || loc.latitude || loc.lat; + return { id, name: raw.name || raw.dispensaryName || '', slug, - address: raw.address || raw.fullAddress || '', - address1: raw.address1 || raw.addressLine1 || raw.streetAddress || '', - address2: raw.address2 || raw.addressLine2 || '', - city: raw.city || '', - state: raw.state || raw.stateCode || '', - zip: raw.zip || raw.zipCode || raw.postalCode || '', - country: raw.country || raw.countryCode || 'US', - latitude: raw.latitude || raw.lat || raw.location?.latitude, - longitude: 
raw.longitude || raw.lng || raw.location?.longitude, + cName: raw.cName || raw.slug || '', + address: raw.address || raw.fullAddress || loc.ln1 || '', + address1: raw.address1 || raw.addressLine1 || raw.streetAddress || loc.ln1 || '', + address2: raw.address2 || raw.addressLine2 || loc.ln2 || '', + city: raw.city || loc.city || '', + state: raw.state || raw.stateCode || loc.state || '', + zip: raw.zip || raw.zipCode || raw.postalCode || loc.zipcode || loc.zip || '', + country: raw.country || raw.countryCode || loc.country || 'United States', + latitude, + longitude, timezone: raw.timezone || raw.tz || '', menuUrl: raw.menuUrl || (slug ? `https://dutchie.com/dispensary/${slug}` : ''), retailType: raw.retailType || raw.type || '', + // Service offerings offerPickup: raw.offerPickup ?? raw.storeSettings?.offerPickup ?? true, offerDelivery: raw.offerDelivery ?? raw.storeSettings?.offerDelivery ?? false, - isRecreational: raw.isRecreational ?? raw.retailType?.includes('Recreational') ?? true, - isMedical: raw.isMedical ?? raw.retailType?.includes('Medical') ?? true, + offerCurbsidePickup: raw.offerCurbsidePickup ?? false, + // License types + isRecreational: raw.isRecreational ?? raw.recDispensary ?? raw.retailType?.includes('Recreational') ?? true, + isMedical: raw.isMedical ?? raw.medicalDispensary ?? raw.retailType?.includes('Medical') ?? 
true, + // Contact info + phone: raw.phone || '', + email: raw.email || '', + website: raw.embedBackUrl || '', + // Branding + description: raw.description || '', + logoImage: raw.logoImage || '', + bannerImage: raw.bannerImage || '', + // Chain/enterprise info + chainSlug: raw.chain || '', + enterpriseId: raw.retailer?.enterpriseId || '', + // Status + status: raw.status || '', // Preserve raw data ...raw, }; @@ -826,15 +821,27 @@ export async function upsertLocation( offers_pickup, is_recreational, is_medical, + phone, + website, + email, + description, + logo_image, + banner_image, + chain_slug, + enterprise_id, + c_name, + country, + store_status, last_seen_at, updated_at - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, NOW(), NOW()) + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, NOW(), NOW()) ON CONFLICT (platform, platform_location_id) DO UPDATE SET name = EXCLUDED.name, platform_menu_url = EXCLUDED.platform_menu_url, raw_address = COALESCE(EXCLUDED.raw_address, dutchie_discovery_locations.raw_address), address_line1 = COALESCE(EXCLUDED.address_line1, dutchie_discovery_locations.address_line1), + address_line2 = COALESCE(EXCLUDED.address_line2, dutchie_discovery_locations.address_line2), city = COALESCE(EXCLUDED.city, dutchie_discovery_locations.city), state_code = COALESCE(EXCLUDED.state_code, dutchie_discovery_locations.state_code), postal_code = COALESCE(EXCLUDED.postal_code, dutchie_discovery_locations.postal_code), @@ -846,6 +853,17 @@ export async function upsertLocation( offers_pickup = COALESCE(EXCLUDED.offers_pickup, dutchie_discovery_locations.offers_pickup), is_recreational = COALESCE(EXCLUDED.is_recreational, dutchie_discovery_locations.is_recreational), is_medical = COALESCE(EXCLUDED.is_medical, dutchie_discovery_locations.is_medical), + phone = COALESCE(EXCLUDED.phone, 
dutchie_discovery_locations.phone), + website = COALESCE(EXCLUDED.website, dutchie_discovery_locations.website), + email = COALESCE(EXCLUDED.email, dutchie_discovery_locations.email), + description = COALESCE(EXCLUDED.description, dutchie_discovery_locations.description), + logo_image = COALESCE(EXCLUDED.logo_image, dutchie_discovery_locations.logo_image), + banner_image = COALESCE(EXCLUDED.banner_image, dutchie_discovery_locations.banner_image), + chain_slug = COALESCE(EXCLUDED.chain_slug, dutchie_discovery_locations.chain_slug), + enterprise_id = COALESCE(EXCLUDED.enterprise_id, dutchie_discovery_locations.enterprise_id), + c_name = COALESCE(EXCLUDED.c_name, dutchie_discovery_locations.c_name), + country = COALESCE(EXCLUDED.country, dutchie_discovery_locations.country), + store_status = COALESCE(EXCLUDED.store_status, dutchie_discovery_locations.store_status), last_seen_at = NOW(), updated_at = NOW() RETURNING id, (xmax = 0) as is_new`, @@ -861,7 +879,7 @@ export async function upsertLocation( location.city || null, location.state || null, location.zip || null, - location.country || 'US', + location.country || 'United States', location.latitude || null, location.longitude || null, location.timezone || null, @@ -871,6 +889,17 @@ export async function upsertLocation( location.offerPickup ?? null, location.isRecreational ?? null, location.isMedical ?? 
null, + location.phone || null, + location.website || null, + location.email || null, + location.description || null, + location.logoImage || null, + location.bannerImage || null, + location.chainSlug || null, + location.enterpriseId || null, + location.cName || null, + location.country || 'United States', + location.status || null, ] ); diff --git a/backend/src/discovery/promotion.ts b/backend/src/discovery/promotion.ts new file mode 100644 index 00000000..0a74d2b6 --- /dev/null +++ b/backend/src/discovery/promotion.ts @@ -0,0 +1,579 @@ +/** + * Discovery Promotion Service + * + * Handles the promotion of discovery locations to dispensaries: + * 1. Discovery → Raw data in dutchie_discovery_locations (status='discovered') + * 2. Validation → Check required fields, reject incomplete records + * 3. Promotion → Idempotent upsert to dispensaries, link back via dispensary_id + */ + +import { pool } from '../db/pool'; +import { DiscoveryLocationRow, DiscoveryStatus } from './types'; + +// ============================================================ +// VALIDATION +// ============================================================ + +export interface ValidationResult { + valid: boolean; + errors: string[]; +} + +export interface ValidationSummary { + totalChecked: number; + validCount: number; + invalidCount: number; + invalidRecords: Array<{ + id: number; + name: string; + errors: string[]; + }>; +} + +/** + * Validate a single discovery location has all required fields for promotion + */ +export function validateForPromotion(loc: DiscoveryLocationRow): ValidationResult { + const errors: string[] = []; + + // Required fields + if (!loc.platform_location_id) { + errors.push('Missing platform_location_id'); + } + if (!loc.name || loc.name.trim() === '') { + errors.push('Missing name'); + } + if (!loc.city || loc.city.trim() === '') { + errors.push('Missing city'); + } + if (!loc.state_code || loc.state_code.trim() === '') { + errors.push('Missing state_code'); + } + if
(!loc.platform_menu_url) { + errors.push('Missing platform_menu_url'); + } + + return { + valid: errors.length === 0, + errors, + }; +} + +/** + * Validate all discovered locations and return summary + */ +export async function validateDiscoveredLocations( + stateCode?: string +): Promise { + let query = ` + SELECT * FROM dutchie_discovery_locations + WHERE status = 'discovered' + `; + const params: string[] = []; + + if (stateCode) { + query += ` AND state_code = $1`; + params.push(stateCode); + } + + const result = await pool.query(query, params); + const locations = result.rows as DiscoveryLocationRow[]; + + const invalidRecords: ValidationSummary['invalidRecords'] = []; + let validCount = 0; + + for (const loc of locations) { + const validation = validateForPromotion(loc); + if (validation.valid) { + validCount++; + } else { + invalidRecords.push({ + id: loc.id, + name: loc.name, + errors: validation.errors, + }); + } + } + + return { + totalChecked: locations.length, + validCount, + invalidCount: invalidRecords.length, + invalidRecords, + }; +} + +// ============================================================ +// PROMOTION +// ============================================================ + +export interface PromotionResult { + discoveryId: number; + dispensaryId: number; + action: 'created' | 'updated' | 'skipped'; + name: string; +} + +export interface PromotionSummary { + totalProcessed: number; + created: number; + updated: number; + skipped: number; + rejected: number; + results: PromotionResult[]; + rejectedRecords: Array<{ + id: number; + name: string; + errors: string[]; + }>; + durationMs: number; +} + +/** + * Generate a URL-safe slug from name and city + */ +function generateSlug(name: string, city: string, state: string): string { + const base = `${name}-${city}-${state}` + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-|-$/g, '') + .substring(0, 100); + return base; +} + +/** + * Log a promotion action to dutchie_promotion_log + */ 
+async function logPromotionAction( + action: string, + discoveryId: number | null, + dispensaryId: number | null, + stateCode: string | null, + storeName: string | null, + validationErrors: string[] | null = null, + fieldChanges: Record | null = null, + triggeredBy: string = 'auto' +): Promise { + await pool.query(` + INSERT INTO dutchie_promotion_log + (discovery_id, dispensary_id, action, state_code, store_name, validation_errors, field_changes, triggered_by) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + `, [ + discoveryId, + dispensaryId, + action, + stateCode, + storeName, + validationErrors, + fieldChanges ? JSON.stringify(fieldChanges) : null, + triggeredBy, + ]); +} + +/** + * Create a status alert for the dashboard + */ +export async function createStatusAlert( + dispensaryId: number, + profileId: number | null, + alertType: string, + severity: 'info' | 'warning' | 'error' | 'critical', + message: string, + previousStatus?: string | null, + newStatus?: string | null, + metadata?: Record +): Promise { + const result = await pool.query(` + INSERT INTO crawler_status_alerts + (dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + RETURNING id + `, [ + dispensaryId, + profileId, + alertType, + severity, + message, + previousStatus || null, + newStatus || null, + metadata ? 
JSON.stringify(metadata) : null, + ]); + return result.rows[0].id; +} + +/** + * Create or update crawler profile for a dispensary with initial sandbox status + */ +async function ensureCrawlerProfile( + dispensaryId: number, + dispensaryName: string, + platformDispensaryId: string +): Promise<{ profileId: number; created: boolean }> { + // Check if profile already exists + const existingResult = await pool.query(` + SELECT id FROM dispensary_crawler_profiles + WHERE dispensary_id = $1 AND enabled = true + LIMIT 1 + `, [dispensaryId]); + + if (existingResult.rows.length > 0) { + return { profileId: existingResult.rows[0].id, created: false }; + } + + // Create new profile with sandbox status + const profileKey = dispensaryName + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-|-$/g, '') + .substring(0, 50); + + const insertResult = await pool.query(` + INSERT INTO dispensary_crawler_profiles ( + dispensary_id, + profile_name, + profile_key, + crawler_type, + status, + status_reason, + status_changed_at, + config, + enabled, + consecutive_successes, + consecutive_failures, + created_at, + updated_at + ) VALUES ( + $1, $2, $3, 'dutchie', 'sandbox', 'Newly promoted from discovery', CURRENT_TIMESTAMP, + $4::jsonb, true, 0, 0, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP + ) + RETURNING id + `, [ + dispensaryId, + dispensaryName, + profileKey, + JSON.stringify({ + platformDispensaryId, + useBothModes: true, + downloadImages: true, + trackStock: true, + }), + ]); + + const profileId = insertResult.rows[0].id; + + // Create status alert for new sandbox store + await createStatusAlert( + dispensaryId, + profileId, + 'promoted', + 'info', + `${dispensaryName} promoted to sandbox - awaiting first successful crawl`, + null, + 'sandbox', + { source: 'discovery_promotion', platformDispensaryId } + ); + + return { profileId, created: true }; +} + +/** + * Promote a single discovery location to dispensaries table + * Idempotent: uses ON CONFLICT on platform_dispensary_id + */ 
+async function promoteLocation( + loc: DiscoveryLocationRow +): Promise { + const slug = loc.platform_slug || generateSlug(loc.name, loc.city || '', loc.state_code || ''); + + // Upsert into dispensaries + // ON CONFLICT by platform_dispensary_id ensures idempotency + const upsertResult = await pool.query(` + INSERT INTO dispensaries ( + platform, + name, + slug, + city, + state, + address1, + address2, + zipcode, + postal_code, + phone, + website, + email, + latitude, + longitude, + timezone, + platform_dispensary_id, + menu_url, + menu_type, + description, + logo_image, + banner_image, + offer_pickup, + offer_delivery, + is_medical, + is_recreational, + chain_slug, + enterprise_id, + c_name, + country, + status, + crawl_enabled, + dutchie_verified, + dutchie_verified_at, + dutchie_discovery_id, + created_at, + updated_at + ) VALUES ( + $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, + $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, + $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, + $31, $32, $33, $34, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP + ) + ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL + DO UPDATE SET + name = EXCLUDED.name, + city = EXCLUDED.city, + state = EXCLUDED.state, + address1 = EXCLUDED.address1, + address2 = EXCLUDED.address2, + zipcode = EXCLUDED.zipcode, + postal_code = EXCLUDED.postal_code, + phone = EXCLUDED.phone, + website = EXCLUDED.website, + email = EXCLUDED.email, + latitude = EXCLUDED.latitude, + longitude = EXCLUDED.longitude, + timezone = EXCLUDED.timezone, + menu_url = EXCLUDED.menu_url, + description = EXCLUDED.description, + logo_image = EXCLUDED.logo_image, + banner_image = EXCLUDED.banner_image, + offer_pickup = EXCLUDED.offer_pickup, + offer_delivery = EXCLUDED.offer_delivery, + is_medical = EXCLUDED.is_medical, + is_recreational = EXCLUDED.is_recreational, + chain_slug = EXCLUDED.chain_slug, + enterprise_id = EXCLUDED.enterprise_id, + c_name = EXCLUDED.c_name, + country = EXCLUDED.country, + status = 
EXCLUDED.status, + dutchie_discovery_id = EXCLUDED.dutchie_discovery_id, + updated_at = CURRENT_TIMESTAMP + RETURNING id, (xmax = 0) AS inserted + `, [ + loc.platform || 'dutchie', // $1 platform + loc.name, // $2 name + slug, // $3 slug + loc.city, // $4 city + loc.state_code, // $5 state + loc.address_line1, // $6 address1 + loc.address_line2, // $7 address2 + loc.postal_code, // $8 zipcode + loc.postal_code, // $9 postal_code + loc.phone, // $10 phone + loc.website, // $11 website + loc.email, // $12 email + loc.latitude, // $13 latitude + loc.longitude, // $14 longitude + loc.timezone, // $15 timezone + loc.platform_location_id, // $16 platform_dispensary_id + loc.platform_menu_url, // $17 menu_url + 'dutchie', // $18 menu_type + loc.description, // $19 description + loc.logo_image, // $20 logo_image + loc.banner_image, // $21 banner_image + loc.offers_pickup ?? true, // $22 offer_pickup + loc.offers_delivery ?? false, // $23 offer_delivery + loc.is_medical ?? false, // $24 is_medical + loc.is_recreational ?? 
true, // $25 is_recreational + loc.chain_slug, // $26 chain_slug + loc.enterprise_id, // $27 enterprise_id + loc.c_name, // $28 c_name + loc.country || 'United States', // $29 country + loc.store_status || 'open', // $30 status + true, // $31 crawl_enabled + true, // $32 dutchie_verified + new Date(), // $33 dutchie_verified_at + loc.id, // $34 dutchie_discovery_id + ]); + + const dispensaryId = upsertResult.rows[0].id; + const wasInserted = upsertResult.rows[0].inserted; + + // Link discovery location back to dispensary and update status + await pool.query(` + UPDATE dutchie_discovery_locations + SET + dispensary_id = $1, + status = 'verified', + verified_at = CURRENT_TIMESTAMP, + verified_by = 'auto-promotion' + WHERE id = $2 + `, [dispensaryId, loc.id]); + + // Create crawler profile with sandbox status for new dispensaries + if (wasInserted && loc.platform_location_id) { + await ensureCrawlerProfile(dispensaryId, loc.name, loc.platform_location_id); + } + + const action = wasInserted ? 'promoted_create' : 'promoted_update'; + + // Log the promotion + await logPromotionAction( + action, + loc.id, + dispensaryId, + loc.state_code, + loc.name, + null, + { slug, city: loc.city, platform_location_id: loc.platform_location_id } + ); + + return { + discoveryId: loc.id, + dispensaryId, + action: wasInserted ? 
'created' : 'updated', + name: loc.name, + }; +} + +/** + * Promote all valid discovered locations to dispensaries + * + * @param stateCode Optional filter by state (e.g., 'CA', 'AZ') + * @param dryRun If true, only validate without making changes + */ +export async function promoteDiscoveredLocations( + stateCode?: string, + dryRun = false +): Promise { + const startTime = Date.now(); + + let query = ` + SELECT * FROM dutchie_discovery_locations + WHERE status = 'discovered' + `; + const params: string[] = []; + + if (stateCode) { + query += ` AND state_code = $1`; + params.push(stateCode); + } + + query += ` ORDER BY id`; + + const result = await pool.query(query, params); + const locations = result.rows as DiscoveryLocationRow[]; + + const results: PromotionResult[] = []; + const rejectedRecords: PromotionSummary['rejectedRecords'] = []; + let created = 0; + let updated = 0; + let skipped = 0; + let rejected = 0; + + for (const loc of locations) { + // Step 2: Validation + const validation = validateForPromotion(loc); + + if (!validation.valid) { + rejected++; + rejectedRecords.push({ + id: loc.id, + name: loc.name, + errors: validation.errors, + }); + + // Mark as rejected if not dry run + if (!dryRun) { + await pool.query(` + UPDATE dutchie_discovery_locations + SET status = 'rejected', notes = $1 + WHERE id = $2 + `, [validation.errors.join('; '), loc.id]); + + // Log the rejection + await logPromotionAction( + 'rejected', + loc.id, + null, + loc.state_code, + loc.name, + validation.errors + ); + } + continue; + } + + // Step 3: Promotion (skip if dry run) + if (dryRun) { + skipped++; + results.push({ + discoveryId: loc.id, + dispensaryId: 0, + action: 'skipped', + name: loc.name, + }); + continue; + } + + try { + const promotionResult = await promoteLocation(loc); + results.push(promotionResult); + + if (promotionResult.action === 'created') { + created++; + } else { + updated++; + } + } catch (error: any) { + console.error(`Failed to promote location 
${loc.id} (${loc.name}):`, error.message); + rejected++; + rejectedRecords.push({ + id: loc.id, + name: loc.name, + errors: [`Promotion error: ${error.message}`], + }); + } + } + + return { + totalProcessed: locations.length, + created, + updated, + skipped, + rejected, + results, + rejectedRecords, + durationMs: Date.now() - startTime, + }; +} + +/** + * Promote a single discovery location by ID + */ +export async function promoteSingleLocation( + discoveryId: number +): Promise { + const result = await pool.query( + `SELECT * FROM dutchie_discovery_locations WHERE id = $1`, + [discoveryId] + ); + + if (result.rows.length === 0) { + throw new Error(`Discovery location ${discoveryId} not found`); + } + + const loc = result.rows[0] as DiscoveryLocationRow; + + // Validate + const validation = validateForPromotion(loc); + if (!validation.valid) { + throw new Error(`Validation failed: ${validation.errors.join(', ')}`); + } + + // Promote + return promoteLocation(loc); +} diff --git a/backend/src/discovery/routes.ts b/backend/src/discovery/routes.ts index 837f7ee0..9de22558 100644 --- a/backend/src/discovery/routes.ts +++ b/backend/src/discovery/routes.ts @@ -18,8 +18,8 @@ import { getCitiesToCrawl, getCityBySlug, seedKnownCities, - ARIZONA_CITIES, } from './city-discovery'; +import { getCitiesForState } from './location-discovery'; import { DiscoveryLocation, DiscoveryCity, @@ -27,6 +27,11 @@ import { mapLocationRowToLocation, mapCityRowToCity, } from './types'; +import { + validateDiscoveredLocations, + promoteDiscoveredLocations, + promoteSingleLocation, +} from './promotion'; export function createDiscoveryRoutes(pool: Pool): Router { const router = Router(); @@ -53,44 +58,44 @@ export function createDiscoveryRoutes(pool: Pool): Router { offset = '0', } = req.query; - let whereClause = 'WHERE platform = $1 AND active = TRUE'; + let whereClause = 'WHERE dl.platform = $1 AND dl.active = TRUE'; const params: any[] = [platform]; let paramIndex = 2; if (status) { - 
whereClause += ` AND status = $${paramIndex}`; + whereClause += ` AND dl.status = $${paramIndex}`; params.push(status); paramIndex++; } if (stateCode) { - whereClause += ` AND state_code = $${paramIndex}`; + whereClause += ` AND dl.state_code = $${paramIndex}`; params.push(stateCode); paramIndex++; } if (countryCode) { - whereClause += ` AND country_code = $${paramIndex}`; + whereClause += ` AND dl.country_code = $${paramIndex}`; params.push(countryCode); paramIndex++; } if (city) { - whereClause += ` AND city ILIKE $${paramIndex}`; + whereClause += ` AND dl.city ILIKE $${paramIndex}`; params.push(`%${city}%`); paramIndex++; } if (search) { - whereClause += ` AND (name ILIKE $${paramIndex} OR platform_slug ILIKE $${paramIndex})`; + whereClause += ` AND (dl.name ILIKE $${paramIndex} OR dl.platform_slug ILIKE $${paramIndex})`; params.push(`%${search}%`); paramIndex++; } if (hasDispensary === 'true') { - whereClause += ' AND dispensary_id IS NOT NULL'; + whereClause += ' AND dl.dispensary_id IS NOT NULL'; } else if (hasDispensary === 'false') { - whereClause += ' AND dispensary_id IS NULL'; + whereClause += ' AND dl.dispensary_id IS NULL'; } params.push(parseInt(limit as string, 10), parseInt(offset as string, 10)); @@ -705,15 +710,22 @@ export function createDiscoveryRoutes(pool: Pool): Router { return res.status(400).json({ error: 'stateCode is required' }); } - let cities: any[] = []; - if (stateCode === 'AZ') { - cities = ARIZONA_CITIES; - } else { + // Dynamically fetch cities from Dutchie for any state + const cityNames = await getCitiesForState(stateCode as string); + + if (cityNames.length === 0) { return res.status(400).json({ - error: `No predefined cities for state: ${stateCode}. 
Add cities to city-discovery.ts`, + error: `No cities found for state: ${stateCode}`, }); } + // Convert to seed format + const cities = cityNames.map(name => ({ + name, + slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''), + stateCode: stateCode as string, + })); + const result = await seedKnownCities(pool, cities); res.json({ @@ -834,6 +846,136 @@ export function createDiscoveryRoutes(pool: Pool): Router { } }); + // ============================================================ + // PROMOTION ENDPOINTS + // ============================================================ + + /** + * GET /api/discovery/admin/validate + * Validate discovered locations before promotion + */ + router.get('/admin/validate', async (req: Request, res: Response) => { + try { + const { stateCode } = req.query; + const summary = await validateDiscoveredLocations(stateCode as string | undefined); + + res.json({ + success: true, + ...summary, + }); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } + }); + + /** + * POST /api/discovery/admin/promote + * Promote all valid discovered locations to dispensaries (idempotent) + * + * Request body (JSON): + * - stateCode: Filter by state (e.g., 'CA', 'AZ') + * - dryRun: If true, only validate without making changes + */ + router.post('/admin/promote', async (req: Request, res: Response) => { + try { + const { stateCode, dryRun = false } = req.body; + + console.log(`[Discovery API] Starting promotion for ${stateCode || 'all states'} (dryRun=${dryRun})`); + const summary = await promoteDiscoveredLocations(stateCode, dryRun); + + res.json({ + success: true, + ...summary, + }); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } + }); + + /** + * POST /api/discovery/admin/promote/:id + * Promote a single discovery location by ID + */ + router.post('/admin/promote/:id', async (req: Request, res: Response) => { + try { + const { id } = req.params; + + console.log(`[Discovery API] Promoting
single location ${id}`); + const result = await promoteSingleLocation(parseInt(id, 10)); + + res.json({ + success: true, + ...result, + }); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } + }); + + // ============================================================ + // PROMOTION LOG + // ============================================================ + + /** + * GET /api/discovery/promotion-log + * Get promotion audit log + */ + router.get('/promotion-log', async (req: Request, res: Response) => { + try { + const { state, dispensary_id, limit = '100' } = req.query; + + let whereClause = 'WHERE 1=1'; + const params: any[] = []; + let paramIndex = 1; + + if (state) { + whereClause += ` AND pl.state_code = $${paramIndex}`; + params.push(state); + paramIndex++; + } + + if (dispensary_id) { + whereClause += ` AND pl.dispensary_id = $${paramIndex}`; + params.push(parseInt(dispensary_id as string, 10)); + paramIndex++; + } + + params.push(parseInt(limit as string, 10)); + + const { rows } = await pool.query(` + SELECT + pl.*, + dl.name as discovery_name, + d.name as dispensary_name + FROM dutchie_promotion_log pl + LEFT JOIN dutchie_discovery_locations dl ON pl.discovery_id = dl.id + LEFT JOIN dispensaries d ON pl.dispensary_id = d.id + ${whereClause} + ORDER BY pl.created_at DESC + LIMIT $${paramIndex} + `, params); + + res.json({ + logs: rows.map((r: any) => ({ + id: r.id, + discoveryId: r.discovery_id, + dispensaryId: r.dispensary_id, + action: r.action, + stateCode: r.state_code, + storeName: r.store_name, + validationErrors: r.validation_errors, + fieldChanges: r.field_changes, + triggeredBy: r.triggered_by, + createdAt: r.created_at, + discoveryName: r.discovery_name, + dispensaryName: r.dispensary_name, + })), + }); + } catch (error: any) { + res.status(500).json({ error: error.message }); + } + }); + return router; } diff --git a/backend/src/discovery/types.ts b/backend/src/discovery/types.ts index f8cd626d..ea31f992 100644 --- 
a/backend/src/discovery/types.ts +++ b/backend/src/discovery/types.ts @@ -60,6 +60,7 @@ export interface DiscoveryLocation { stateCode: string | null; postalCode: string | null; countryCode: string | null; + country: string | null; latitude: number | null; longitude: number | null; timezone: string | null; @@ -72,6 +73,18 @@ export interface DiscoveryLocation { offersPickup: boolean | null; isRecreational: boolean | null; isMedical: boolean | null; + // New Dutchie fields + phone: string | null; + website: string | null; + email: string | null; + description: string | null; + logoImage: string | null; + bannerImage: string | null; + chainSlug: string | null; + enterpriseId: string | null; + cName: string | null; + storeStatus: string | null; + // Timestamps firstSeenAt: Date; lastSeenAt: Date; lastCheckedAt: Date | null; @@ -96,6 +109,7 @@ export interface DiscoveryLocationRow { state_code: string | null; postal_code: string | null; country_code: string | null; + country: string | null; latitude: number | null; longitude: number | null; timezone: string | null; @@ -108,6 +122,18 @@ export interface DiscoveryLocationRow { offers_pickup: boolean | null; is_recreational: boolean | null; is_medical: boolean | null; + // New Dutchie fields (snake_case for DB row) + phone: string | null; + website: string | null; + email: string | null; + description: string | null; + logo_image: string | null; + banner_image: string | null; + chain_slug: string | null; + enterprise_id: string | null; + c_name: string | null; + store_status: string | null; + // Timestamps first_seen_at: Date; last_seen_at: Date; last_checked_at: Date | null; @@ -245,6 +271,7 @@ export function mapLocationRowToLocation(row: DiscoveryLocationRow): DiscoveryLo stateCode: row.state_code, postalCode: row.postal_code, countryCode: row.country_code, + country: row.country, latitude: row.latitude, longitude: row.longitude, timezone: row.timezone, @@ -257,6 +284,18 @@ export function mapLocationRowToLocation(row: 
DiscoveryLocationRow): DiscoveryLo offersPickup: row.offers_pickup, isRecreational: row.is_recreational, isMedical: row.is_medical, + // New Dutchie fields + phone: row.phone, + website: row.website, + email: row.email, + description: row.description, + logoImage: row.logo_image, + bannerImage: row.banner_image, + chainSlug: row.chain_slug, + enterpriseId: row.enterprise_id, + cName: row.c_name, + storeStatus: row.store_status, + // Timestamps firstSeenAt: row.first_seen_at, lastSeenAt: row.last_seen_at, lastCheckedAt: row.last_checked_at, diff --git a/backend/src/hydration/canonical-upsert.ts b/backend/src/hydration/canonical-upsert.ts index fd020878..393b9f27 100644 --- a/backend/src/hydration/canonical-upsert.ts +++ b/backend/src/hydration/canonical-upsert.ts @@ -68,7 +68,7 @@ export async function upsertStoreProducts( const result = await client.query( `INSERT INTO store_products ( dispensary_id, provider, provider_product_id, provider_brand_id, - name, brand_name, category, subcategory, + name_raw, brand_name_raw, category_raw, subcategory_raw, price_rec, price_med, price_rec_special, price_med_special, is_on_special, discount_percent, is_in_stock, stock_status, @@ -87,10 +87,10 @@ export async function upsertStoreProducts( ) ON CONFLICT (dispensary_id, provider, provider_product_id) DO UPDATE SET - name = EXCLUDED.name, - brand_name = EXCLUDED.brand_name, - category = EXCLUDED.category, - subcategory = EXCLUDED.subcategory, + name_raw = EXCLUDED.name_raw, + brand_name_raw = EXCLUDED.brand_name_raw, + category_raw = EXCLUDED.category_raw, + subcategory_raw = EXCLUDED.subcategory_raw, price_rec = EXCLUDED.price_rec, price_med = EXCLUDED.price_med, price_rec_special = EXCLUDED.price_rec_special, @@ -122,8 +122,9 @@ export async function upsertStoreProducts( productPricing?.discountPercent, productAvailability?.inStock ?? 
true, productAvailability?.stockStatus || 'unknown', - product.thcPercent, - product.cbdPercent, + // Clamp THC/CBD to valid percentage range (0-100) - some products report mg as % + product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null, + product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null, product.primaryImageUrl, ] ); @@ -212,8 +213,9 @@ export async function createStoreProductSnapshots( productAvailability?.inStock ?? true, productAvailability?.quantity, productAvailability?.stockStatus || 'unknown', - product.thcPercent, - product.cbdPercent, + // Clamp THC/CBD to valid percentage range (0-100) - some products report mg as % + product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null, + product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null, product.primaryImageUrl, JSON.stringify(product.rawProduct), ]); @@ -229,7 +231,7 @@ export async function createStoreProductSnapshots( `INSERT INTO store_product_snapshots ( dispensary_id, provider, provider_product_id, crawl_run_id, captured_at, - name, brand_name, category, subcategory, + name_raw, brand_name_raw, category_raw, subcategory_raw, price_rec, price_med, price_rec_special, price_med_special, is_on_special, discount_percent, is_in_stock, stock_quantity, stock_status, @@ -245,6 +247,202 @@ export async function createStoreProductSnapshots( return { created }; } +// ============================================================ +// VARIANT UPSERTS +// ============================================================ + +export interface UpsertVariantsResult { + upserted: number; + new: number; + updated: number; + snapshotsCreated: number; +} + +/** + * Extract variant data from raw Dutchie product + */ +function extractVariantsFromRaw(rawProduct: any): any[] { + const children = rawProduct?.POSMetaData?.children || []; + return children.map((child: any) => ({ + option: child.option || child.key || 
'', + canonicalSku: child.canonicalSKU || null, + canonicalId: child.canonicalID || null, + canonicalName: child.canonicalName || null, + priceRec: child.recPrice || child.price || null, + priceMed: child.medPrice || null, + priceRecSpecial: child.recSpecialPrice || null, + priceMedSpecial: child.medSpecialPrice || null, + quantity: child.quantityAvailable ?? child.quantity ?? null, + inStock: (child.quantityAvailable ?? child.quantity ?? 0) > 0, + })); +} + +/** + * Parse weight value and unit from option string + * e.g., "1g" -> { value: 1, unit: "g" } + * "3.5g" -> { value: 3.5, unit: "g" } + * "1/8oz" -> { value: 0.125, unit: "oz" } + */ +function parseWeight(option: string): { value: number | null; unit: string | null } { + if (!option) return { value: null, unit: null }; + + // Handle fractions like "1/8oz" + const fractionMatch = option.match(/^(\d+)\/(\d+)\s*(g|oz|mg|ml)?$/i); + if (fractionMatch) { + const value = parseInt(fractionMatch[1]) / parseInt(fractionMatch[2]); + return { value, unit: fractionMatch[3]?.toLowerCase() || 'oz' }; + } + + // Handle decimals like "3.5g" or "100mg" + const decimalMatch = option.match(/^([\d.]+)\s*(g|oz|mg|ml|each)?$/i); + if (decimalMatch) { + return { + value: parseFloat(decimalMatch[1]), + unit: decimalMatch[2]?.toLowerCase() || null + }; + } + + return { value: null, unit: null }; +} + +/** + * Upsert variants for products and create variant snapshots + */ +export async function upsertProductVariants( + pool: Pool, + dispensaryId: number, + products: NormalizedProduct[], + crawlRunId: number | null, + options: { dryRun?: boolean } = {} +): Promise { + if (products.length === 0) { + return { upserted: 0, new: 0, updated: 0, snapshotsCreated: 0 }; + } + + const { dryRun = false } = options; + let newCount = 0; + let updatedCount = 0; + let snapshotsCreated = 0; + + for (const product of products) { + // Get the store_product_id for this product + const productResult = await pool.query( + `SELECT id FROM store_products 
+ WHERE dispensary_id = $1 AND provider = $2 AND provider_product_id = $3`, + [dispensaryId, product.platform, product.externalProductId] + ); + + if (productResult.rows.length === 0) { + continue; // Product not found, skip variants + } + + const storeProductId = productResult.rows[0].id; + const variants = extractVariantsFromRaw(product.rawProduct); + + if (variants.length === 0) { + continue; // No variants to process + } + + if (dryRun) { + console.log(`[DryRun] Would upsert ${variants.length} variants for product ${product.externalProductId}`); + continue; + } + + for (const variant of variants) { + const { value: weightValue, unit: weightUnit } = parseWeight(variant.option); + const isOnSpecial = (variant.priceRecSpecial !== null && variant.priceRecSpecial < variant.priceRec) || + (variant.priceMedSpecial !== null && variant.priceMedSpecial < variant.priceMed); + + // Upsert variant + const variantResult = await pool.query( + `INSERT INTO product_variants ( + store_product_id, dispensary_id, + option, canonical_sku, canonical_id, canonical_name, + price_rec, price_med, price_rec_special, price_med_special, + quantity, quantity_available, in_stock, is_on_special, + weight_value, weight_unit, + first_seen_at, last_seen_at, updated_at + ) VALUES ( + $1, $2, + $3, $4, $5, $6, + $7, $8, $9, $10, + $11, $11, $12, $13, + $14, $15, + NOW(), NOW(), NOW() + ) + ON CONFLICT (store_product_id, option) + DO UPDATE SET + canonical_sku = COALESCE(EXCLUDED.canonical_sku, product_variants.canonical_sku), + canonical_id = COALESCE(EXCLUDED.canonical_id, product_variants.canonical_id), + canonical_name = COALESCE(EXCLUDED.canonical_name, product_variants.canonical_name), + price_rec = EXCLUDED.price_rec, + price_med = EXCLUDED.price_med, + price_rec_special = EXCLUDED.price_rec_special, + price_med_special = EXCLUDED.price_med_special, + quantity = EXCLUDED.quantity, + quantity_available = EXCLUDED.quantity_available, + in_stock = EXCLUDED.in_stock, + is_on_special = 
EXCLUDED.is_on_special, + weight_value = COALESCE(EXCLUDED.weight_value, product_variants.weight_value), + weight_unit = COALESCE(EXCLUDED.weight_unit, product_variants.weight_unit), + last_seen_at = NOW(), + last_price_change_at = CASE + WHEN product_variants.price_rec IS DISTINCT FROM EXCLUDED.price_rec + OR product_variants.price_rec_special IS DISTINCT FROM EXCLUDED.price_rec_special + THEN NOW() + ELSE product_variants.last_price_change_at + END, + last_stock_change_at = CASE + WHEN product_variants.quantity IS DISTINCT FROM EXCLUDED.quantity + THEN NOW() + ELSE product_variants.last_stock_change_at + END, + updated_at = NOW() + RETURNING id, (xmax = 0) as is_new`, + [ + storeProductId, dispensaryId, + variant.option, variant.canonicalSku, variant.canonicalId, variant.canonicalName, + variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial, + variant.quantity, variant.inStock, isOnSpecial, + weightValue, weightUnit, + ] + ); + + const variantId = variantResult.rows[0].id; + if (variantResult.rows[0]?.is_new) { + newCount++; + } else { + updatedCount++; + } + + // Create variant snapshot + await pool.query( + `INSERT INTO product_variant_snapshots ( + product_variant_id, store_product_id, dispensary_id, crawl_run_id, + option, + price_rec, price_med, price_rec_special, price_med_special, + quantity, in_stock, is_on_special, + captured_at + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW())`, + [ + variantId, storeProductId, dispensaryId, crawlRunId, + variant.option, + variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial, + variant.quantity, variant.inStock, isOnSpecial, + ] + ); + snapshotsCreated++; + } + } + + return { + upserted: newCount + updatedCount, + new: newCount, + updated: updatedCount, + snapshotsCreated, + }; +} + // ============================================================ // DISCONTINUED PRODUCTS // ============================================================ @@ 
-373,6 +571,9 @@ export interface HydratePayloadResult { productsDiscontinued: number; snapshotsCreated: number; brandsCreated: number; + variantsUpserted: number; + variantsNew: number; + variantSnapshotsCreated: number; } /** @@ -399,7 +600,7 @@ export async function hydrateToCanonical( { dryRun } ); - // 3. Create snapshots + // 3. Create product snapshots const snapshotResult = await createStoreProductSnapshots( pool, dispensaryId, @@ -410,7 +611,16 @@ export async function hydrateToCanonical( { dryRun } ); - // 4. Mark discontinued products + // 4. Upsert variants and create variant snapshots + const variantResult = await upsertProductVariants( + pool, + dispensaryId, + normResult.products, + crawlRunId, + { dryRun } + ); + + // 5. Mark discontinued products const currentProductIds = new Set( normResult.products.map((p) => p.externalProductId) ); @@ -431,5 +641,8 @@ export async function hydrateToCanonical( productsDiscontinued: discontinuedCount, snapshotsCreated: snapshotResult.created, brandsCreated: brandResult.new, + variantsUpserted: variantResult.upserted, + variantsNew: variantResult.new, + variantSnapshotsCreated: variantResult.snapshotsCreated, }; } diff --git a/backend/src/hydration/incremental-sync.ts b/backend/src/hydration/incremental-sync.ts index e0a5a074..73f2c989 100644 --- a/backend/src/hydration/incremental-sync.ts +++ b/backend/src/hydration/incremental-sync.ts @@ -234,99 +234,94 @@ export async function syncProductsToCanonical( const result = await pool.query( `INSERT INTO store_products ( - dispensary_id, state_id, provider, provider_product_id, - provider_brand_id, provider_dispensary_id, enterprise_product_id, - legacy_dutchie_product_id, - name, brand_name, category, subcategory, product_type, strain_type, - description, effects, cannabinoids, - thc_percent, cbd_percent, thc_content_text, cbd_content_text, - is_in_stock, stock_status, stock_quantity, - total_quantity_available, total_kiosk_quantity_available, - image_url, 
local_image_url, local_image_thumb_url, local_image_medium_url, - original_image_url, additional_images, - is_on_special, is_featured, medical_only, rec_only, + dispensary_id, provider, provider_product_id, provider_brand_id, + platform_dispensary_id, external_product_id, + name_raw, brand_name_raw, category_raw, subcategory_raw, strain_type, + description, effects, cannabinoids_v2, + thc_percent, cbd_percent, thc_content, cbd_content, + is_in_stock, stock_status, stock_quantity, total_quantity_available, + image_url, primary_image_url, images, + is_on_special, featured, medical_only, rec_only, is_below_threshold, is_below_kiosk_threshold, - platform_status, c_name, weight, options, measurements, - first_seen_at, last_seen_at, updated_at + status, c_name, weight, measurements, + first_seen_at, last_seen_at, created_at, updated_at ) VALUES ( - $1, $2, 'dutchie', $3, - $4, $5, $6, - $7, - $8, $9, $10, $11, $12, $13, - $14, $15, $16, - $17, $18, $19, $20, - $21, $22, $23, - $24, $25, - $26, $27, $28, $29, - $30, $31, - $32, $33, $34, $35, - $36, $37, - $38, $39, $40, $41, $42, - $43, $44, NOW() + $1, 'dutchie', $2, $3, + $4, $5, + $6, $7, $8, $9, $10, + $11, $12, $13, + $14, $15, $16, $17, + $18, $19, $20, $21, + $22, $23, $24, + $25, $26, $27, $28, + $29, $30, + $31, $32, $33, $34, + $35, $36, NOW(), NOW() ) ON CONFLICT (dispensary_id, provider, provider_product_id) DO UPDATE SET - legacy_dutchie_product_id = EXCLUDED.legacy_dutchie_product_id, - name = EXCLUDED.name, - brand_name = EXCLUDED.brand_name, - category = EXCLUDED.category, - subcategory = EXCLUDED.subcategory, + name_raw = EXCLUDED.name_raw, + brand_name_raw = EXCLUDED.brand_name_raw, + category_raw = EXCLUDED.category_raw, + subcategory_raw = EXCLUDED.subcategory_raw, + strain_type = EXCLUDED.strain_type, is_in_stock = EXCLUDED.is_in_stock, stock_status = EXCLUDED.stock_status, + stock_quantity = EXCLUDED.stock_quantity, + total_quantity_available = EXCLUDED.total_quantity_available, thc_percent = 
EXCLUDED.thc_percent, cbd_percent = EXCLUDED.cbd_percent, + thc_content = EXCLUDED.thc_content, + cbd_content = EXCLUDED.cbd_content, image_url = EXCLUDED.image_url, - local_image_url = EXCLUDED.local_image_url, + primary_image_url = EXCLUDED.primary_image_url, is_on_special = EXCLUDED.is_on_special, - platform_status = EXCLUDED.platform_status, + status = EXCLUDED.status, + description = COALESCE(EXCLUDED.description, store_products.description), + effects = COALESCE(EXCLUDED.effects, store_products.effects), + cannabinoids_v2 = COALESCE(EXCLUDED.cannabinoids_v2, store_products.cannabinoids_v2), + weight = EXCLUDED.weight, + measurements = EXCLUDED.measurements, last_seen_at = NOW(), updated_at = NOW() RETURNING (xmax = 0) as is_new`, [ - dispensaryId, - stateId, - p.external_product_id, - p.brand_id, - p.platform_dispensary_id, - p.enterprise_product_id, - p.id, - p.name, - p.brand_name, - p.category || p.type, - p.subcategory, - p.type, - p.strain_type, - p.description, - p.effects, - p.cannabinoids_v2, - thcPercent, - cbdPercent, - p.thc_content, - p.cbd_content, - isInStock, - stockStatus, - p.total_quantity_available, - p.total_quantity_available, - p.total_kiosk_quantity_available, - p.primary_image_url, - p.local_image_url, - p.local_image_thumb_url, - p.local_image_medium_url, - p.original_image_url, - p.additional_images, - p.special || false, - p.featured || false, - p.medical_only || false, - p.rec_only || false, - p.is_below_threshold || false, - p.is_below_kiosk_threshold || false, - p.status, - p.c_name, - p.weight, - p.options, - p.measurements, - p.first_seen_at || p.updated_at, - p.last_seen_at || p.updated_at, + dispensaryId, // $1 + p.external_product_id, // $2 + p.brand_id, // $3 + p.platform_dispensary_id, // $4 + p.external_product_id, // $5 external_product_id + p.name, // $6 + p.brand_name, // $7 + p.type || p.category, // $8 category_raw + p.subcategory, // $9 + p.strain_type, // $10 + p.description, // $11 + p.effects, // $12 + 
p.cannabinoids_v2, // $13 + thcPercent, // $14 + cbdPercent, // $15 + p.thc_content, // $16 + p.cbd_content, // $17 + isInStock, // $18 + stockStatus, // $19 + p.total_quantity_available || 0, // $20 stock_quantity + p.total_quantity_available || 0, // $21 + p.primary_image_url, // $22 image_url + p.primary_image_url, // $23 + p.additional_images, // $24 images + p.special || false, // $25 + p.featured || false, // $26 + p.medical_only || false, // $27 + p.rec_only || false, // $28 + p.is_below_threshold || false, // $29 + p.is_below_kiosk_threshold || false, // $30 + p.status, // $31 + p.c_name, // $32 + p.weight, // $33 + p.measurements, // $34 + p.first_seen_at || p.updated_at, // $35 + p.last_seen_at || p.updated_at, // $36 ] ); diff --git a/backend/src/hydration/worker.ts b/backend/src/hydration/worker.ts index a12671b8..d0c2174a 100644 --- a/backend/src/hydration/worker.ts +++ b/backend/src/hydration/worker.ts @@ -107,7 +107,8 @@ export class HydrationWorker { console.log( `[HydrationWorker] ${this.options.dryRun ? 
'[DryRun] ' : ''}Processed payload ${payload.id}: ` + `${hydrateResult.productsNew} new, ${hydrateResult.productsUpdated} updated, ` + - `${hydrateResult.productsDiscontinued} discontinued, ${hydrateResult.snapshotsCreated} snapshots` + `${hydrateResult.productsDiscontinued} discontinued, ${hydrateResult.snapshotsCreated} snapshots, ` + + `${hydrateResult.variantsUpserted} variants (${hydrateResult.variantSnapshotsCreated} variant snapshots)` ); return { diff --git a/backend/src/index.ts b/backend/src/index.ts index 6d89d0a8..5e7222e0 100755 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -13,7 +13,15 @@ dotenv.config(); const app = express(); const PORT = process.env.PORT || 3010; -app.use(cors()); +// CORS configuration - allow requests from any origin with API key auth +// WordPress plugins need to make requests from their own domains +app.use(cors({ + origin: true, // Reflect the request origin + credentials: true, + methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'], + allowedHeaders: ['Content-Type', 'Authorization', 'x-api-key', 'X-API-Key'], + exposedHeaders: ['Content-Length', 'X-Request-Id'], +})); app.use(express.json()); // Serve static images when MinIO is not configured @@ -67,7 +75,11 @@ import publicApiRoutes from './routes/public-api'; import usersRoutes from './routes/users'; import staleProcessesRoutes from './routes/stale-processes'; import orchestratorAdminRoutes from './routes/orchestrator-admin'; +import adminDebugRoutes from './routes/admin-debug'; +import intelligenceRoutes from './routes/intelligence'; +import marketsRoutes from './routes/markets'; import workersRoutes from './routes/workers'; +import jobQueueRoutes from './routes/job-queue'; import { createMultiStateRoutes } from './multi-state'; import { trackApiUsage, checkRateLimit } from './middleware/apiTokenTracker'; import { validateWordPressPermissions } from './middleware/wordpressPermissions'; @@ -77,6 +89,7 @@ import { createPortalRoutes } from './portals'; import { 
createStatesRouter } from './routes/states'; import { createAnalyticsV2Router } from './routes/analytics-v2'; import { createDiscoveryRoutes } from './discovery'; +import pipelineRoutes from './routes/pipeline'; import { getPool } from './db/pool'; // Consumer API routes (findadispo.com, findagram.co) @@ -88,6 +101,7 @@ import consumerDealsRoutes from './routes/consumer-deals'; import eventsRoutes from './routes/events'; import clickAnalyticsRoutes from './routes/click-analytics'; import seoRoutes from './routes/seo'; +import priceAnalyticsRoutes from './routes/price-analytics'; // Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com) // These domains can access the API without authentication @@ -135,6 +149,18 @@ app.use('/api/stale-processes', staleProcessesRoutes); // Admin routes - orchestrator actions app.use('/api/admin/orchestrator', orchestratorAdminRoutes); +// Admin routes - debug endpoints (snapshot inspection) +app.use('/api/admin/debug', adminDebugRoutes); +console.log('[AdminDebug] Routes registered at /api/admin/debug'); + +// Admin routes - intelligence (brands, pricing analytics) +app.use('/api/admin/intelligence', intelligenceRoutes); +console.log('[Intelligence] Routes registered at /api/admin/intelligence'); + +// Markets routes - store and product data for admin dashboard +app.use('/api/markets', marketsRoutes); +console.log('[Markets] Routes registered at /api/markets'); + // SEO orchestrator routes app.use('/api/seo', seoRoutes); @@ -142,7 +168,9 @@ app.use('/api/seo', seoRoutes); app.use('/api/workers', workersRoutes); // Monitor routes - aliased from workers for convenience app.use('/api/monitor', workersRoutes); -console.log('[Workers] Routes registered at /api/workers and /api/monitor'); +// Job queue management +app.use('/api/job-queue', jobQueueRoutes); +console.log('[Workers] Routes registered at /api/workers, /api/monitor, and /api/job-queue'); // Phase 3: Analytics V2 - Enhanced analytics with rec/med state 
segmentation try { @@ -176,6 +204,10 @@ console.log('[Events] Routes registered at /api/events'); app.use('/api/analytics/clicks', clickAnalyticsRoutes); console.log('[ClickAnalytics] Routes registered at /api/analytics/clicks'); +// Price Analytics API - price history, specials, and market comparisons +app.use('/api/analytics/price', priceAnalyticsRoutes); +console.log('[PriceAnalytics] Routes registered at /api/analytics/price'); + // States API routes - cannabis legalization status and targeting try { const statesRouter = createStatesRouter(getPool()); @@ -215,6 +247,10 @@ try { console.warn('[Discovery] Failed to register routes:', error); } +// Pipeline Stage Transitions - Explicit API for moving stores through 6-stage pipeline +app.use('/api/pipeline', pipelineRoutes); +console.log('[Pipeline] Routes registered at /api/pipeline'); + // Platform-specific Discovery Routes // TODO: Rebuild with /platforms/dutchie/ module diff --git a/backend/src/multi-state/routes.ts b/backend/src/multi-state/routes.ts index 9058b134..3d1cf064 100644 --- a/backend/src/multi-state/routes.ts +++ b/backend/src/multi-state/routes.ts @@ -319,12 +319,13 @@ export function createMultiStateRoutes(pool: Pool): Router { // ========================================================================= /** - * GET /api/analytics/compare/brand/:brandId + * GET /api/analytics/compare/brand/:brandIdOrName * Compare a brand across multiple states + * Accepts either numeric brand ID or brand name (URL encoded) */ - router.get('/analytics/compare/brand/:brandId', async (req: Request, res: Response) => { + router.get('/analytics/compare/brand/:brandIdOrName', async (req: Request, res: Response) => { try { - const brandId = parseInt(req.params.brandId); + const { brandIdOrName } = req.params; const statesParam = req.query.states as string; // Parse states - either comma-separated or get all active states @@ -336,7 +337,22 @@ export function createMultiStateRoutes(pool: Pool): Router { states = 
activeStates.map(s => s.code); } - const comparison = await stateService.compareBrandAcrossStates(brandId, states); + // Check if it's a numeric ID or a brand name + const brandId = parseInt(brandIdOrName); + let comparison; + + if (!isNaN(brandId)) { + // Try by ID first + try { + comparison = await stateService.compareBrandAcrossStates(brandId, states); + } catch (idErr: any) { + // If brand ID not found, try as name + comparison = await stateService.compareBrandByNameAcrossStates(brandIdOrName, states); + } + } else { + // Use brand name directly + comparison = await stateService.compareBrandByNameAcrossStates(decodeURIComponent(brandIdOrName), states); + } res.json({ success: true, diff --git a/backend/src/multi-state/state-query-service.ts b/backend/src/multi-state/state-query-service.ts index 1742a845..f59b2524 100644 --- a/backend/src/multi-state/state-query-service.ts +++ b/backend/src/multi-state/state-query-service.ts @@ -67,18 +67,19 @@ export class StateQueryService { */ async getStateSummary(state: string): Promise { // Get base metrics from materialized view + // Migration 051 uses dispensary_count column (not store_count) const metricsResult = await this.pool.query(` SELECT state, state_name AS "stateName", - dispensary_count AS "storeCount", - dispensary_count AS "dutchieStores", - dispensary_count AS "activeStores", - total_products AS "totalProducts", - in_stock_products AS "inStockProducts", - out_of_stock_products AS "outOfStockProducts", - unique_brands AS "uniqueBrands", - unique_categories AS "uniqueCategories", + COALESCE(dispensary_count, 0) AS "storeCount", + COALESCE(dispensary_count, 0) AS "dutchieStores", + COALESCE(dispensary_count, 0) AS "activeStores", + COALESCE(total_products, 0) AS "totalProducts", + COALESCE(in_stock_products, 0) AS "inStockProducts", + COALESCE(out_of_stock_products, 0) AS "outOfStockProducts", + COALESCE(unique_brands, 0) AS "uniqueBrands", + COALESCE(unique_categories, 0) AS "uniqueCategories", avg_price_rec 
AS "avgPriceRec", min_price_rec AS "minPriceRec", max_price_rec AS "maxPriceRec", @@ -110,10 +111,24 @@ export class StateQueryService { // Get top categories const topCategories = await this.getCategoriesByState(state, { limit: 5 }); + // Parse numeric values from strings (PostgreSQL returns bigint as string) return { - ...metrics, - recentCrawls: parseInt(crawlResult.rows[0]?.recent_crawls || '0'), - failedCrawls: parseInt(crawlResult.rows[0]?.failed_crawls || '0'), + state: metrics.state, + stateName: metrics.stateName, + storeCount: parseInt(metrics.storeCount || '0', 10), + dutchieStores: parseInt(metrics.dutchieStores || '0', 10), + activeStores: parseInt(metrics.activeStores || '0', 10), + totalProducts: parseInt(metrics.totalProducts || '0', 10), + inStockProducts: parseInt(metrics.inStockProducts || '0', 10), + outOfStockProducts: parseInt(metrics.outOfStockProducts || '0', 10), + uniqueBrands: parseInt(metrics.uniqueBrands || '0', 10), + uniqueCategories: parseInt(metrics.uniqueCategories || '0', 10), + avgPriceRec: metrics.avgPriceRec ? parseFloat(metrics.avgPriceRec) : null, + minPriceRec: metrics.minPriceRec ? parseFloat(metrics.minPriceRec) : null, + maxPriceRec: metrics.maxPriceRec ? 
parseFloat(metrics.maxPriceRec) : null, + refreshedAt: metrics.refreshedAt, + recentCrawls: parseInt(crawlResult.rows[0]?.recent_crawls || '0', 10), + failedCrawls: parseInt(crawlResult.rows[0]?.failed_crawls || '0', 10), lastCrawlAt: crawlResult.rows[0]?.last_crawl_at || null, topBrands, topCategories, @@ -121,29 +136,49 @@ export class StateQueryService { } /** - * Get metrics for all states + * Get metrics for all states (including states with no data) */ async getAllStateMetrics(): Promise { + // Migration 051 uses dispensary_count column (not store_count) const result = await this.pool.query(` SELECT - state, - state_name AS "stateName", - dispensary_count AS "storeCount", - dispensary_count AS "dutchieStores", - dispensary_count AS "activeStores", - total_products AS "totalProducts", - in_stock_products AS "inStockProducts", - out_of_stock_products AS "outOfStockProducts", - unique_brands AS "uniqueBrands", - unique_categories AS "uniqueCategories", - avg_price_rec AS "avgPriceRec", - min_price_rec AS "minPriceRec", - max_price_rec AS "maxPriceRec", - refreshed_at AS "refreshedAt" - FROM mv_state_metrics - ORDER BY dispensary_count DESC + s.code AS state, + s.name AS "stateName", + COALESCE(m.dispensary_count, 0) AS "storeCount", + COALESCE(m.dispensary_count, 0) AS "dutchieStores", + COALESCE(m.dispensary_count, 0) AS "activeStores", + COALESCE(m.total_products, 0) AS "totalProducts", + COALESCE(m.in_stock_products, 0) AS "inStockProducts", + COALESCE(m.out_of_stock_products, 0) AS "outOfStockProducts", + COALESCE(m.unique_brands, 0) AS "uniqueBrands", + COALESCE(m.unique_categories, 0) AS "uniqueCategories", + m.avg_price_rec AS "avgPriceRec", + m.min_price_rec AS "minPriceRec", + m.max_price_rec AS "maxPriceRec", + m.refreshed_at AS "refreshedAt", + 0 AS "onSpecialProducts" + FROM states s + LEFT JOIN mv_state_metrics m ON s.code = m.state + ORDER BY COALESCE(m.dispensary_count, 0) DESC, s.name ASC `); - return result.rows; + // Parse numeric values from 
strings (PostgreSQL returns bigint as string) + return result.rows.map((row: any) => ({ + state: row.state, + stateName: row.stateName, + storeCount: parseInt(row.storeCount || '0', 10), + dutchieStores: parseInt(row.dutchieStores || '0', 10), + activeStores: parseInt(row.activeStores || '0', 10), + totalProducts: parseInt(row.totalProducts || '0', 10), + inStockProducts: parseInt(row.inStockProducts || '0', 10), + outOfStockProducts: parseInt(row.outOfStockProducts || '0', 10), + uniqueBrands: parseInt(row.uniqueBrands || '0', 10), + uniqueCategories: parseInt(row.uniqueCategories || '0', 10), + avgPriceRec: row.avgPriceRec ? parseFloat(row.avgPriceRec) : null, + minPriceRec: row.minPriceRec ? parseFloat(row.minPriceRec) : null, + maxPriceRec: row.maxPriceRec ? parseFloat(row.maxPriceRec) : null, + refreshedAt: row.refreshedAt, + onSpecialProducts: parseInt(row.onSpecialProducts || '0', 10), + })); } // ========================================================================= @@ -152,29 +187,37 @@ export class StateQueryService { /** * Get brands present in a specific state + * Uses inline query instead of v_brand_state_presence view for compatibility */ async getBrandsByState(state: string, options: StateQueryOptions = {}): Promise { const { limit = 50, offset = 0, sortBy = 'productCount', sortDir = 'desc' } = options; + // Sort columns must reference the aliased output names with quotes const sortColumn = { - productCount: 'product_count', - storeCount: 'store_count', - avgPrice: 'avg_price', - name: 'brand_name', - }[sortBy] || 'product_count'; + productCount: '"productCount"', + storeCount: '"storeCount"', + avgPrice: '"avgPrice"', + name: '"brandName"', + }[sortBy] || '"productCount"'; + // Inline query that aggregates brand data from store_products and dispensaries + // Works whether or not v_brand_state_presence view exists const result = await this.pool.query(` SELECT - brand_id AS "brandId", - brand_name AS "brandName", - brand_slug AS "brandSlug", - 
store_count AS "storeCount", - product_count AS "productCount", - avg_price AS "avgPrice", - first_seen_in_state AS "firstSeenInState", - last_seen_in_state AS "lastSeenInState" - FROM v_brand_state_presence - WHERE state = $1 + COALESCE(sp.brand_id, 0) AS "brandId", + sp.brand_name_raw AS "brandName", + LOWER(REPLACE(sp.brand_name_raw, ' ', '-')) AS "brandSlug", + COUNT(DISTINCT d.id) AS "storeCount", + COUNT(DISTINCT sp.id) AS "productCount", + ROUND(AVG(sp.price_rec)::numeric, 2) AS "avgPrice", + MIN(sp.first_seen_at) AS "firstSeenInState", + MAX(sp.last_seen_at) AS "lastSeenInState" + FROM store_products sp + JOIN dispensaries d ON sp.dispensary_id = d.id + WHERE d.state = $1 + AND sp.brand_name_raw IS NOT NULL + AND sp.brand_name_raw != '' + GROUP BY sp.brand_id, sp.brand_name_raw ORDER BY ${sortColumn} ${sortDir === 'asc' ? 'ASC' : 'DESC'} LIMIT $2 OFFSET $3 `, [state, limit, offset]); @@ -184,18 +227,48 @@ export class StateQueryService { /** * Get brand penetration across all states + * Uses inline query instead of fn_brand_state_penetration function for compatibility */ async getBrandStatePenetration(brandId: number): Promise { + // Inline query that calculates brand penetration by state const result = await this.pool.query(` + WITH state_totals AS ( + SELECT + d.state, + s.name AS state_name, + COUNT(DISTINCT d.id) AS total_stores + FROM dispensaries d + JOIN states s ON d.state = s.code + WHERE d.state IS NOT NULL + GROUP BY d.state, s.name + ), + brand_presence AS ( + SELECT + d.state, + COUNT(DISTINCT d.id) AS stores_with_brand, + COUNT(DISTINCT sp.id) AS product_count, + ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price + FROM store_products sp + JOIN dispensaries d ON sp.dispensary_id = d.id + WHERE (sp.brand_id = $1 OR sp.brand_name_raw = (SELECT name FROM brands WHERE id = $1)) + AND d.state IS NOT NULL + GROUP BY d.state + ) SELECT - state, - state_name AS "stateName", - total_stores AS "totalStores", - stores_with_brand AS "storesWithBrand", - 
penetration_pct AS "penetrationPct", - product_count AS "productCount", - avg_price AS "avgPrice" - FROM fn_brand_state_penetration($1) + st.state, + st.state_name AS "stateName", + st.total_stores AS "totalStores", + COALESCE(bp.stores_with_brand, 0) AS "storesWithBrand", + CASE + WHEN st.total_stores > 0 + THEN ROUND((COALESCE(bp.stores_with_brand, 0)::numeric / st.total_stores) * 100, 2) + ELSE 0 + END AS "penetrationPct", + COALESCE(bp.product_count, 0) AS "productCount", + bp.avg_price AS "avgPrice" + FROM state_totals st + LEFT JOIN brand_presence bp ON st.state = bp.state + ORDER BY COALESCE(bp.stores_with_brand, 0) DESC `, [brandId]); return result.rows; @@ -257,33 +330,128 @@ export class StateQueryService { }; } + /** + * Compare a brand by name across multiple states + * Used when we only have a brand name (not an ID from the brands table) + */ + async compareBrandByNameAcrossStates( + brandName: string, + states: string[] + ): Promise { + // Get penetration data by brand name + const penetrationResult = await this.pool.query(` + WITH state_totals AS ( + SELECT + d.state, + s.name AS state_name, + COUNT(DISTINCT d.id) AS total_stores + FROM dispensaries d + JOIN states s ON d.state = s.code + WHERE d.state IS NOT NULL + GROUP BY d.state, s.name + ), + brand_presence AS ( + SELECT + d.state, + COUNT(DISTINCT d.id) AS stores_with_brand, + COUNT(DISTINCT sp.id) AS product_count, + ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price + FROM store_products sp + JOIN dispensaries d ON sp.dispensary_id = d.id + WHERE sp.brand_name_raw ILIKE $1 + AND d.state IS NOT NULL + GROUP BY d.state + ) + SELECT + st.state, + st.state_name AS "stateName", + st.total_stores AS "totalStores", + COALESCE(bp.stores_with_brand, 0) AS "storesWithBrand", + CASE + WHEN st.total_stores > 0 + THEN ROUND((COALESCE(bp.stores_with_brand, 0)::numeric / st.total_stores) * 100, 2) + ELSE 0 + END AS "penetrationPct", + COALESCE(bp.product_count, 0) AS "productCount", + bp.avg_price AS 
"avgPrice" + FROM state_totals st + LEFT JOIN brand_presence bp ON st.state = bp.state + ORDER BY COALESCE(bp.stores_with_brand, 0) DESC + `, [brandName]); + + // Filter by requested states + const filteredStates = penetrationResult.rows.filter((p: any) => + states.includes(p.state) + ); + + // Calculate national metrics + const nationalResult = await this.pool.query(` + SELECT + COUNT(DISTINCT d.id) AS total_stores, + COUNT(DISTINCT CASE WHEN sp.brand_name_raw ILIKE $1 THEN d.id END) AS stores_with_brand, + AVG(sp.price_rec) FILTER (WHERE sp.brand_name_raw ILIKE $1) AS avg_price + FROM dispensaries d + LEFT JOIN store_products sp ON d.id = sp.dispensary_id + WHERE d.state IS NOT NULL + `, [brandName]); + + const nationalData = nationalResult.rows[0]; + const nationalPenetration = nationalData.total_stores > 0 + ? (nationalData.stores_with_brand / nationalData.total_stores) * 100 + : 0; + + // Find best/worst states + const sortedByPenetration = [...filteredStates].sort( + (a: any, b: any) => parseFloat(b.penetrationPct) - parseFloat(a.penetrationPct) + ); + + return { + brandId: 0, // No ID when using brand name + brandName, + states: filteredStates, + nationalPenetration: Math.round(nationalPenetration * 100) / 100, + nationalAvgPrice: nationalData.avg_price + ? 
Math.round(parseFloat(nationalData.avg_price) * 100) / 100 + : null, + bestPerformingState: sortedByPenetration[0]?.state || null, + worstPerformingState: sortedByPenetration[sortedByPenetration.length - 1]?.state || null, + }; + } + // ========================================================================= // Category Queries // ========================================================================= /** * Get categories in a specific state + * Uses inline query instead of v_category_state_distribution view for compatibility */ async getCategoriesByState(state: string, options: StateQueryOptions = {}): Promise { const { limit = 50, offset = 0, sortBy = 'productCount', sortDir = 'desc' } = options; + // Sort columns must reference the aliased output names with quotes const sortColumn = { - productCount: 'product_count', - storeCount: 'store_count', - avgPrice: 'avg_price', + productCount: '"productCount"', + storeCount: '"storeCount"', + avgPrice: '"avgPrice"', category: 'category', - }[sortBy] || 'product_count'; + }[sortBy] || '"productCount"'; + // Inline query that aggregates category data from store_products and dispensaries const result = await this.pool.query(` SELECT - category, - product_count AS "productCount", - store_count AS "storeCount", - avg_price AS "avgPrice", - in_stock_count AS "inStockCount", - on_special_count AS "onSpecialCount" - FROM v_category_state_distribution - WHERE state = $1 + sp.category_raw AS category, + COUNT(DISTINCT sp.id) AS "productCount", + COUNT(DISTINCT d.id) AS "storeCount", + ROUND(AVG(sp.price_rec)::numeric, 2) AS "avgPrice", + COUNT(DISTINCT CASE WHEN sp.is_in_stock THEN sp.id END) AS "inStockCount", + 0 AS "onSpecialCount" + FROM store_products sp + JOIN dispensaries d ON sp.dispensary_id = d.id + WHERE d.state = $1 + AND sp.category_raw IS NOT NULL + AND sp.category_raw != '' + GROUP BY sp.category_raw ORDER BY ${sortColumn} ${sortDir === 'asc' ? 
'ASC' : 'DESC'} LIMIT $2 OFFSET $3 `, [state, limit, offset]); @@ -293,25 +461,38 @@ export class StateQueryService { /** * Compare a category across multiple states + * Uses inline query instead of v_category_state_distribution view for compatibility */ async compareCategoryAcrossStates( category: string, states: string[] ): Promise { + // Inline query for category distribution by state const result = await this.pool.query(` + WITH category_stats AS ( + SELECT + d.state, + sp.category_raw AS category, + COUNT(DISTINCT sp.id) AS product_count, + COUNT(DISTINCT d.id) AS store_count, + ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price + FROM store_products sp + JOIN dispensaries d ON sp.dispensary_id = d.id + WHERE sp.category_raw = $1 + AND d.state = ANY($2) + GROUP BY d.state, sp.category_raw + ) SELECT - v.state, + cs.state, s.name AS "stateName", - v.category, - v.product_count AS "productCount", - v.store_count AS "storeCount", - v.avg_price AS "avgPrice", - ROUND(v.product_count::NUMERIC / SUM(v.product_count) OVER () * 100, 2) AS "marketShare" - FROM v_category_state_distribution v - JOIN states s ON v.state = s.code - WHERE v.category = $1 - AND v.state = ANY($2) - ORDER BY v.product_count DESC + cs.category, + cs.product_count AS "productCount", + cs.store_count AS "storeCount", + cs.avg_price AS "avgPrice", + ROUND(cs.product_count::NUMERIC / NULLIF(SUM(cs.product_count) OVER (), 0) * 100, 2) AS "marketShare" + FROM category_stats cs + JOIN states s ON cs.state = s.code + ORDER BY cs.product_count DESC `, [category, states]); // Get national totals @@ -345,41 +526,49 @@ export class StateQueryService { /** * Get stores in a specific state + * Uses inline query for compatibility - does not depend on v_store_state_summary view */ async getStoresByState(state: string, options: StateQueryOptions = {}): Promise { const { limit = 100, offset = 0, includeInactive = false, sortBy = 'productCount', sortDir = 'desc' } = options; + // Sort columns must reference the 
aliased output names with quotes const sortColumn = { - productCount: 'product_count', - brandCount: 'brand_count', - avgPrice: 'avg_price', - name: 'dispensary_name', + productCount: '"productCount"', + brandCount: '"brandCount"', + avgPrice: '"avgPrice"', + name: '"dispensaryName"', city: 'city', - lastCrawl: 'last_crawl_at', - }[sortBy] || 'product_count'; + lastCrawl: '"lastCrawlAt"', + }[sortBy] || '"productCount"'; - let whereClause = 'WHERE state = $1'; + let whereClause = 'WHERE d.state = $1'; if (!includeInactive) { - whereClause += ` AND crawl_status != 'disabled'`; + // Use stage column instead of crawl_status (which doesn't exist) + whereClause += ` AND (d.stage IS NULL OR d.stage NOT IN ('disabled', 'failing'))`; } + // Inline query that aggregates store data from dispensaries and store_products + // Works whether or not v_store_state_summary view exists + // Uses 'stage' column instead of 'crawl_status' which doesn't exist in this schema const result = await this.pool.query(` SELECT - dispensary_id AS "dispensaryId", - dispensary_name AS "dispensaryName", - dispensary_slug AS "dispensarySlug", - state, - city, - menu_type AS "menuType", - crawl_status AS "crawlStatus", - last_crawl_at AS "lastCrawlAt", - product_count AS "productCount", - in_stock_count AS "inStockCount", - brand_count AS "brandCount", - avg_price AS "avgPrice", - special_count AS "specialCount" - FROM v_store_state_summary + d.id AS "dispensaryId", + d.name AS "dispensaryName", + d.slug AS "dispensarySlug", + d.state, + d.city, + d.menu_type AS "menuType", + d.stage AS "crawlStatus", + d.last_crawl_at AS "lastCrawlAt", + COUNT(DISTINCT sp.id) AS "productCount", + COUNT(DISTINCT CASE WHEN sp.is_in_stock THEN sp.id END) AS "inStockCount", + COUNT(DISTINCT sp.brand_id) AS "brandCount", + ROUND(AVG(sp.price_rec)::numeric, 2) AS "avgPrice", + COUNT(DISTINCT CASE WHEN sp.is_on_special THEN sp.id END) AS "specialCount" + FROM dispensaries d + LEFT JOIN store_products sp ON d.id = 
sp.dispensary_id ${whereClause} + GROUP BY d.id, d.name, d.slug, d.state, d.city, d.menu_type, d.stage, d.last_crawl_at ORDER BY ${sortColumn} ${sortDir === 'asc' ? 'ASC' : 'DESC'} NULLS LAST LIMIT $2 OFFSET $3 `, [state, limit, offset]); @@ -393,6 +582,7 @@ export class StateQueryService { /** * Get price distribution by state + * Uses inline query instead of fn_national_price_comparison for compatibility */ async getStorePriceDistribution( state: string, @@ -400,44 +590,104 @@ export class StateQueryService { ): Promise { const { category, brandId } = options; + // Build WHERE conditions dynamically + const conditions = ['d.state = $1', 'sp.price_rec IS NOT NULL', 'sp.price_rec > 0']; + const params: any[] = [state]; + let paramIndex = 2; + + if (category) { + conditions.push(`sp.category_raw = $${paramIndex}`); + params.push(category); + paramIndex++; + } + if (brandId) { + conditions.push(`sp.brand_id = $${paramIndex}`); + params.push(brandId); + paramIndex++; + } + const result = await this.pool.query(` - SELECT * FROM fn_national_price_comparison($1, $2) - WHERE state = $3 - `, [category || null, brandId || null, state]); + SELECT + d.state, + s.name AS state_name, + COUNT(DISTINCT sp.id) AS product_count, + ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price, + MIN(sp.price_rec) AS min_price, + MAX(sp.price_rec) AS max_price, + ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) AS median_price, + ROUND(STDDEV(sp.price_rec)::numeric, 2) AS price_stddev + FROM dispensaries d + JOIN states s ON d.state = s.code + JOIN store_products sp ON d.id = sp.dispensary_id + WHERE ${conditions.join(' AND ')} + GROUP BY d.state, s.name + ORDER BY avg_price DESC + `, params); return result.rows.map(row => ({ state: row.state, stateName: row.state_name, - productCount: parseInt(row.product_count), - avgPrice: parseFloat(row.avg_price), - minPrice: parseFloat(row.min_price), - maxPrice: parseFloat(row.max_price), - medianPrice: 
parseFloat(row.median_price), - priceStddev: parseFloat(row.price_stddev), + productCount: parseInt(row.product_count || '0'), + avgPrice: parseFloat(row.avg_price || '0'), + minPrice: parseFloat(row.min_price || '0'), + maxPrice: parseFloat(row.max_price || '0'), + medianPrice: parseFloat(row.median_price || '0'), + priceStddev: parseFloat(row.price_stddev || '0'), })); } /** * Get national price comparison across all states + * Uses inline query instead of fn_national_price_comparison for compatibility */ async getNationalPriceComparison( options: { category?: string; brandId?: number } = {} ): Promise { const { category, brandId } = options; + // Build WHERE conditions dynamically + const conditions = ['d.state IS NOT NULL', 'sp.price_rec IS NOT NULL', 'sp.price_rec > 0']; + const params: any[] = []; + let paramIndex = 1; + + if (category) { + conditions.push(`sp.category_raw = $${paramIndex}`); + params.push(category); + paramIndex++; + } + if (brandId) { + conditions.push(`sp.brand_id = $${paramIndex}`); + params.push(brandId); + paramIndex++; + } + const result = await this.pool.query(` - SELECT * FROM fn_national_price_comparison($1, $2) - `, [category || null, brandId || null]); + SELECT + d.state, + s.name AS state_name, + COUNT(DISTINCT sp.id) AS product_count, + ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price, + MIN(sp.price_rec) AS min_price, + MAX(sp.price_rec) AS max_price, + ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) AS median_price, + ROUND(STDDEV(sp.price_rec)::numeric, 2) AS price_stddev + FROM dispensaries d + JOIN states s ON d.state = s.code + JOIN store_products sp ON d.id = sp.dispensary_id + WHERE ${conditions.join(' AND ')} + GROUP BY d.state, s.name + ORDER BY avg_price DESC + `, params); return result.rows.map(row => ({ state: row.state, stateName: row.state_name, - productCount: parseInt(row.product_count), - avgPrice: parseFloat(row.avg_price), - minPrice: parseFloat(row.min_price), - maxPrice: 
parseFloat(row.max_price), - medianPrice: parseFloat(row.median_price), - priceStddev: parseFloat(row.price_stddev), + productCount: parseInt(row.product_count || '0'), + avgPrice: parseFloat(row.avg_price || '0'), + minPrice: parseFloat(row.min_price || '0'), + maxPrice: parseFloat(row.max_price || '0'), + medianPrice: parseFloat(row.median_price || '0'), + priceStddev: parseFloat(row.price_stddev || '0'), })); } @@ -498,7 +748,7 @@ export class StateQueryService { switch (metric) { case 'stores': query = ` - SELECT state, state_name AS "stateName", dispensary_count AS value, 'stores' AS label + SELECT state, state_name AS "stateName", COALESCE(dispensary_count, 0) AS value, 'stores' AS label FROM mv_state_metrics WHERE state IS NOT NULL ORDER BY state @@ -507,7 +757,7 @@ export class StateQueryService { case 'products': query = ` - SELECT state, state_name AS "stateName", total_products AS value, 'products' AS label + SELECT state, state_name AS "stateName", COALESCE(total_products, 0) AS value, 'products' AS label FROM mv_state_metrics WHERE state IS NOT NULL ORDER BY state @@ -516,7 +766,7 @@ export class StateQueryService { case 'brands': query = ` - SELECT state, state_name AS "stateName", unique_brands AS value, 'brands' AS label + SELECT state, state_name AS "stateName", COALESCE(unique_brands, 0) AS value, 'brands' AS label FROM mv_state_metrics WHERE state IS NOT NULL ORDER BY state @@ -536,10 +786,33 @@ export class StateQueryService { if (!options.brandId) { throw new Error('brandId required for penetration heatmap'); } + // Inline query instead of fn_brand_state_penetration function query = ` - SELECT state, state_name AS "stateName", penetration_pct AS value, 'penetration %' AS label - FROM fn_brand_state_penetration($1) - ORDER BY state + WITH state_totals AS ( + SELECT d.state, s.name AS state_name, COUNT(DISTINCT d.id) AS total_stores + FROM dispensaries d + JOIN states s ON d.state = s.code + WHERE d.state IS NOT NULL + GROUP BY d.state, s.name + 
), + brand_presence AS ( + SELECT d.state, COUNT(DISTINCT d.id) AS stores_with_brand + FROM store_products sp + JOIN dispensaries d ON sp.dispensary_id = d.id + WHERE (sp.brand_id = $1 OR sp.brand_name_raw = (SELECT name FROM brands WHERE id = $1)) + AND d.state IS NOT NULL + GROUP BY d.state + ) + SELECT + st.state, + st.state_name AS "stateName", + CASE WHEN st.total_stores > 0 + THEN ROUND((COALESCE(bp.stores_with_brand, 0)::numeric / st.total_stores) * 100, 2) + ELSE 0 END AS value, + 'penetration %' AS label + FROM state_totals st + LEFT JOIN brand_presence bp ON st.state = bp.state + ORDER BY st.state `; params = [options.brandId]; break; diff --git a/backend/src/platforms/dutchie/client.ts b/backend/src/platforms/dutchie/client.ts index 65dd028c..b23f0118 100644 --- a/backend/src/platforms/dutchie/client.ts +++ b/backend/src/platforms/dutchie/client.ts @@ -159,6 +159,7 @@ export const GRAPHQL_HASHES = { GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b', ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b', DispensaryInfo: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b', + GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6', }; // ============================================================ @@ -366,7 +367,7 @@ export function curlGet(url: string, headers: Record, timeout = export interface ExecuteGraphQLOptions { maxRetries?: number; retryOn403?: boolean; - cName: string; + cName?: string; // Optional - used for Referer header, defaults to 'cities' } /** @@ -378,7 +379,7 @@ export async function executeGraphQL( hash: string, options: ExecuteGraphQLOptions ): Promise { - const { maxRetries = 3, retryOn403 = true, cName } = options; + const { maxRetries = 3, retryOn403 = true, cName = 'cities' } = options; const body = { operationName, diff --git a/backend/src/routes/admin-debug.ts b/backend/src/routes/admin-debug.ts new 
file mode 100644 index 00000000..7a9165e2 --- /dev/null +++ b/backend/src/routes/admin-debug.ts @@ -0,0 +1,168 @@ +/** + * Admin Debug Routes + * + * Debug endpoints for inspecting crawl snapshots and raw payloads. + * Uses canonical store_* tables (not legacy dutchie_* tables). + */ +import { Router, Request, Response } from 'express'; +import { authMiddleware } from '../auth/middleware'; +import { pool } from '../db/pool'; + +const router = Router(); +router.use(authMiddleware); + +/** + * GET /api/admin/debug/stores/:id/snapshots + * List recent snapshots for a store's products + */ +router.get('/stores/:id/snapshots', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const { limit = '50', offset = '0' } = req.query; + + const dispensaryId = parseInt(id, 10); + const limitNum = Math.min(parseInt(limit as string, 10), 200); + const offsetNum = parseInt(offset as string, 10); + + // Get snapshots with product info + const { rows } = await pool.query(` + SELECT + sps.id, + sps.store_product_id as product_id, + COALESCE(sps.name_raw, sp.name_raw, 'Unknown Product') as product_name, + COALESCE(sps.brand_name_raw, sp.brand_name_raw) as brand_name, + sps.captured_at as crawled_at, + COALESCE(sps.stock_status, 'unknown') as stock_status, + sps.price_rec as regular_price, + sps.price_rec_special as sale_price, + sps.raw_data as raw_payload + FROM store_product_snapshots sps + LEFT JOIN store_products sp ON sp.id = sps.store_product_id + WHERE sps.dispensary_id = $1 + ORDER BY sps.captured_at DESC + LIMIT $2 OFFSET $3 + `, [dispensaryId, limitNum, offsetNum]); + + // Get total count + const { rows: countRows } = await pool.query( + `SELECT COUNT(*) as total FROM store_product_snapshots WHERE dispensary_id = $1`, + [dispensaryId] + ); + + res.json({ + snapshots: rows.map((r: any) => ({ + id: r.id, + productId: r.product_id, + productName: r.product_name, + brandName: r.brand_name, + crawledAt: r.crawled_at, + stockStatus: r.stock_status, + 
regularPrice: r.regular_price ? parseFloat(r.regular_price) : null, + salePrice: r.sale_price ? parseFloat(r.sale_price) : null, + rawPayload: r.raw_payload, + })), + total: parseInt(countRows[0]?.total || '0', 10), + limit: limitNum, + offset: offsetNum, + }); + } catch (error: any) { + console.error('[AdminDebug] Error fetching store snapshots:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +/** + * GET /api/admin/debug/snapshots/:id/raw-payload + * Get the raw payload for a specific snapshot + */ +router.get('/snapshots/:id/raw-payload', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const snapshotId = parseInt(id, 10); + + const { rows } = await pool.query(` + SELECT + sps.id, + sps.store_product_id as product_id, + COALESCE(sps.name_raw, sp.name_raw, 'Unknown Product') as product_name, + sps.dispensary_id, + d.name as dispensary_name, + sps.captured_at as crawled_at, + sps.raw_data as raw_payload + FROM store_product_snapshots sps + LEFT JOIN store_products sp ON sp.id = sps.store_product_id + LEFT JOIN dispensaries d ON d.id = sps.dispensary_id + WHERE sps.id = $1 + `, [snapshotId]); + + if (rows.length === 0) { + return res.status(404).json({ error: 'Snapshot not found' }); + } + + const r = rows[0]; + res.json({ + snapshot: { + id: r.id, + productId: r.product_id, + productName: r.product_name, + dispensaryId: r.dispensary_id, + dispensaryName: r.dispensary_name, + crawledAt: r.crawled_at, + rawPayload: r.raw_payload, + }, + }); + } catch (error: any) { + console.error('[AdminDebug] Error fetching snapshot raw payload:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +/** + * GET /api/admin/debug/products/:id/raw-payload + * Get raw payload and metadata for a specific product + */ +router.get('/products/:id/raw-payload', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const productId = parseInt(id, 10); + + // Query store_products 
for the product and any raw_payload/metadata + const { rows } = await pool.query(` + SELECT + sp.id, + sp.name_raw as name, + sp.dispensary_id, + d.name as dispensary_name, + sp.raw_payload, + sp.provider_metadata as metadata, + sp.created_at, + sp.updated_at + FROM store_products sp + LEFT JOIN dispensaries d ON d.id = sp.dispensary_id + WHERE sp.id = $1 + `, [productId]); + + if (rows.length === 0) { + return res.status(404).json({ error: 'Product not found' }); + } + + const r = rows[0]; + res.json({ + product: { + id: r.id, + name: r.name, + dispensaryId: r.dispensary_id, + dispensaryName: r.dispensary_name, + rawPayload: r.raw_payload, + metadata: r.metadata, + createdAt: r.created_at, + updatedAt: r.updated_at, + }, + }); + } catch (error: any) { + console.error('[AdminDebug] Error fetching product raw payload:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +export default router; diff --git a/backend/src/routes/analytics.ts b/backend/src/routes/analytics.ts index 5c2e2ea3..194685cb 100755 --- a/backend/src/routes/analytics.ts +++ b/backend/src/routes/analytics.ts @@ -35,11 +35,11 @@ router.get('/overview', async (req, res) => { // Top products const topProductsResult = await pool.query(` - SELECT p.id, p.name, p.price, COUNT(c.id) as click_count + SELECT p.id, p.name_raw as name, p.price_rec as price, COUNT(c.id) as click_count FROM clicks c - JOIN products p ON c.product_id = p.id + JOIN store_products p ON c.product_id = p.id WHERE c.clicked_at >= NOW() - INTERVAL '${parseInt(days as string)} days' - GROUP BY p.id, p.name, p.price + GROUP BY p.id, p.name_raw, p.price_rec ORDER BY click_count DESC LIMIT 10 `); @@ -109,12 +109,12 @@ router.get('/campaigns/:id', async (req, res) => { // Clicks by product in this campaign const byProductResult = await pool.query(` - SELECT p.id, p.name, COUNT(c.id) as clicks + SELECT p.id, p.name_raw as name, COUNT(c.id) as clicks FROM clicks c - JOIN products p ON c.product_id = p.id + JOIN 
store_products p ON c.product_id = p.id WHERE c.campaign_id = $1 AND c.clicked_at >= NOW() - INTERVAL '${parseInt(days as string)} days' - GROUP BY p.id, p.name + GROUP BY p.id, p.name_raw ORDER BY clicks DESC `, [id]); diff --git a/backend/src/routes/api-permissions.ts b/backend/src/routes/api-permissions.ts index 022b5148..4b4ce61f 100644 --- a/backend/src/routes/api-permissions.ts +++ b/backend/src/routes/api-permissions.ts @@ -154,7 +154,7 @@ router.patch('/:id/toggle', requireRole('superadmin', 'admin'), async (req, res) const result = await pool.query(` UPDATE wp_dutchie_api_permissions - SET is_active = NOT is_active + SET is_active = CASE WHEN is_active = 1 THEN 0 ELSE 1 END WHERE id = $1 RETURNING * `, [id]); diff --git a/backend/src/routes/campaigns.ts b/backend/src/routes/campaigns.ts index f85ed2ee..3c19ff4d 100755 --- a/backend/src/routes/campaigns.ts +++ b/backend/src/routes/campaigns.ts @@ -37,8 +37,22 @@ router.get('/:id', async (req, res) => { } const productsResult = await pool.query(` - SELECT p.*, cp.display_order - FROM products p + SELECT + p.id, + p.dispensary_id, + p.name_raw as name, + p.brand_name_raw as brand, + p.category_raw as category, + p.subcategory_raw as subcategory, + p.price_rec as price, + p.thc_percent, + p.cbd_percent, + p.strain_type, + p.primary_image_url as image_url, + p.stock_status, + p.is_in_stock as in_stock, + cp.display_order + FROM store_products p JOIN campaign_products cp ON p.id = cp.product_id WHERE cp.campaign_id = $1 ORDER BY cp.display_order diff --git a/backend/src/routes/categories.ts b/backend/src/routes/categories.ts index 86c9db20..69f1cb37 100644 --- a/backend/src/routes/categories.ts +++ b/backend/src/routes/categories.ts @@ -11,12 +11,12 @@ router.get('/', async (req, res) => { const { store_id } = req.query; let query = ` - SELECT + SELECT c.*, COUNT(DISTINCT p.id) as product_count, pc.name as parent_name FROM categories c - LEFT JOIN products p ON c.id = p.category_id + LEFT JOIN store_products p 
ON c.name = p.category_raw LEFT JOIN categories pc ON c.parent_id = pc.id `; @@ -51,11 +51,11 @@ router.get('/tree', async (req, res) => { // Get all categories for the store const result = await pool.query(` - SELECT + SELECT c.*, COUNT(DISTINCT p.id) as product_count FROM categories c - LEFT JOIN products p ON c.id = p.category_id AND p.in_stock = true + LEFT JOIN store_products p ON c.name = p.category_raw AND p.is_in_stock = true AND p.dispensary_id = $1 WHERE c.store_id = $1 GROUP BY c.id ORDER BY c.display_order, c.name diff --git a/backend/src/routes/click-analytics.ts b/backend/src/routes/click-analytics.ts index 2bb1e9d4..e4ff7094 100644 --- a/backend/src/routes/click-analytics.ts +++ b/backend/src/routes/click-analytics.ts @@ -92,9 +92,9 @@ router.get('/brands', async (req: Request, res: Response) => { if (brandIds.length > 0) { const brandNamesResult = await pool.query(` - SELECT DISTINCT brand_name - FROM dutchie_products - WHERE brand_name = ANY($1) + SELECT DISTINCT brand_name_raw as brand_name + FROM store_products + WHERE brand_name_raw = ANY($1) `, [brandIds]); brandNamesResult.rows.forEach(r => { @@ -201,14 +201,14 @@ router.get('/products', async (req: Request, res: Response) => { // Try to match by external_id or id const productDetailsResult = await pool.query(` SELECT - external_id, + provider_product_id as external_id, id::text as product_id, - name, - brand_name, - type, - subcategory - FROM dutchie_products - WHERE external_id = ANY($1) OR id::text = ANY($1) + name_raw as name, + brand_name_raw as brand_name, + category_raw as type, + subcategory_raw as subcategory + FROM store_products + WHERE provider_product_id = ANY($1) OR id::text = ANY($1) `, [productIds]); productDetailsResult.rows.forEach(r => { diff --git a/backend/src/routes/dashboard.ts b/backend/src/routes/dashboard.ts index 20263e3d..60ac9d6c 100755 --- a/backend/src/routes/dashboard.ts +++ b/backend/src/routes/dashboard.ts @@ -26,10 +26,10 @@ router.get('/stats', async (req, 
res) => { COUNT(*) as total, COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock, COUNT(*) FILTER (WHERE primary_image_url IS NOT NULL) as with_images, - COUNT(DISTINCT brand_name) FILTER (WHERE brand_name IS NOT NULL AND brand_name != '') as unique_brands, + COUNT(DISTINCT brand_name_raw) FILTER (WHERE brand_name_raw IS NOT NULL AND brand_name_raw != '') as unique_brands, COUNT(DISTINCT dispensary_id) as dispensaries_with_products, COUNT(*) FILTER (WHERE created_at >= NOW() - INTERVAL '24 hours') as new_products_24h - FROM dutchie_products + FROM store_products ) SELECT ds.total as store_total, ds.active as store_active, @@ -96,25 +96,25 @@ router.get('/activity', async (req, res) => { const scrapesResult = await pool.query(` SELECT d.name, - d.last_crawled_at as last_scraped_at, + d.last_crawl_at as last_scraped_at, d.product_count FROM dispensaries d - WHERE d.last_crawled_at IS NOT NULL - ORDER BY d.last_crawled_at DESC + WHERE d.last_crawl_at IS NOT NULL + ORDER BY d.last_crawl_at DESC LIMIT $1 `, [limit]); - // Recent products from dutchie_products + // Recent products from store_products (canonical) const productsResult = await pool.query(` SELECT - p.name, - 0 as price, - p.brand_name as brand, - p.thc as thc_percentage, - p.cbd as cbd_percentage, + p.name_raw as name, + p.price_rec as price, + p.brand_name_raw as brand, + p.thc_percent as thc_percentage, + p.cbd_percent as cbd_percentage, d.name as store_name, p.created_at as first_seen_at - FROM dutchie_products p + FROM store_products p JOIN dispensaries d ON p.dispensary_id = d.id ORDER BY p.created_at DESC LIMIT $1 diff --git a/backend/src/routes/dispensaries.ts b/backend/src/routes/dispensaries.ts index 3f0a8cdf..175addc1 100644 --- a/backend/src/routes/dispensaries.ts +++ b/backend/src/routes/dispensaries.ts @@ -374,40 +374,42 @@ router.get('/:slug/products', async (req, res) => { const dispensaryId = dispensaryResult.rows[0].id; - // Build query for products + // Build query for products 
using canonical store_products table let query = ` SELECT - p.id, - p.name, - p.brand, - p.variant, - p.slug, - p.description, - p.regular_price, - p.sale_price, - p.thc_percentage, - p.cbd_percentage, - p.strain_type, - p.terpenes, - p.effects, - p.flavors, - p.image_url, - p.dutchie_url, - p.in_stock, - p.created_at, - p.updated_at - FROM products p - WHERE p.dispensary_id = $1 + sp.id, + sp.name_raw as name, + sp.brand_name_raw as brand, + sp.description, + COALESCE(sp.stock_quantity, sp.total_quantity_available, 0) as quantity, + sp.price_rec as regular_price, + CASE WHEN sp.price_rec_special IS NOT NULL AND sp.price_rec_special > 0 + THEN sp.price_rec_special + ELSE NULL END as sale_price, + sp.thc_percent as thc_percentage, + sp.cbd_percent as cbd_percentage, + sp.strain_type, + sp.effects, + sp.primary_image_url as image_url, + sp.stock_status, + sp.stock_status = 'in_stock' as in_stock, + sp.is_on_special as on_special, + sp.category_raw as category, + sp.subcategory_raw as subcategory, + sp.created_at, + sp.updated_at + FROM store_products sp + WHERE sp.dispensary_id = $1 `; const params: any[] = [dispensaryId]; if (category) { - query += ` AND p.category = $2`; + query += ` AND sp.category_raw = $2`; params.push(category); } - query += ` ORDER BY p.created_at DESC`; + query += ` ORDER BY sp.name_raw ASC`; const result = await pool.query(query, params); @@ -435,23 +437,23 @@ router.get('/:slug/brands', async (req, res) => { const dispensaryId = dispensaryResult.rows[0].id; - // Build query with optional search filter + // Build query with optional search filter using canonical tables let query = ` SELECT DISTINCT - brand, + brand_name as brand, COUNT(*) as product_count - FROM products - WHERE dispensary_id = $1 AND brand IS NOT NULL + FROM v_products + WHERE dispensary_id = $1 AND brand_name IS NOT NULL `; const params: any[] = [dispensaryId]; // Add search filter if provided if (search) { - query += ` AND brand ILIKE $2`; + query += ` AND brand_name 
ILIKE $2`; params.push(`%${search}%`); } - query += ` GROUP BY brand ORDER BY product_count DESC, brand ASC`; + query += ` GROUP BY brand_name ORDER BY product_count DESC, brand_name ASC`; const result = await pool.query(query, params); @@ -479,44 +481,48 @@ router.get('/:slug/specials', async (req, res) => { const dispensaryId = dispensaryResult.rows[0].id; - // Build query to get products with discounts + // Build query to get products with specials/discounts using canonical tables let query = ` SELECT p.id, p.name, - p.brand, - p.variant, - p.slug, - p.description, - p.regular_price, - p.sale_price, - p.discount_type, - p.discount_value, - p.thc_percentage, - p.cbd_percentage, + p.brand_name as brand, + p.subcategory as variant, + sp.description, + COALESCE(snap.rec_min_price_cents, 0)::numeric / 100.0 as regular_price, + snap.rec_min_special_price_cents::numeric / 100.0 as sale_price, + snap.discount_percent, + p.thc as thc_percentage, + p.cbd as cbd_percentage, p.strain_type, - p.terpenes, - p.effects, - p.flavors, - p.image_url, - p.dutchie_url, - p.in_stock, + sp.effects, + p.primary_image_url as image_url, + p.stock_status = 'in_stock' as in_stock, + p.stock_status, + true as on_special, p.created_at, p.updated_at - FROM products p + FROM v_products p + JOIN store_products sp ON sp.id = p.id + INNER JOIN LATERAL ( + SELECT rec_min_price_cents, rec_min_special_price_cents, discount_percent, special + FROM v_product_snapshots vps + WHERE vps.store_product_id = p.id + AND (vps.special = true OR vps.rec_min_special_price_cents > 0) + ORDER BY vps.crawled_at DESC + LIMIT 1 + ) snap ON true WHERE p.dispensary_id = $1 - AND p.discount_type IS NOT NULL - AND p.discount_value IS NOT NULL `; const params: any[] = [dispensaryId]; // Add search filter if provided if (search) { - query += ` AND (p.name ILIKE $2 OR p.brand ILIKE $2 OR p.description ILIKE $2)`; + query += ` AND (p.name ILIKE $2 OR p.brand_name ILIKE $2 OR sp.description ILIKE $2)`; 
params.push(`%${search}%`); } - query += ` ORDER BY p.created_at DESC`; + query += ` ORDER BY p.updated_at DESC`; const result = await pool.query(query, params); diff --git a/backend/src/routes/intelligence.ts b/backend/src/routes/intelligence.ts new file mode 100644 index 00000000..4da15411 --- /dev/null +++ b/backend/src/routes/intelligence.ts @@ -0,0 +1,254 @@ +/** + * Intelligence API Routes + * + * Brand and pricing intelligence endpoints for the CannaiQ admin dashboard. + * Uses canonical store_products table for aggregated analytics. + */ +import { Router, Request, Response } from 'express'; +import { authMiddleware } from '../auth/middleware'; +import { pool } from '../db/pool'; + +const router = Router(); +router.use(authMiddleware); + +/** + * GET /api/admin/intelligence/brands + * List all brands with state presence, store counts, and pricing + */ +router.get('/brands', async (req: Request, res: Response) => { + try { + const { limit = '500', offset = '0' } = req.query; + const limitNum = Math.min(parseInt(limit as string, 10), 1000); + const offsetNum = parseInt(offset as string, 10); + + const { rows } = await pool.query(` + SELECT + sp.brand_name_raw as brand_name, + array_agg(DISTINCT d.state) FILTER (WHERE d.state IS NOT NULL) as states, + COUNT(DISTINCT d.id) as store_count, + COUNT(DISTINCT sp.id) as sku_count, + ROUND(AVG(sp.price_rec)::numeric, 2) FILTER (WHERE sp.price_rec > 0) as avg_price_rec, + ROUND(AVG(sp.price_med)::numeric, 2) FILTER (WHERE sp.price_med > 0) as avg_price_med + FROM store_products sp + JOIN dispensaries d ON sp.dispensary_id = d.id + WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != '' + GROUP BY sp.brand_name_raw + ORDER BY store_count DESC, sku_count DESC + LIMIT $1 OFFSET $2 + `, [limitNum, offsetNum]); + + // Get total count + const { rows: countRows } = await pool.query(` + SELECT COUNT(DISTINCT brand_name_raw) as total + FROM store_products + WHERE brand_name_raw IS NOT NULL AND brand_name_raw != '' + `); 
+ + res.json({ + brands: rows.map((r: any) => ({ + brandName: r.brand_name, + states: r.states || [], + storeCount: parseInt(r.store_count, 10), + skuCount: parseInt(r.sku_count, 10), + avgPriceRec: r.avg_price_rec ? parseFloat(r.avg_price_rec) : null, + avgPriceMed: r.avg_price_med ? parseFloat(r.avg_price_med) : null, + })), + total: parseInt(countRows[0]?.total || '0', 10), + limit: limitNum, + offset: offsetNum, + }); + } catch (error: any) { + console.error('[Intelligence] Error fetching brands:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +/** + * GET /api/admin/intelligence/brands/:brandName/penetration + * Get state-by-state penetration for a specific brand + */ +router.get('/brands/:brandName/penetration', async (req: Request, res: Response) => { + try { + const { brandName } = req.params; + + const { rows } = await pool.query(` + WITH state_totals AS ( + SELECT + d.state, + s.name AS state_name, + COUNT(DISTINCT d.id) AS total_stores + FROM dispensaries d + JOIN states s ON d.state = s.code + WHERE d.state IS NOT NULL + GROUP BY d.state, s.name + ), + brand_presence AS ( + SELECT + d.state, + COUNT(DISTINCT d.id) AS stores_with_brand, + COUNT(DISTINCT sp.id) AS product_count, + ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price + FROM store_products sp + JOIN dispensaries d ON sp.dispensary_id = d.id + WHERE sp.brand_name_raw ILIKE $1 + AND d.state IS NOT NULL + GROUP BY d.state + ) + SELECT + st.state, + st.state_name AS "stateName", + st.total_stores AS "totalStores", + COALESCE(bp.stores_with_brand, 0) AS "storesWithBrand", + CASE + WHEN st.total_stores > 0 + THEN ROUND((COALESCE(bp.stores_with_brand, 0)::numeric / st.total_stores) * 100, 2) + ELSE 0 + END AS "penetrationPct", + COALESCE(bp.product_count, 0) AS "productCount", + bp.avg_price AS "avgPrice" + FROM state_totals st + LEFT JOIN brand_presence bp ON st.state = bp.state + WHERE COALESCE(bp.stores_with_brand, 0) > 0 + ORDER BY COALESCE(bp.stores_with_brand, 0) 
DESC + `, [brandName]); + + // Calculate national metrics + const { rows: nationalRows } = await pool.query(` + SELECT + COUNT(DISTINCT d.id) AS total_stores, + COUNT(DISTINCT CASE WHEN sp.brand_name_raw ILIKE $1 THEN d.id END) AS stores_with_brand, + AVG(sp.price_rec) FILTER (WHERE sp.brand_name_raw ILIKE $1) AS avg_price + FROM dispensaries d + LEFT JOIN store_products sp ON d.id = sp.dispensary_id + WHERE d.state IS NOT NULL + `, [brandName]); + + const national = nationalRows[0]; + const nationalPenetration = national.total_stores > 0 + ? (national.stores_with_brand / national.total_stores) * 100 + : 0; + + res.json({ + brandName, + states: rows, + nationalPenetration: Math.round(nationalPenetration * 100) / 100, + nationalAvgPrice: national.avg_price + ? Math.round(parseFloat(national.avg_price) * 100) / 100 + : null, + bestPerformingState: rows[0]?.state || null, + worstPerformingState: rows[rows.length - 1]?.state || null, + }); + } catch (error: any) { + console.error('[Intelligence] Error fetching brand penetration:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +/** + * GET /api/admin/intelligence/pricing + * Get pricing analytics by category + */ +router.get('/pricing', async (req: Request, res: Response) => { + try { + const { rows: categoryRows } = await pool.query(` + SELECT + sp.category_raw as category, + ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price, + MIN(sp.price_rec) FILTER (WHERE sp.price_rec > 0) as min_price, + MAX(sp.price_rec) as max_price, + ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) + FILTER (WHERE sp.price_rec > 0) as median_price, + COUNT(*) as product_count + FROM store_products sp + WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0 + GROUP BY sp.category_raw + ORDER BY product_count DESC + `); + + const { rows: stateRows } = await pool.query(` + SELECT + d.state, + ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price, + MIN(sp.price_rec) FILTER (WHERE sp.price_rec 
> 0) as min_price, + MAX(sp.price_rec) as max_price, + COUNT(DISTINCT sp.id) as product_count + FROM store_products sp + JOIN dispensaries d ON sp.dispensary_id = d.id + WHERE d.state IS NOT NULL AND sp.price_rec > 0 + GROUP BY d.state + ORDER BY avg_price DESC + `); + + res.json({ + byCategory: categoryRows.map((r: any) => ({ + category: r.category, + avgPrice: r.avg_price ? parseFloat(r.avg_price) : null, + minPrice: r.min_price ? parseFloat(r.min_price) : null, + maxPrice: r.max_price ? parseFloat(r.max_price) : null, + medianPrice: r.median_price ? parseFloat(r.median_price) : null, + productCount: parseInt(r.product_count, 10), + })), + byState: stateRows.map((r: any) => ({ + state: r.state, + avgPrice: r.avg_price ? parseFloat(r.avg_price) : null, + minPrice: r.min_price ? parseFloat(r.min_price) : null, + maxPrice: r.max_price ? parseFloat(r.max_price) : null, + productCount: parseInt(r.product_count, 10), + })), + }); + } catch (error: any) { + console.error('[Intelligence] Error fetching pricing:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +/** + * GET /api/admin/intelligence/stores + * Get store intelligence summary + */ +router.get('/stores', async (req: Request, res: Response) => { + try { + const { rows: storeRows } = await pool.query(` + SELECT + d.id, + d.name, + d.dba_name, + d.city, + d.state, + d.menu_type, + d.crawl_enabled, + COUNT(DISTINCT sp.id) as product_count, + COUNT(DISTINCT sp.brand_name_raw) as brand_count, + ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price, + MAX(sp.updated_at) as last_product_update + FROM dispensaries d + LEFT JOIN store_products sp ON sp.dispensary_id = d.id + WHERE d.state IS NOT NULL + GROUP BY d.id, d.name, d.dba_name, d.city, d.state, d.menu_type, d.crawl_enabled + ORDER BY product_count DESC + LIMIT 200 + `); + + res.json({ + stores: storeRows.map((r: any) => ({ + id: r.id, + name: r.name, + dbaName: r.dba_name, + city: r.city, + state: r.state, + menuType: r.menu_type, + 
crawlEnabled: r.crawl_enabled, + productCount: parseInt(r.product_count || '0', 10), + brandCount: parseInt(r.brand_count || '0', 10), + avgPrice: r.avg_price ? parseFloat(r.avg_price) : null, + lastProductUpdate: r.last_product_update, + })), + total: storeRows.length, + }); + } catch (error: any) { + console.error('[Intelligence] Error fetching stores:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +export default router; diff --git a/backend/src/routes/job-queue.ts b/backend/src/routes/job-queue.ts new file mode 100644 index 00000000..fdb07cf2 --- /dev/null +++ b/backend/src/routes/job-queue.ts @@ -0,0 +1,467 @@ +/** + * Job Queue Management API Routes + * + * Endpoints for viewing and managing the crawl job queue: + * GET /api/job-queue - List all jobs (with filters) + * GET /api/job-queue/stats - Queue statistics + * GET /api/job-queue/:id - Get single job details + * PUT /api/job-queue/:id/priority - Update job priority + * POST /api/job-queue/:id/cancel - Cancel a pending job + * POST /api/job-queue/:id/retry - Retry a failed job + * POST /api/job-queue/bulk-priority - Bulk update priorities + * POST /api/job-queue/pause - Pause queue processing + * POST /api/job-queue/resume - Resume queue processing + */ + +import { Router, Request, Response } from 'express'; +import { pool } from '../db/pool'; + +const router = Router(); + +// In-memory queue state (would be in Redis in production) +let queuePaused = false; + +/** + * GET /api/job-queue - List jobs with filters + */ +router.get('/', async (req: Request, res: Response) => { + try { + const { + status = 'pending', + limit = '50', + offset = '0', + job_type, + dispensary_id, + sort_by = 'priority', + sort_order = 'desc' + } = req.query; + + let query = ` + SELECT + j.id, + j.dispensary_id, + d.name as dispensary_name, + d.city, + d.state, + j.job_type, + j.trigger_type, + j.priority, + j.status, + j.scheduled_at, + j.started_at, + j.completed_at, + j.duration_ms, + 
j.products_found, + j.error_message, + j.retry_count, + j.max_retries, + j.worker_id, + j.locked_by, + j.created_at + FROM dispensary_crawl_jobs j + LEFT JOIN dispensaries d ON d.id = j.dispensary_id + WHERE 1=1 + `; + const params: any[] = []; + let paramIndex = 1; + + if (status && status !== 'all') { + params.push(status); + query += ` AND j.status = $${paramIndex++}`; + } + + if (job_type) { + params.push(job_type); + query += ` AND j.job_type = $${paramIndex++}`; + } + + if (dispensary_id) { + params.push(dispensary_id); + query += ` AND j.dispensary_id = $${paramIndex++}`; + } + + // Sorting + const validSortColumns = ['priority', 'created_at', 'scheduled_at', 'dispensary_name']; + const sortCol = validSortColumns.includes(sort_by as string) ? sort_by : 'priority'; + const sortDir = sort_order === 'asc' ? 'ASC' : 'DESC'; + + if (sortCol === 'dispensary_name') { + query += ` ORDER BY d.name ${sortDir} NULLS LAST`; + } else { + query += ` ORDER BY j.${sortCol} ${sortDir} NULLS LAST`; + } + + // Add secondary sort by created_at for consistent ordering + if (sortCol !== 'created_at') { + query += `, j.created_at ASC`; + } + + params.push(parseInt(limit as string)); + query += ` LIMIT $${paramIndex++}`; + + params.push(parseInt(offset as string)); + query += ` OFFSET $${paramIndex++}`; + + const { rows } = await pool.query(query, params); + + // Get total count for pagination + let countQuery = ` + SELECT COUNT(*) as total + FROM dispensary_crawl_jobs j + WHERE 1=1 + `; + const countParams: any[] = []; + let countParamIndex = 1; + + if (status && status !== 'all') { + countParams.push(status); + countQuery += ` AND j.status = $${countParamIndex++}`; + } + if (job_type) { + countParams.push(job_type); + countQuery += ` AND j.job_type = $${countParamIndex++}`; + } + if (dispensary_id) { + countParams.push(dispensary_id); + countQuery += ` AND j.dispensary_id = $${countParamIndex++}`; + } + + const countResult = await pool.query(countQuery, countParams); + const 
total = parseInt(countResult.rows[0].total); + + res.json({ + success: true, + jobs: rows, + total, + limit: parseInt(limit as string), + offset: parseInt(offset as string), + queue_paused: queuePaused + }); + } catch (error: any) { + console.error('[JobQueue] Error listing jobs:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * GET /api/job-queue/stats - Queue statistics + */ +router.get('/stats', async (_req: Request, res: Response) => { + try { + const { rows } = await pool.query(` + SELECT + COUNT(*) FILTER (WHERE status = 'pending') as pending_count, + COUNT(*) FILTER (WHERE status = 'running') as running_count, + COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as completed_24h, + COUNT(*) FILTER (WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '24 hours') as failed_24h, + COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled_count, + AVG(duration_ms) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as avg_duration_ms, + MAX(priority) FILTER (WHERE status = 'pending') as max_priority, + MIN(created_at) FILTER (WHERE status = 'pending') as oldest_pending + FROM dispensary_crawl_jobs + `); + + const stats = rows[0]; + + // Get jobs by type + const { rows: byType } = await pool.query(` + SELECT job_type, COUNT(*) as count + FROM dispensary_crawl_jobs + WHERE status = 'pending' + GROUP BY job_type + ORDER BY count DESC + `); + + // Get top priority jobs + const { rows: topPriority } = await pool.query(` + SELECT + j.id, + j.dispensary_id, + d.name as dispensary_name, + j.job_type, + j.priority, + j.created_at + FROM dispensary_crawl_jobs j + LEFT JOIN dispensaries d ON d.id = j.dispensary_id + WHERE j.status = 'pending' + ORDER BY j.priority DESC, j.created_at ASC + LIMIT 5 + `); + + // Estimate wait time based on avg processing rate + const pendingCount = parseInt(stats.pending_count) || 0; + const avgDuration = 
parseFloat(stats.avg_duration_ms) || 30000; // default 30s + const runningCount = parseInt(stats.running_count) || 1; + const estimatedWaitMs = (pendingCount * avgDuration) / Math.max(runningCount, 1); + + res.json({ + success: true, + stats: { + pending: parseInt(stats.pending_count) || 0, + running: parseInt(stats.running_count) || 0, + completed_24h: parseInt(stats.completed_24h) || 0, + failed_24h: parseInt(stats.failed_24h) || 0, + cancelled: parseInt(stats.cancelled_count) || 0, + avg_duration_ms: Math.round(parseFloat(stats.avg_duration_ms)) || null, + max_priority: parseInt(stats.max_priority) || 0, + oldest_pending: stats.oldest_pending, + estimated_wait_ms: Math.round(estimatedWaitMs), + queue_paused: queuePaused + }, + by_type: byType, + top_priority: topPriority + }); + } catch (error: any) { + console.error('[JobQueue] Error getting stats:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * GET /api/job-queue/:id - Get single job + */ +router.get('/:id', async (req: Request, res: Response) => { + try { + const { id } = req.params; + + const { rows } = await pool.query(` + SELECT + j.*, + d.name as dispensary_name, + d.city, + d.state, + d.menu_url + FROM dispensary_crawl_jobs j + LEFT JOIN dispensaries d ON d.id = j.dispensary_id + WHERE j.id = $1 + `, [id]); + + if (rows.length === 0) { + return res.status(404).json({ success: false, error: 'Job not found' }); + } + + res.json({ success: true, job: rows[0] }); + } catch (error: any) { + console.error('[JobQueue] Error getting job:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * PUT /api/job-queue/:id/priority - Update job priority + */ +router.put('/:id/priority', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const { priority } = req.body; + + if (typeof priority !== 'number' || priority < 0 || priority > 100) { + return res.status(400).json({ + success: false, + error: 
'Priority must be a number between 0 and 100' + }); + } + + const { rows } = await pool.query(` + UPDATE dispensary_crawl_jobs + SET priority = $1, updated_at = NOW() + WHERE id = $2 AND status = 'pending' + RETURNING id, priority, status + `, [priority, id]); + + if (rows.length === 0) { + return res.status(404).json({ + success: false, + error: 'Job not found or not in pending status' + }); + } + + res.json({ success: true, job: rows[0] }); + } catch (error: any) { + console.error('[JobQueue] Error updating priority:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * POST /api/job-queue/:id/cancel - Cancel a pending job + */ +router.post('/:id/cancel', async (req: Request, res: Response) => { + try { + const { id } = req.params; + + const { rows } = await pool.query(` + UPDATE dispensary_crawl_jobs + SET status = 'cancelled', completed_at = NOW(), updated_at = NOW() + WHERE id = $1 AND status = 'pending' + RETURNING id, status + `, [id]); + + if (rows.length === 0) { + return res.status(404).json({ + success: false, + error: 'Job not found or not in pending status' + }); + } + + res.json({ success: true, job: rows[0], message: 'Job cancelled' }); + } catch (error: any) { + console.error('[JobQueue] Error cancelling job:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * POST /api/job-queue/:id/retry - Retry a failed job + */ +router.post('/:id/retry', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const { priority } = req.body; + + const { rows } = await pool.query(` + UPDATE dispensary_crawl_jobs + SET + status = 'pending', + priority = COALESCE($2, priority), + error_message = NULL, + started_at = NULL, + completed_at = NULL, + duration_ms = NULL, + worker_id = NULL, + locked_by = NULL, + locked_at = NULL, + retry_count = retry_count + 1, + updated_at = NOW() + WHERE id = $1 AND status IN ('failed', 'cancelled') + RETURNING id, status, 
priority, retry_count + `, [id, priority]); + + if (rows.length === 0) { + return res.status(404).json({ + success: false, + error: 'Job not found or not in failed/cancelled status' + }); + } + + res.json({ success: true, job: rows[0], message: 'Job queued for retry' }); + } catch (error: any) { + console.error('[JobQueue] Error retrying job:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * POST /api/job-queue/bulk-priority - Bulk update priorities + */ +router.post('/bulk-priority', async (req: Request, res: Response) => { + try { + const { jobs } = req.body; // Array of { id, priority } + + if (!Array.isArray(jobs) || jobs.length === 0) { + return res.status(400).json({ + success: false, + error: 'jobs array is required' + }); + } + + const client = await pool.connect(); + try { + await client.query('BEGIN'); + + let updated = 0; + for (const job of jobs) { + if (typeof job.id === 'number' && typeof job.priority === 'number') { + const result = await client.query(` + UPDATE dispensary_crawl_jobs + SET priority = $1, updated_at = NOW() + WHERE id = $2 AND status = 'pending' + `, [job.priority, job.id]); + updated += result.rowCount || 0; + } + } + + await client.query('COMMIT'); + res.json({ success: true, updated, message: `Updated ${updated} jobs` }); + } catch (err) { + await client.query('ROLLBACK'); + throw err; + } finally { + client.release(); + } + } catch (error: any) { + console.error('[JobQueue] Error bulk updating priorities:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * POST /api/job-queue/enqueue - Add a new job to the queue + */ +router.post('/enqueue', async (req: Request, res: Response) => { + try { + const { dispensary_id, job_type = 'dutchie_product_crawl', priority = 0 } = req.body; + + if (!dispensary_id) { + return res.status(400).json({ success: false, error: 'dispensary_id is required' }); + } + + // Check if job already pending for this 
dispensary + const existing = await pool.query(` + SELECT id FROM dispensary_crawl_jobs + WHERE dispensary_id = $1 AND job_type = $2 AND status = 'pending' + `, [dispensary_id, job_type]); + + if (existing.rows.length > 0) { + // Update priority if higher + await pool.query(` + UPDATE dispensary_crawl_jobs + SET priority = GREATEST(priority, $1), updated_at = NOW() + WHERE id = $2 + `, [priority, existing.rows[0].id]); + + return res.json({ + success: true, + job_id: existing.rows[0].id, + message: 'Job already queued, priority updated' + }); + } + + const { rows } = await pool.query(` + INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type) + VALUES ($1, $2, $3, 'manual') + RETURNING id + `, [dispensary_id, job_type, priority]); + + res.json({ success: true, job_id: rows[0].id, message: 'Job enqueued' }); + } catch (error: any) { + console.error('[JobQueue] Error enqueuing job:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * POST /api/job-queue/pause - Pause queue processing + */ +router.post('/pause', async (_req: Request, res: Response) => { + queuePaused = true; + res.json({ success: true, queue_paused: true, message: 'Queue paused' }); +}); + +/** + * POST /api/job-queue/resume - Resume queue processing + */ +router.post('/resume', async (_req: Request, res: Response) => { + queuePaused = false; + res.json({ success: true, queue_paused: false, message: 'Queue resumed' }); +}); + +/** + * GET /api/job-queue/paused - Check if queue is paused + */ +router.get('/paused', async (_req: Request, res: Response) => { + res.json({ success: true, queue_paused: queuePaused }); +}); + +export default router; +export { queuePaused }; diff --git a/backend/src/routes/markets.ts b/backend/src/routes/markets.ts new file mode 100644 index 00000000..63fa2d59 --- /dev/null +++ b/backend/src/routes/markets.ts @@ -0,0 +1,667 @@ +/** + * Markets API Routes + * + * Provider-agnostic store and product 
endpoints for the CannaiQ admin dashboard. + * Queries the dispensaries and store_products tables directly. + */ +import { Router, Request, Response } from 'express'; +import { authMiddleware } from '../auth/middleware'; +import { pool } from '../db/pool'; + +const router = Router(); +router.use(authMiddleware); + +/** + * GET /api/markets/dashboard + * Dashboard summary with counts for dispensaries, products, brands, etc. + */ +router.get('/dashboard', async (req: Request, res: Response) => { + try { + // Get dispensary count + const { rows: dispRows } = await pool.query( + `SELECT COUNT(*) as count FROM dispensaries` + ); + + // Get product count from store_products (canonical) + const { rows: productRows } = await pool.query(` + SELECT COUNT(*) as count FROM store_products + `); + + // Get brand count + const { rows: brandRows } = await pool.query(` + SELECT COUNT(DISTINCT brand_name_raw) as count + FROM store_products + WHERE brand_name_raw IS NOT NULL + `); + + // Get category count + const { rows: categoryRows } = await pool.query(` + SELECT COUNT(DISTINCT category_raw) as count + FROM store_products + WHERE category_raw IS NOT NULL + `); + + // Get snapshot count in last 24 hours + const { rows: snapshotRows } = await pool.query(` + SELECT COUNT(*) as count + FROM store_product_snapshots + WHERE captured_at >= NOW() - INTERVAL '24 hours' + `); + + // Get last crawl time + const { rows: lastCrawlRows } = await pool.query(` + SELECT MAX(completed_at) as last_crawl + FROM crawl_orchestration_traces + WHERE success = true + `); + + // Get failed job count (jobs in last 24h that failed) + const { rows: failedRows } = await pool.query(` + SELECT COUNT(*) as count + FROM crawl_orchestration_traces + WHERE success = false + AND started_at >= NOW() - INTERVAL '24 hours' + `); + + res.json({ + dispensaryCount: parseInt(dispRows[0]?.count || '0', 10), + productCount: parseInt(productRows[0]?.count || '0', 10), + brandCount: 
parseInt(brandRows[0]?.count || '0', 10), + categoryCount: parseInt(categoryRows[0]?.count || '0', 10), + snapshotCount24h: parseInt(snapshotRows[0]?.count || '0', 10), + lastCrawlTime: lastCrawlRows[0]?.last_crawl || null, + failedJobCount: parseInt(failedRows[0]?.count || '0', 10), + }); + } catch (error: any) { + console.error('[Markets] Error fetching dashboard:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +/** + * GET /api/markets/stores + * List all stores from the dispensaries table + */ +router.get('/stores', async (req: Request, res: Response) => { + try { + const { city, hasPlatformId, limit = '100', offset = '0' } = req.query; + + let whereClause = 'WHERE 1=1'; + const params: any[] = []; + let paramIndex = 1; + + if (city) { + whereClause += ` AND d.city ILIKE $${paramIndex}`; + params.push(`%${city}%`); + paramIndex++; + } + + if (hasPlatformId === 'true') { + whereClause += ` AND d.platform_dispensary_id IS NOT NULL`; + } else if (hasPlatformId === 'false') { + whereClause += ` AND d.platform_dispensary_id IS NULL`; + } + + params.push(parseInt(limit as string, 10), parseInt(offset as string, 10)); + + const { rows } = await pool.query(` + SELECT + d.id, + d.name, + d.dba_name, + d.city, + d.state, + d.address1 as address, + d.zipcode as zip, + d.phone, + d.website, + d.menu_url, + d.menu_type, + d.platform_dispensary_id, + d.crawl_enabled, + d.dutchie_verified, + d.last_crawl_at, + d.product_count, + d.created_at, + d.updated_at + FROM dispensaries d + ${whereClause} + ORDER BY d.name + LIMIT $${paramIndex} OFFSET $${paramIndex + 1} + `, params); + + // Get total count + const { rows: countRows } = await pool.query( + `SELECT COUNT(*) as total FROM dispensaries d ${whereClause}`, + params.slice(0, -2) + ); + + res.json({ + stores: rows, + total: parseInt(countRows[0]?.total || '0', 10), + }); + } catch (error: any) { + console.error('[Markets] Error fetching stores:', error.message); + res.status(500).json({ error: 
error.message }); + } +}); + +/** + * GET /api/markets/stores/:id + * Get a single store by ID + */ +router.get('/stores/:id', async (req: Request, res: Response) => { + try { + const { id } = req.params; + + const { rows } = await pool.query(` + SELECT + d.id, + d.name, + d.dba_name, + d.city, + d.state, + d.address1 as address, + d.zipcode as zip, + d.phone, + d.website, + d.menu_url, + d.menu_type, + d.platform_dispensary_id, + d.crawl_enabled, + d.dutchie_verified, + d.last_crawl_at, + d.product_count, + d.created_at, + d.updated_at + FROM dispensaries d + WHERE d.id = $1 + `, [parseInt(id, 10)]); + + if (rows.length === 0) { + return res.status(404).json({ error: 'Store not found' }); + } + + res.json(rows[0]); + } catch (error: any) { + console.error('[Markets] Error fetching store:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +/** + * GET /api/markets/stores/:id/summary + * Get store summary with aggregated metrics, brands, and categories + */ +router.get('/stores/:id/summary', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const dispensaryId = parseInt(id, 10); + + // Get dispensary info + const { rows: dispRows } = await pool.query(` + SELECT + d.id, + d.name, + d.dba_name, + d.c_name as company_name, + d.city, + d.state, + d.address1 as address, + d.zipcode as zip, + d.phone, + d.website, + d.menu_url, + d.menu_type, + d.platform_dispensary_id, + d.crawl_enabled, + d.last_crawl_at + FROM dispensaries d + WHERE d.id = $1 + `, [dispensaryId]); + + if (dispRows.length === 0) { + return res.status(404).json({ error: 'Store not found' }); + } + + const dispensary = dispRows[0]; + + // Get product counts using canonical store_products table + const { rows: countRows } = await pool.query(` + SELECT + COUNT(*) as total, + COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock, + COUNT(*) FILTER (WHERE stock_status = 'out_of_stock') as out_of_stock, + COUNT(*) FILTER (WHERE stock_status NOT 
IN ('in_stock', 'out_of_stock') OR stock_status IS NULL) as unknown, + COUNT(*) FILTER (WHERE stock_status = 'missing_from_feed') as missing_from_feed + FROM store_products + WHERE dispensary_id = $1 + `, [dispensaryId]); + + const counts = countRows[0] || {}; + + // Get brands using canonical table + const { rows: brandRows } = await pool.query(` + SELECT brand_name_raw as brand_name, COUNT(*) as product_count + FROM store_products + WHERE dispensary_id = $1 AND brand_name_raw IS NOT NULL + GROUP BY brand_name_raw + ORDER BY product_count DESC, brand_name_raw + `, [dispensaryId]); + + // Get categories using canonical table + const { rows: categoryRows } = await pool.query(` + SELECT category_raw as type, subcategory_raw as subcategory, COUNT(*) as product_count + FROM store_products + WHERE dispensary_id = $1 + GROUP BY category_raw, subcategory_raw + ORDER BY product_count DESC + `, [dispensaryId]); + + // Get last crawl info from crawl_orchestration_traces + const { rows: crawlRows } = await pool.query(` + SELECT + completed_at, + CASE WHEN success THEN 'completed' ELSE 'failed' END as status, + error_message + FROM crawl_orchestration_traces + WHERE dispensary_id = $1 + ORDER BY completed_at DESC + LIMIT 1 + `, [dispensaryId]); + + const lastCrawl = crawlRows.length > 0 ? 
/**
 * GET /api/markets/stores/:id/products
 * Get products for a store with filtering and pagination.
 *
 * Query params (all optional):
 *  - stockStatus: exact match on stock_status
 *  - type:        exact match on category_raw
 *  - subcategory: exact match on subcategory_raw
 *  - brandName:   case-insensitive substring match on brand_name_raw
 *  - search:      case-insensitive substring match on name_raw or brand_name_raw
 *  - limit:       page size; default 25, clamped to 0..100
 *  - offset:      rows to skip; default 0, never negative
 *
 * Responds with `{ products, total, limit, offset }`, where `total` is the
 * number of rows matching the filters regardless of pagination.
 * Returns 400 when `:id` does not parse as an integer.
 */
router.get('/stores/:id/products', async (req: Request, res: Response) => {
  try {
    const {
      stockStatus,
      type,
      subcategory,
      brandName,
      search,
      limit = '25',
      offset = '0'
    } = req.query;

    const dispensaryId = parseInt(req.params.id, 10);
    if (Number.isNaN(dispensaryId)) {
      return res.status(400).json({ error: 'Invalid store id' });
    }

    // Non-numeric or negative paging values would otherwise reach
    // LIMIT/OFFSET and fail the whole query; fall back to the defaults.
    const parsedLimit = parseInt(limit as string, 10);
    const limitNum = Number.isNaN(parsedLimit) ? 25 : Math.min(Math.max(parsedLimit, 0), 100);
    const parsedOffset = parseInt(offset as string, 10);
    const offsetNum = Number.isNaN(parsedOffset) ? 0 : Math.max(parsedOffset, 0);

    // Each filter appends one bound parameter and one condition that uses it.
    const conditions: string[] = ['sp.dispensary_id = $1'];
    const filterParams: unknown[] = [dispensaryId];
    const addFilter = (toSql: (placeholder: string) => string, value: unknown): void => {
      filterParams.push(value);
      conditions.push(toSql(`$${filterParams.length}`));
    };

    if (stockStatus) addFilter((p) => `sp.stock_status = ${p}`, stockStatus);
    if (type) addFilter((p) => `sp.category_raw = ${p}`, type);
    if (subcategory) addFilter((p) => `sp.subcategory_raw = ${p}`, subcategory);
    if (brandName) addFilter((p) => `sp.brand_name_raw ILIKE ${p}`, `%${brandName}%`);
    if (search) {
      addFilter((p) => `(sp.name_raw ILIKE ${p} OR sp.brand_name_raw ILIKE ${p})`, `%${search}%`);
    }

    const whereClause = `WHERE ${conditions.join(' AND ')}`;
    const limitPlaceholder = `$${filterParams.length + 1}`;
    const offsetPlaceholder = `$${filterParams.length + 2}`;

    // Get products with latest snapshot data using canonical tables
    const { rows } = await pool.query(`
      SELECT
        sp.id,
        sp.external_product_id as external_id,
        sp.name_raw as name,
        sp.brand_name_raw as brand,
        sp.category_raw as type,
        sp.subcategory_raw as subcategory,
        sp.strain_type,
        sp.stock_status,
        sp.stock_status = 'in_stock' as in_stock,
        sp.stock_status != 'missing_from_feed' as is_present_in_feed,
        sp.stock_status = 'missing_from_feed' as missing_from_feed,
        sp.thc_percent as thc_percentage,
        sp.cbd_percent as cbd_percentage,
        sp.primary_image_url as image_url,
        sp.description,
        sp.total_quantity_available as total_quantity,
        sp.first_seen_at,
        sp.last_seen_at,
        sp.updated_at,
        (
          SELECT jsonb_build_object(
            'regular_price', COALESCE(sps.price_rec, 0)::numeric,
            'sale_price', CASE WHEN sps.price_rec_special > 0
              THEN sps.price_rec_special::numeric
              ELSE NULL END,
            'med_price', COALESCE(sps.price_med, 0)::numeric,
            'med_sale_price', CASE WHEN sps.price_med_special > 0
              THEN sps.price_med_special::numeric
              ELSE NULL END,
            'snapshot_at', sps.captured_at
          )
          FROM store_product_snapshots sps
          WHERE sps.store_product_id = sp.id
          ORDER BY sps.captured_at DESC
          LIMIT 1
        ) as pricing
      FROM store_products sp
      ${whereClause}
      ORDER BY sp.name_raw
      LIMIT ${limitPlaceholder} OFFSET ${offsetPlaceholder}
    `, [...filterParams, limitNum, offsetNum]);

    // Flatten pricing into the product object.
    // `||` (not `??`) is kept so the 0 produced by COALESCE in the SQL above
    // is still reported as null, as before.
    const products = rows.map((row: any) => {
      const pricing = row.pricing || {};
      return {
        ...row,
        regular_price: pricing.regular_price || null,
        sale_price: pricing.sale_price || null,
        med_price: pricing.med_price || null,
        med_sale_price: pricing.med_sale_price || null,
        snapshot_at: pricing.snapshot_at || null,
        pricing: undefined, // Remove the nested object
      };
    });

    // Get total count (same filters, no pagination)
    const { rows: countRows } = await pool.query(
      `SELECT COUNT(*) as total FROM store_products sp ${whereClause}`,
      filterParams
    );

    res.json({
      products,
      total: parseInt(countRows[0]?.total || '0', 10),
      limit: limitNum,
      offset: offsetNum,
    });
  } catch (error: any) {
    console.error('[Markets] Error fetching store products:', error.message);
    res.status(500).json({ error: error.message });
  }
});
/**
 * GET /api/markets/brands
 * List all brands with product counts and store presence.
 *
 * Query params (all optional):
 *  - search: case-insensitive substring match on brand_name_raw
 *  - limit:  page size; default 100, clamped to 0..500
 *  - offset: rows to skip; default 0, never negative
 *  - sortBy: 'products' (default), 'stores' or 'name'
 *
 * Responds with `{ brands, total, summary, limit, offset }`. `total` honours
 * the search filter; `summary` is computed over all non-empty brands.
 */
router.get('/brands', async (req: Request, res: Response) => {
  try {
    const { search, limit = '100', offset = '0', sortBy = 'products' } = req.query;

    // Non-numeric or negative paging values would otherwise reach
    // LIMIT/OFFSET and fail the query; fall back to the defaults.
    const parsedLimit = parseInt(limit as string, 10);
    const limitNum = Number.isNaN(parsedLimit) ? 100 : Math.min(Math.max(parsedLimit, 0), 500);
    const parsedOffset = parseInt(offset as string, 10);
    const offsetNum = Number.isNaN(parsedOffset) ? 0 : Math.max(parsedOffset, 0);

    let whereClause = 'WHERE brand_name_raw IS NOT NULL AND brand_name_raw != \'\'';
    const filterParams: unknown[] = [];

    if (search) {
      filterParams.push(`%${search}%`);
      whereClause += ` AND brand_name_raw ILIKE $${filterParams.length}`;
    }

    // Determine sort column. Only these fixed strings are ever interpolated
    // into the SQL, so `sortBy` itself never reaches the query text.
    let orderBy = 'product_count DESC';
    if (sortBy === 'stores') {
      orderBy = 'store_count DESC';
    } else if (sortBy === 'name') {
      orderBy = 'brand_name ASC';
    }

    const limitPlaceholder = `$${filterParams.length + 1}`;
    const offsetPlaceholder = `$${filterParams.length + 2}`;

    const { rows } = await pool.query(`
      SELECT
        brand_name_raw as brand_name,
        COUNT(*) as product_count,
        COUNT(DISTINCT dispensary_id) as store_count,
        AVG(price_rec) FILTER (WHERE price_rec > 0) as avg_price,
        array_agg(DISTINCT category_raw) FILTER (WHERE category_raw IS NOT NULL) as categories,
        MIN(first_seen_at) as first_seen_at,
        MAX(last_seen_at) as last_seen_at
      FROM store_products
      ${whereClause}
      GROUP BY brand_name_raw
      ORDER BY ${orderBy}
      LIMIT ${limitPlaceholder} OFFSET ${offsetPlaceholder}
    `, [...filterParams, limitNum, offsetNum]);

    // Get total count (same filter, no pagination)
    const { rows: countRows } = await pool.query(`
      SELECT COUNT(DISTINCT brand_name_raw) as total
      FROM store_products
      ${whereClause}
    `, filterParams);

    // Calculate summary stats
    const { rows: summaryRows } = await pool.query(`
      SELECT
        COUNT(DISTINCT brand_name_raw) as total_brands,
        AVG(product_count) as avg_products_per_brand
      FROM (
        SELECT brand_name_raw, COUNT(*) as product_count
        FROM store_products
        WHERE brand_name_raw IS NOT NULL AND brand_name_raw != ''
        GROUP BY brand_name_raw
      ) brand_counts
    `);

    res.json({
      brands: rows.map((r: any, idx: number) => ({
        // Synthetic id: position of the brand in the sorted, paginated result.
        id: idx + 1 + offsetNum,
        name: r.brand_name,
        normalized_name: null,
        product_count: parseInt(r.product_count, 10),
        store_count: parseInt(r.store_count, 10),
        avg_price: r.avg_price ? parseFloat(r.avg_price) : null,
        categories: r.categories || [],
        is_portfolio: false,
        first_seen_at: r.first_seen_at,
        last_seen_at: r.last_seen_at,
      })),
      total: parseInt(countRows[0]?.total || '0', 10),
      summary: {
        total_brands: parseInt(summaryRows[0]?.total_brands || '0', 10),
        portfolio_brands: 0,
        avg_products_per_brand: Math.round(parseFloat(summaryRows[0]?.avg_products_per_brand || '0')),
        top_categories: [],
      },
      limit: limitNum,
      offset: offsetNum,
    });
  } catch (error: any) {
    console.error('[Markets] Error fetching brands:', error.message);
    res.status(500).json({ error: error.message });
  }
});
parseFloat(r.avg_price) : null, + })), + total: rows.length, + }); + } catch (error: any) { + console.error('[Markets] Error fetching categories:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +export default router; diff --git a/backend/src/routes/orchestrator-admin.ts b/backend/src/routes/orchestrator-admin.ts index 49afacce..32d7dffb 100644 --- a/backend/src/routes/orchestrator-admin.ts +++ b/backend/src/routes/orchestrator-admin.ts @@ -24,37 +24,22 @@ const router = Router(); */ router.get('/metrics', async (_req: Request, res: Response) => { try { - // Get aggregate metrics + // Get aggregate metrics using 7-stage pipeline const { rows: metrics } = await pool.query(` SELECT - (SELECT COUNT(*) FROM dutchie_products) as total_products, - (SELECT COUNT(DISTINCT brand_name) FROM dutchie_products WHERE brand_name IS NOT NULL) as total_brands, - (SELECT COUNT(*) FROM dispensaries WHERE state = 'AZ') as total_stores, - ( - SELECT COUNT(*) - FROM dispensary_crawler_profiles dcp - WHERE dcp.enabled = true - AND (dcp.status = 'production' OR (dcp.config->>'status')::text = 'production') - ) as healthy_count, - ( - SELECT COUNT(*) - FROM dispensary_crawler_profiles dcp - WHERE dcp.enabled = true - AND (dcp.status = 'sandbox' OR (dcp.config->>'status')::text = 'sandbox') - ) as sandbox_count, - ( - SELECT COUNT(*) - FROM dispensary_crawler_profiles dcp - WHERE dcp.enabled = true - AND (dcp.status = 'needs_manual' OR (dcp.config->>'status')::text = 'needs_manual') - ) as needs_manual_count, - ( - SELECT COUNT(*) - FROM dispensary_crawler_profiles dcp - JOIN dispensaries d ON d.id = dcp.dispensary_id - WHERE d.state = 'AZ' - AND dcp.status = 'needs_manual' - ) as failing_count + (SELECT COUNT(*) FROM store_products) as total_products, + (SELECT COUNT(DISTINCT brand_name_raw) FROM store_products WHERE brand_name_raw IS NOT NULL) as total_brands, + (SELECT COUNT(*) FROM dispensaries WHERE menu_type = 'dutchie' AND crawl_enabled = true) as 
total_stores, + -- Stage counts from dispensaries table (7-stage pipeline) + (SELECT COUNT(*) FROM dispensaries WHERE stage = 'discovered') as discovered_count, + (SELECT COUNT(*) FROM dispensaries WHERE stage = 'validated') as validated_count, + (SELECT COUNT(*) FROM dispensaries WHERE stage = 'promoted') as promoted_count, + (SELECT COUNT(*) FROM dispensaries WHERE stage = 'sandbox') as sandbox_count, + (SELECT COUNT(*) FROM dispensaries WHERE stage = 'hydrating') as hydrating_count, + (SELECT COUNT(*) FROM dispensaries WHERE stage = 'production') as production_count, + (SELECT COUNT(*) FROM dispensaries WHERE stage = 'failing') as failing_count, + -- Discovery pipeline counts + (SELECT COUNT(*) FROM dutchie_discovery_locations WHERE stage = 'discovered' AND active = true) as discovery_pending `); const row = metrics[0] || {}; @@ -63,13 +48,22 @@ router.get('/metrics', async (_req: Request, res: Response) => { total_products: parseInt(row.total_products || '0', 10), total_brands: parseInt(row.total_brands || '0', 10), total_stores: parseInt(row.total_stores || '0', 10), - // Placeholder sentiment values - these would come from actual analytics - market_sentiment: 'neutral', - market_direction: 'stable', - // Health counts - healthy_count: parseInt(row.healthy_count || '0', 10), + // 7-Stage Pipeline Counts + stages: { + discovered: parseInt(row.discovered_count || '0', 10), + validated: parseInt(row.validated_count || '0', 10), + promoted: parseInt(row.promoted_count || '0', 10), + sandbox: parseInt(row.sandbox_count || '0', 10), + hydrating: parseInt(row.hydrating_count || '0', 10), + production: parseInt(row.production_count || '0', 10), + failing: parseInt(row.failing_count || '0', 10), + }, + // Discovery pipeline + discovery_pending: parseInt(row.discovery_pending || '0', 10), + // Legacy compatibility + healthy_count: parseInt(row.production_count || '0', 10), sandbox_count: parseInt(row.sandbox_count || '0', 10), - needs_manual_count: 
parseInt(row.needs_manual_count || '0', 10), + needs_manual_count: parseInt(row.failing_count || '0', 10), failing_count: parseInt(row.failing_count || '0', 10), }); } catch (error: any) { @@ -157,9 +151,14 @@ router.get('/stores', async (req: Request, res: Response) => { d.platform_dispensary_id, d.last_crawl_at, d.crawl_enabled, + d.stage, + d.stage_changed_at, + d.first_crawl_at, + d.last_successful_crawl_at, dcp.id as profile_id, dcp.profile_key, - COALESCE(dcp.status, dcp.config->>'status', 'legacy') as crawler_status, + dcp.consecutive_successes, + dcp.consecutive_failures, ( SELECT MAX(cot.completed_at) FROM crawl_orchestration_traces cot @@ -172,8 +171,8 @@ router.get('/stores', async (req: Request, res: Response) => { ) as last_failure_at, ( SELECT COUNT(*) - FROM dutchie_products dp - WHERE dp.dispensary_id = d.id + FROM store_products sp + WHERE sp.dispensary_id = d.id ) as product_count FROM dispensaries d LEFT JOIN dispensary_crawler_profiles dcp @@ -197,10 +196,17 @@ router.get('/stores', async (req: Request, res: Response) => { state: r.state, provider: r.provider || 'unknown', provider_raw: r.provider || null, - provider_display: getProviderDisplayName(r.provider), + // Admin routes show actual provider names (not anonymized) + provider_display: r.provider || 'Unknown', platformDispensaryId: r.platform_dispensary_id, crawlEnabled: r.crawl_enabled ?? false, - status: r.crawler_status || (r.platform_dispensary_id ? 
// ============================================================
// STATUS MANAGEMENT
// ============================================================

// 6-Stage Pipeline Statuses
const VALID_STAGES = ['discovered', 'validated', 'promoted', 'sandbox', 'production', 'failing'] as const;

/**
 * POST /api/admin/orchestrator/stores/:id/stage
 * Manually update the stage for a store (use /api/pipeline for proper transitions)
 * Body: { stage: 'discovered' | 'validated' | 'promoted' | 'sandbox' | 'production' | 'failing', reason?: string }
 *
 * Writes the new value to the `status` column of the store's most recently
 * updated enabled crawler profile and records a `status_change` row in
 * crawler_status_alerts.
 *
 * Responses:
 *  - 200 `{ success, dispensaryId, profileId, previousStatus, newStatus, message }`
 *  - 400 when `:id` is not an integer or `stage` is not in VALID_STAGES
 *  - 404 when the store has no enabled crawler profile
 *
 * NOTE(review): this updates dispensary_crawler_profiles.status, not a
 * `stage` column on dispensaries — confirm that is the intended target.
 */
router.post('/stores/:id/stage', async (req: Request, res: Response) => {
  try {
    const { stage: status, reason } = req.body;

    if (!status || !VALID_STAGES.includes(status)) {
      return res.status(400).json({
        error: `Invalid stage. Must be one of: ${VALID_STAGES.join(', ')}`,
      });
    }

    const dispensaryId = parseInt(req.params.id, 10);
    // Guard against NaN reaching the query as a bound parameter (would be a 500).
    if (Number.isNaN(dispensaryId)) {
      return res.status(400).json({ error: 'Invalid store id' });
    }

    // Get current profile and status
    const { rows: profileRows } = await pool.query(`
      SELECT dcp.id, dcp.status as current_status, d.name as dispensary_name
      FROM dispensary_crawler_profiles dcp
      JOIN dispensaries d ON d.id = dcp.dispensary_id
      WHERE dcp.dispensary_id = $1 AND dcp.enabled = true
      ORDER BY dcp.updated_at DESC
      LIMIT 1
    `, [dispensaryId]);

    if (profileRows.length === 0) {
      return res.status(404).json({ error: 'No crawler profile found for this store' });
    }

    const {
      id: profileId,
      current_status: currentStatus,
      dispensary_name: dispensaryName,
    } = profileRows[0];

    // Update the status
    await pool.query(`
      UPDATE dispensary_crawler_profiles
      SET
        status = $1,
        status_reason = $2,
        status_changed_at = CURRENT_TIMESTAMP,
        updated_at = CURRENT_TIMESTAMP
      WHERE id = $3
    `, [status, reason || `Manual status change to ${status}`, profileId]);

    // Create status alert. Of the values VALID_STAGES allows, only 'failing'
    // is reported above 'info'; the previous 'needs_manual' -> 'warning'
    // branch could never run because that value is rejected above.
    const severity = status === 'failing' ? 'error' : 'info';

    await pool.query(`
      INSERT INTO crawler_status_alerts
      (dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata)
      VALUES ($1, $2, 'status_change', $3, $4, $5, $6, $7)
    `, [
      dispensaryId,
      profileId,
      severity,
      `${dispensaryName}: Status changed from ${currentStatus || 'unknown'} to ${status}`,
      currentStatus,
      status,
      JSON.stringify({ reason, changedBy: 'admin_api' }),
    ]);

    res.json({
      success: true,
      dispensaryId,
      profileId,
      previousStatus: currentStatus,
      newStatus: status,
      message: `Status updated to ${status}`,
    });
  } catch (error: any) {
    console.error('[OrchestratorAdmin] Error updating status:', error.message);
    res.status(500).json({ error: error.message });
  }
});
/**
 * POST /api/admin/orchestrator/alerts/:id/acknowledge
 * Acknowledge an alert.
 *
 * Body: { acknowledgedBy?: string } (defaults to 'admin')
 *
 * Responses:
 *  - 200 `{ success: true, alertId }` when the alert row was updated
 *  - 400 when `:id` does not parse as an integer
 *  - 404 when no alert has that id
 */
router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
  try {
    const alertId = parseInt(req.params.id, 10);
    if (Number.isNaN(alertId)) {
      return res.status(400).json({ error: 'Invalid alert id' });
    }

    // Tolerate a request without a parsed body.
    const { acknowledgedBy = 'admin' } = req.body ?? {};

    const result = await pool.query(`
      UPDATE crawler_status_alerts
      SET acknowledged = true, acknowledged_at = CURRENT_TIMESTAMP, acknowledged_by = $1
      WHERE id = $2
    `, [acknowledgedBy, alertId]);

    // Previously an unknown id still answered `success: true` although
    // nothing had been updated.
    if (!result.rowCount) {
      return res.status(404).json({ error: 'Alert not found' });
    }

    res.json({ success: true, alertId });
  } catch (error: any) {
    console.error('[OrchestratorAdmin] Error acknowledging alert:', error.message);
    res.status(500).json({ error: error.message });
  }
});
/**
 * Decide the crawler profile status after one crawl attempt.
 *
 * The counters passed in already include the current attempt.
 *  - success: sandbox -> production at 3 consecutive successes;
 *             needs_manual / failing -> production at 2
 *  - failure: production -> needs_manual at 2 consecutive failures;
 *             needs_manual -> failing at 5;
 *             sandbox -> needs_manual at 3
 * Any other combination leaves the status unchanged.
 */
function resolveCrawlStatus(
  currentStatus: string | null,
  success: boolean,
  consecutiveSuccesses: number,
  consecutiveFailures: number
): string | null {
  if (success) {
    if (currentStatus === 'sandbox' && consecutiveSuccesses >= 3) return 'production';
    if ((currentStatus === 'needs_manual' || currentStatus === 'failing') && consecutiveSuccesses >= 2) {
      return 'production';
    }
    return currentStatus;
  }

  if (currentStatus === 'production' && consecutiveFailures >= 2) return 'needs_manual';
  if (currentStatus === 'needs_manual' && consecutiveFailures >= 5) return 'failing';
  // A sandbox profile that keeps failing is handed over for manual attention.
  if (currentStatus === 'sandbox' && consecutiveFailures >= 3) return 'needs_manual';
  return currentStatus;
}

/**
 * POST /api/admin/orchestrator/crawl-outcome
 * Record a crawl outcome and update status based on success/failure
 * This endpoint is called by the crawler after each crawl attempt
 *
 * Body: { dispensaryId, success, productsFound?, error?, metadata? }
 *
 * Effects:
 *  - updates the consecutive success/failure counters and status on the
 *    store's most recently updated enabled crawler profile
 *  - inserts a `status_change` alert when the status changed, otherwise a
 *    `crawl_error` alert when the crawl failed and `error` was supplied
 *
 * Responses:
 *  - 200 with the new counters and status
 *  - 400 when `dispensaryId` is missing
 *  - 404 when the store has no enabled crawler profile
 */
router.post('/crawl-outcome', async (req: Request, res: Response) => {
  try {
    const {
      dispensaryId,
      success,
      productsFound = 0,
      error,
      metadata,
    } = req.body;

    if (!dispensaryId) {
      return res.status(400).json({ error: 'dispensaryId is required' });
    }

    // A destructuring default only covers `undefined`; an explicit
    // `metadata: null` used to crash on `metadata.stack` below.
    const meta = metadata ?? {};
    const succeeded = Boolean(success);

    // Get current profile
    const { rows: profileRows } = await pool.query(`
      SELECT
        dcp.id,
        dcp.status,
        dcp.consecutive_successes,
        dcp.consecutive_failures,
        d.name as dispensary_name
      FROM dispensary_crawler_profiles dcp
      JOIN dispensaries d ON d.id = dcp.dispensary_id
      WHERE dcp.dispensary_id = $1 AND dcp.enabled = true
      ORDER BY dcp.updated_at DESC
      LIMIT 1
    `, [dispensaryId]);

    if (profileRows.length === 0) {
      return res.status(404).json({ error: 'No crawler profile found' });
    }

    const profile = profileRows[0];
    const currentStatus = profile.status;

    // One outcome increments its own streak and resets the opposite one.
    const consecutiveSuccesses = succeeded ? (profile.consecutive_successes || 0) + 1 : 0;
    const consecutiveFailures = succeeded ? 0 : (profile.consecutive_failures || 0) + 1;

    const newStatus = resolveCrawlStatus(currentStatus, succeeded, consecutiveSuccesses, consecutiveFailures);
    // Every transition rule moves to a different status, so inequality is
    // equivalent to "a rule fired".
    const statusChanged = newStatus !== currentStatus;

    let statusReason: string | null = null;
    if (statusChanged) {
      statusReason = succeeded
        ? 'Auto-promoted after consecutive successes'
        : `Auto-demoted after ${consecutiveFailures} consecutive failures`;
    }

    // Update profile
    await pool.query(`
      UPDATE dispensary_crawler_profiles
      SET
        consecutive_successes = $1,
        consecutive_failures = $2,
        status = $3,
        status_reason = CASE WHEN $4 THEN $5 ELSE status_reason END,
        status_changed_at = CASE WHEN $4 THEN CURRENT_TIMESTAMP ELSE status_changed_at END,
        updated_at = CURRENT_TIMESTAMP
      WHERE id = $6
    `, [
      consecutiveSuccesses,
      consecutiveFailures,
      newStatus,
      statusChanged,
      statusReason,
      profile.id,
    ]);

    // Create alert if status changed or error occurred
    if (statusChanged) {
      const severity = newStatus === 'production' ? 'info'
        : newStatus === 'needs_manual' ? 'warning'
        : 'error';

      await pool.query(`
        INSERT INTO crawler_status_alerts
        (dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata)
        VALUES ($1, $2, 'status_change', $3, $4, $5, $6, $7)
      `, [
        dispensaryId,
        profile.id,
        severity,
        `${profile.dispensary_name}: ${succeeded ? 'Promoted' : 'Demoted'} from ${currentStatus} to ${newStatus}`,
        currentStatus,
        newStatus,
        JSON.stringify({ productsFound, consecutiveSuccesses, consecutiveFailures, ...meta }),
      ]);
    } else if (!succeeded && error) {
      // Log crawl error as alert
      await pool.query(`
        INSERT INTO crawler_status_alerts
        (dispensary_id, profile_id, alert_type, severity, message, error_details, metadata)
        VALUES ($1, $2, 'crawl_error', $3, $4, $5, $6)
      `, [
        dispensaryId,
        profile.id,
        consecutiveFailures >= 2 ? 'warning' : 'info',
        `${profile.dispensary_name}: Crawl failed - ${error}`,
        JSON.stringify({ error, stack: meta.stack }),
        JSON.stringify({ consecutiveFailures, ...meta }),
      ]);
    }

    res.json({
      success: true,
      dispensaryId,
      profileId: profile.id,
      statusChanged,
      previousStatus: currentStatus,
      newStatus,
      consecutiveSuccesses,
      consecutiveFailures,
    });
  } catch (error: any) {
    console.error('[OrchestratorAdmin] Error recording crawl outcome:', error.message);
    res.status(500).json({ error: error.message });
  }
});
/**
 * Log a stage transition to the audit table
 *
 * Inserts one row into `stage_transitions` and returns the `id` of the new row.
 *
 * @param entityType      which kind of record moved
 * @param entityId        id of that record
 * @param fromStage       stage before the transition, or null
 * @param toStage         stage after the transition
 * @param triggerType     what initiated the transition
 * @param triggerEndpoint route that performed it
 * @param success         whether the transition succeeded
 * @param errorMessage    optional failure detail; an empty string is stored as NULL
 * @param metadata        optional extra context, stored JSON-encoded
 * @param durationMs      optional elapsed time in milliseconds
 * @returns the `id` column returned by the INSERT
 *          (assumed numeric — TODO confirm against the table definition)
 */
async function logTransition(
  entityType: 'discovery_location' | 'dispensary',
  entityId: number,
  fromStage: string | null,
  toStage: string,
  triggerType: 'api' | 'scheduler' | 'manual' | 'auto',
  triggerEndpoint: string,
  success: boolean,
  errorMessage?: string,
  metadata?: Record<string, unknown>,
  durationMs?: number
): Promise<number> {
  const result = await pool.query(`
    INSERT INTO stage_transitions
    (entity_type, entity_id, from_stage, to_stage, trigger_type, trigger_endpoint, success, error_message, metadata, duration_ms)
    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
    RETURNING id
  `, [
    entityType,
    entityId,
    fromStage,
    toStage,
    triggerType,
    triggerEndpoint,
    success,
    errorMessage || null,
    metadata ? JSON.stringify(metadata) : null,
    // `??` rather than `||`: a measured duration of 0 ms is a real value
    // and must not be stored as NULL.
    durationMs ?? null,
  ]);
  return result.rows[0].id;
}
+ if (!loc.state_code || loc.state_code.trim() === '') errors.push('Missing state_code'); + if (!loc.platform_menu_url) errors.push('Missing platform_menu_url'); + + if (errors.length > 0) { + // Update to failing stage + await pool.query(` + UPDATE dutchie_discovery_locations + SET stage = 'failing', notes = $1, updated_at = CURRENT_TIMESTAMP + WHERE id = $2 + `, [errors.join('; '), locationId]); + + await logTransition( + 'discovery_location', locationId, 'discovered', 'failing', + 'api', '/api/pipeline/discovery/:id/validate', false, + errors.join('; '), { errors }, Date.now() - startTime + ); + + return res.status(400).json({ + success: false, + stage: 'failing', + errors, + }); + } + + // Update to validated stage + await pool.query(` + UPDATE dutchie_discovery_locations + SET stage = 'validated', updated_at = CURRENT_TIMESTAMP + WHERE id = $1 + `, [locationId]); + + await logTransition( + 'discovery_location', locationId, 'discovered', 'validated', + 'api', '/api/pipeline/discovery/:id/validate', true, + undefined, { name: loc.name, city: loc.city, state: loc.state_code }, + Date.now() - startTime + ); + + res.json({ + success: true, + locationId, + stage: 'validated', + name: loc.name, + }); + } catch (error: any) { + console.error('[Pipeline] Validate error:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +/** + * POST /api/pipeline/discovery/validate-batch + * Validate all discovered locations (or filtered by state) + */ +router.post('/discovery/validate-batch', async (req: Request, res: Response) => { + const { stateCode, limit = 100 } = req.body; + + try { + let query = ` + SELECT id FROM dutchie_discovery_locations + WHERE stage = 'discovered' + `; + const params: any[] = []; + + if (stateCode) { + query += ` AND state_code = $1`; + params.push(stateCode); + } + + query += ` ORDER BY first_seen_at LIMIT $${params.length + 1}`; + params.push(limit); + + const { rows } = await pool.query(query, params); + + const results = { 
+ processed: 0, + validated: 0, + failed: 0, + errors: [] as Array<{ id: number; errors: string[] }>, + }; + + for (const row of rows) { + // Call validate endpoint internally + const validateResult = await validateSingleLocation(row.id); + results.processed++; + + if (validateResult.success) { + results.validated++; + } else { + results.failed++; + results.errors.push({ id: row.id, errors: validateResult.errors || [] }); + } + } + + res.json(results); + } catch (error: any) { + console.error('[Pipeline] Validate batch error:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +// Internal helper for batch validation +async function validateSingleLocation(locationId: number): Promise<{ success: boolean; errors?: string[] }> { + const { rows } = await pool.query(` + SELECT * FROM dutchie_discovery_locations WHERE id = $1 + `, [locationId]); + + if (rows.length === 0) return { success: false, errors: ['Not found'] }; + + const loc = rows[0]; + const errors: string[] = []; + + if (!loc.platform_location_id) errors.push('Missing platform_location_id'); + if (!loc.name || loc.name.trim() === '') errors.push('Missing name'); + if (!loc.city || loc.city.trim() === '') errors.push('Missing city'); + if (!loc.state_code || loc.state_code.trim() === '') errors.push('Missing state_code'); + if (!loc.platform_menu_url) errors.push('Missing platform_menu_url'); + + const newStage = errors.length === 0 ? 'validated' : 'failing'; + + await pool.query(` + UPDATE dutchie_discovery_locations + SET stage = $1, notes = $2, updated_at = CURRENT_TIMESTAMP + WHERE id = $3 + `, [newStage, errors.length > 0 ? errors.join('; ') : null, locationId]); + + await logTransition( + 'discovery_location', locationId, loc.stage, newStage, + 'scheduler', '/api/pipeline/discovery/validate-batch', + errors.length === 0, errors.length > 0 ? errors.join('; ') : undefined + ); + + return { success: errors.length === 0, errors: errors.length > 0 ? 
errors : undefined }; +} + +// ============================================================ +// STAGE 2 → 3: PROMOTE +// validated → promoted +// ============================================================ + +/** + * POST /api/pipeline/discovery/:id/promote + * Promote a validated location to dispensaries table + */ +router.post('/discovery/:id/promote', async (req: Request, res: Response) => { + const startTime = Date.now(); + const { id } = req.params; + const locationId = parseInt(id, 10); + + try { + // Get the discovery location + const { rows } = await pool.query(` + SELECT * FROM dutchie_discovery_locations WHERE id = $1 + `, [locationId]); + + if (rows.length === 0) { + return res.status(404).json({ error: 'Discovery location not found' }); + } + + const loc = rows[0]; + + if (loc.stage !== 'validated') { + return res.status(400).json({ + error: `Cannot promote: current stage is '${loc.stage}', expected 'validated'`, + }); + } + + // Generate slug + const slug = (loc.platform_slug || `${loc.name}-${loc.city}-${loc.state_code}`) + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-|-$/g, '') + .substring(0, 100); + + // Upsert to dispensaries + const upsertResult = await pool.query(` + INSERT INTO dispensaries ( + platform, name, slug, city, state, address1, postal_code, + latitude, longitude, timezone, platform_dispensary_id, + menu_url, menu_type, offer_pickup, offer_delivery, + is_medical, is_recreational, country, stage, stage_changed_at, + crawl_enabled, dutchie_discovery_id, created_at, updated_at + ) VALUES ( + $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, 'dutchie', + $13, $14, $15, $16, $17, 'promoted', CURRENT_TIMESTAMP, + true, $18, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP + ) + ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL + DO UPDATE SET + name = EXCLUDED.name, + city = EXCLUDED.city, + state = EXCLUDED.state, + menu_url = EXCLUDED.menu_url, + stage = 'promoted', + stage_changed_at =
CURRENT_TIMESTAMP, + updated_at = CURRENT_TIMESTAMP + RETURNING id, (xmax = 0) AS inserted + `, [ + loc.platform || 'dutchie', + loc.name, + slug, + loc.city, + loc.state_code, + loc.address_line1, + loc.postal_code, + loc.latitude, + loc.longitude, + loc.timezone, + loc.platform_location_id, + loc.platform_menu_url, + loc.offers_pickup ?? true, + loc.offers_delivery ?? false, + loc.is_medical ?? false, + loc.is_recreational ?? true, + loc.country || 'United States', + locationId, + ]); + + const dispensaryId = upsertResult.rows[0].id; + const wasCreated = upsertResult.rows[0].inserted; + + // Create crawler profile + const profileKey = loc.name + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-|-$/g, '') + .substring(0, 50); + + await pool.query(` + INSERT INTO dispensary_crawler_profiles ( + dispensary_id, profile_name, profile_key, crawler_type, status, + status_reason, config, enabled, consecutive_successes, consecutive_failures, + created_at, updated_at + ) VALUES ( + $1, $2, $3, 'dutchie', 'promoted', 'Promoted from discovery', + $4::jsonb, true, 0, 0, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP + ) + ON CONFLICT (dispensary_id) WHERE enabled = true + DO UPDATE SET + status = 'promoted', + status_reason = 'Re-promoted from discovery', + updated_at = CURRENT_TIMESTAMP + `, [ + dispensaryId, + loc.name, + profileKey, + JSON.stringify({ + platformDispensaryId: loc.platform_location_id, + useBothModes: true, + downloadImages: true, + trackStock: true, + }), + ]); + + // Update discovery location + await pool.query(` + UPDATE dutchie_discovery_locations + SET stage = 'promoted', dispensary_id = $1, verified_at = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP + WHERE id = $2 + `, [dispensaryId, locationId]); + + await logTransition( + 'discovery_location', locationId, 'validated', 'promoted', + 'api', '/api/pipeline/discovery/:id/promote', true, + undefined, { dispensaryId, wasCreated, name: loc.name }, + Date.now() - startTime + ); + + await createAlert( + 
dispensaryId, null, 'stage_change', 'info', + `${loc.name} promoted to dispensaries table`, + 'validated', 'promoted', { locationId, wasCreated } + ); + + res.json({ + success: true, + locationId, + dispensaryId, + stage: 'promoted', + action: wasCreated ? 'created' : 'updated', + name: loc.name, + }); + } catch (error: any) { + console.error('[Pipeline] Promote error:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +/** + * POST /api/pipeline/discovery/promote-batch + * Promote all validated locations (or filtered by state) + */ +router.post('/discovery/promote-batch', async (req: Request, res: Response) => { + const { stateCode, limit = 100 } = req.body; + + try { + let query = ` + SELECT id FROM dutchie_discovery_locations + WHERE stage = 'validated' + `; + const params: any[] = []; + + if (stateCode) { + query += ` AND state_code = $1`; + params.push(stateCode); + } + + query += ` ORDER BY first_seen_at LIMIT $${params.length + 1}`; + params.push(limit); + + const { rows } = await pool.query(query, params); + + const results = { + processed: 0, + promoted: 0, + failed: 0, + dispensaryIds: [] as number[], + }; + + for (const row of rows) { + try { + const promoteResult = await promoteSingleLocation(row.id); + results.processed++; + if (promoteResult.success) { + results.promoted++; + if (promoteResult.dispensaryId) { + results.dispensaryIds.push(promoteResult.dispensaryId); + } + } else { + results.failed++; + } + } catch (e) { + results.processed++; + results.failed++; + } + } + + res.json(results); + } catch (error: any) { + console.error('[Pipeline] Promote batch error:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +// Internal helper for batch promotion +async function promoteSingleLocation(locationId: number): Promise<{ success: boolean; dispensaryId?: number }> { + // Simplified version - reuses logic from promote endpoint + const { rows } = await pool.query(` + SELECT * FROM 
dutchie_discovery_locations WHERE id = $1 AND stage = 'validated' + `, [locationId]); + + if (rows.length === 0) return { success: false }; + + const loc = rows[0]; + const slug = (loc.platform_slug || `${loc.name}-${loc.city}-${loc.state_code}`) + .toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, '').substring(0, 100); + + const upsertResult = await pool.query(` + INSERT INTO dispensaries ( + platform, name, slug, city, state, platform_dispensary_id, + menu_url, menu_type, stage, stage_changed_at, crawl_enabled, + dutchie_discovery_id, created_at, updated_at + ) VALUES ( + 'dutchie', $1, $2, $3, $4, $5, $6, 'dutchie', 'promoted', + CURRENT_TIMESTAMP, true, $7, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP + ) + ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL + DO UPDATE SET stage = 'promoted', stage_changed_at = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP + RETURNING id + `, [loc.name, slug, loc.city, loc.state_code, loc.platform_location_id, loc.platform_menu_url, locationId]); + + const dispensaryId = upsertResult.rows[0].id; + + await pool.query(` + UPDATE dutchie_discovery_locations + SET stage = 'promoted', dispensary_id = $1, updated_at = CURRENT_TIMESTAMP + WHERE id = $2 + `, [dispensaryId, locationId]); + + await logTransition( + 'discovery_location', locationId, 'validated', 'promoted', + 'scheduler', '/api/pipeline/discovery/promote-batch', true, + undefined, { dispensaryId } + ); + + return { success: true, dispensaryId }; +} + +// ============================================================ +// STAGE 3 → 4: CRAWL (First Crawl) +// promoted → sandbox +// ============================================================ + +/** + * POST /api/pipeline/stores/:id/crawl + * Attempt first crawl for a promoted store + */ +router.post('/stores/:id/crawl', async (req: Request, res: Response) => { + const startTime = Date.now(); + const { id } = req.params; + const dispensaryId = parseInt(id, 10); + + try { + // Get the
dispensary + const { rows } = await pool.query(` + SELECT d.*, dcp.id as profile_id, dcp.config + FROM dispensaries d + LEFT JOIN dispensary_crawler_profiles dcp ON dcp.dispensary_id = d.id AND dcp.enabled = true + WHERE d.id = $1 + `, [dispensaryId]); + + if (rows.length === 0) { + return res.status(404).json({ error: 'Dispensary not found' }); + } + + const disp = rows[0]; + + if (disp.stage !== 'promoted') { + return res.status(400).json({ + error: `Cannot crawl: current stage is '${disp.stage}', expected 'promoted'`, + }); + } + + if (!disp.platform_dispensary_id) { + return res.status(400).json({ error: 'Missing platform_dispensary_id' }); + } + + // TODO: Actually call the Dutchie GraphQL API to fetch products + // For now, we'll just transition to sandbox and mark first_crawl_at + // The actual crawl will be implemented in the crawler module + + // Update to sandbox stage + await pool.query(` + UPDATE dispensaries + SET stage = 'sandbox', stage_changed_at = CURRENT_TIMESTAMP, first_crawl_at = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP + WHERE id = $1 + `, [dispensaryId]); + + await pool.query(` + UPDATE dispensary_crawler_profiles + SET status = 'sandbox', status_reason = 'First crawl initiated', updated_at = CURRENT_TIMESTAMP + WHERE dispensary_id = $1 AND enabled = true + `, [dispensaryId]); + + await logTransition( + 'dispensary', dispensaryId, 'promoted', 'sandbox', + 'api', '/api/pipeline/stores/:id/crawl', true, + undefined, { name: disp.name, platformId: disp.platform_dispensary_id }, + Date.now() - startTime + ); + + await createAlert( + dispensaryId, disp.profile_id, 'stage_change', 'info', + `${disp.name} moved to sandbox - first crawl initiated`, + 'promoted', 'sandbox' + ); + + res.json({ + success: true, + dispensaryId, + stage: 'sandbox', + name: disp.name, + message: 'First crawl initiated - store is now in sandbox', + }); + } catch (error: any) { + console.error('[Pipeline] Crawl error:', error.message); + res.status(500).json({ error: 
error.message }); + } +}); + +/** + * POST /api/pipeline/stores/crawl-batch + * Initiate first crawl for all promoted stores (or filtered by state) + */ +router.post('/stores/crawl-batch', async (req: Request, res: Response) => { + const { stateCode, limit = 50 } = req.body; + + try { + let query = ` + SELECT id FROM dispensaries + WHERE stage = 'promoted' AND platform_dispensary_id IS NOT NULL + `; + const params: any[] = []; + + if (stateCode) { + query += ` AND state = $1`; + params.push(stateCode); + } + + query += ` ORDER BY created_at LIMIT $${params.length + 1}`; + params.push(limit); + + const { rows } = await pool.query(query, params); + + const results = { + processed: 0, + crawled: 0, + failed: 0, + }; + + for (const row of rows) { + try { + await pool.query(` + UPDATE dispensaries + SET stage = 'sandbox', stage_changed_at = CURRENT_TIMESTAMP, first_crawl_at = CURRENT_TIMESTAMP + WHERE id = $1 + `, [row.id]); + + await pool.query(` + UPDATE dispensary_crawler_profiles + SET status = 'sandbox', status_reason = 'First crawl initiated (batch)' + WHERE dispensary_id = $1 AND enabled = true + `, [row.id]); + + await logTransition( + 'dispensary', row.id, 'promoted', 'sandbox', + 'scheduler', '/api/pipeline/stores/crawl-batch', true + ); + + results.crawled++; + } catch (e) { + results.failed++; + } + results.processed++; + } + + res.json(results); + } catch (error: any) { + console.error('[Pipeline] Crawl batch error:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +// ============================================================ +// STAGE 4 → 5: APPROVE +// sandbox → production +// ============================================================ + +/** + * POST /api/pipeline/stores/:id/approve + * Approve a sandbox store for production (requires products) + */ +router.post('/stores/:id/approve', async (req: Request, res: Response) => { + const startTime = Date.now(); + const { id } = req.params; + const { force = false } =
req.body; + const dispensaryId = parseInt(id, 10); + + try { + // Get the dispensary with product count + const { rows } = await pool.query(` + SELECT d.*, + dcp.id as profile_id, + (SELECT COUNT(*) FROM store_products sp WHERE sp.dispensary_id = d.id) as product_count + FROM dispensaries d + LEFT JOIN dispensary_crawler_profiles dcp ON dcp.dispensary_id = d.id AND dcp.enabled = true + WHERE d.id = $1 + `, [dispensaryId]); + + if (rows.length === 0) { + return res.status(404).json({ error: 'Dispensary not found' }); + } + + const disp = rows[0]; + + if (disp.stage !== 'sandbox') { + return res.status(400).json({ + error: `Cannot approve: current stage is '${disp.stage}', expected 'sandbox'`, + }); + } + + const productCount = parseInt(disp.product_count || '0', 10); + + // Require products unless force=true + if (productCount === 0 && !force) { + return res.status(400).json({ + error: 'Cannot approve: no products found. Use force=true to override.', + productCount, + }); + } + + // Update to production stage + await pool.query(` + UPDATE dispensaries + SET stage = 'production', stage_changed_at = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP + WHERE id = $1 + `, [dispensaryId]); + + await pool.query(` + UPDATE dispensary_crawler_profiles + SET status = 'production', status_reason = 'Approved for production', consecutive_successes = 1, consecutive_failures = 0, updated_at = CURRENT_TIMESTAMP + WHERE dispensary_id = $1 AND enabled = true + `, [dispensaryId]); + + await logTransition( + 'dispensary', dispensaryId, 'sandbox', 'production', + 'api', '/api/pipeline/stores/:id/approve', true, + undefined, { name: disp.name, productCount, forced: force }, + Date.now() - startTime + ); + + await createAlert( + dispensaryId, disp.profile_id, 'stage_change', 'info', + `${disp.name} approved for production with ${productCount} products`, + 'sandbox', 'production', { productCount } + ); + + res.json({ + success: true, + dispensaryId, + stage: 'production', + name: 
disp.name, + productCount, + }); + } catch (error: any) { + console.error('[Pipeline] Approve error:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +/** + * POST /api/pipeline/stores/approve-batch + * Approve all sandbox stores that have products + */ +router.post('/stores/approve-batch', async (req: Request, res: Response) => { + const { stateCode, minProducts = 1, limit = 50 } = req.body; + + try { + let query = ` + SELECT d.id, d.name, + (SELECT COUNT(*) FROM store_products sp WHERE sp.dispensary_id = d.id) as product_count + FROM dispensaries d + WHERE d.stage = 'sandbox' + `; + const params: any[] = []; + + if (stateCode) { + query += ` AND d.state = $1`; + params.push(stateCode); + } + + query += ` ORDER BY d.first_crawl_at LIMIT $${params.length + 1}`; + params.push(limit); + + const { rows } = await pool.query(query, params); + + const results = { + processed: 0, + approved: 0, + skipped: 0, + skippedReasons: [] as Array<{ id: number; name: string; productCount: number }>, + }; + + for (const row of rows) { + const productCount = parseInt(row.product_count || '0', 10); + results.processed++; + + if (productCount < minProducts) { + results.skipped++; + results.skippedReasons.push({ id: row.id, name: row.name, productCount }); + continue; + } + + await pool.query(` + UPDATE dispensaries + SET stage = 'production', stage_changed_at = CURRENT_TIMESTAMP + WHERE id = $1 + `, [row.id]); + + await pool.query(` + UPDATE dispensary_crawler_profiles + SET status = 'production', status_reason = 'Auto-approved (batch)' + WHERE dispensary_id = $1 AND enabled = true + `, [row.id]); + + await logTransition( + 'dispensary', row.id, 'sandbox', 'production', + 'scheduler', '/api/pipeline/stores/approve-batch', true, + undefined, { productCount } + ); + + results.approved++; + } + + res.json(results); + } catch (error: any) { + console.error('[Pipeline] Approve batch error:', error.message); + res.status(500).json({ error: error.message }); + } 
+}); + +// ============================================================ +// STAGE 5 → 6: FAIL +// production → failing (called on crawl error) +// ============================================================ + +/** + * POST /api/pipeline/stores/:id/fail + * Mark a store as failing (usually called by crawler on error) + */ +router.post('/stores/:id/fail', async (req: Request, res: Response) => { + const startTime = Date.now(); + const { id } = req.params; + const { error: errorMessage, consecutiveFailures = 1 } = req.body; + const dispensaryId = parseInt(id, 10); + + try { + const { rows } = await pool.query(` + SELECT d.*, dcp.id as profile_id, dcp.consecutive_failures + FROM dispensaries d + LEFT JOIN dispensary_crawler_profiles dcp ON dcp.dispensary_id = d.id AND dcp.enabled = true + WHERE d.id = $1 + `, [dispensaryId]); + + if (rows.length === 0) { + return res.status(404).json({ error: 'Dispensary not found' }); + } + + const disp = rows[0]; + const previousStage = disp.stage; + + // Only production stores can move to failing + if (previousStage !== 'production' && previousStage !== 'sandbox') { + return res.status(400).json({ + error: `Cannot fail: current stage is '${previousStage}'`, + }); + } + + await pool.query(` + UPDATE dispensaries + SET stage = 'failing', stage_changed_at = CURRENT_TIMESTAMP + WHERE id = $1 + `, [dispensaryId]); + + await pool.query(` + UPDATE dispensary_crawler_profiles + SET status = 'failing', status_reason = $1, consecutive_failures = consecutive_failures + 1, consecutive_successes = 0 + WHERE dispensary_id = $2 AND enabled = true + `, [errorMessage || 'Crawl failed', dispensaryId]); + + await logTransition( + 'dispensary', dispensaryId, previousStage, 'failing', + 'api', '/api/pipeline/stores/:id/fail', true, + errorMessage, { consecutiveFailures }, Date.now() - startTime + ); + + await createAlert( + dispensaryId, disp.profile_id, 'crawl_error', 'error', + `${disp.name} moved to failing: ${errorMessage || 'Unknown error'}`,
+ previousStage, 'failing', { error: errorMessage } + ); + + res.json({ + success: true, + dispensaryId, + stage: 'failing', + previousStage, + }); + } catch (error: any) { + console.error('[Pipeline] Fail error:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +// ============================================================ +// STAGE 6 → 4: RETRY +// failing → sandbox (manual retry) +// ============================================================ + +/** + * POST /api/pipeline/stores/:id/retry + * Retry a failing store (moves back to sandbox) + */ +router.post('/stores/:id/retry', async (req: Request, res: Response) => { + const startTime = Date.now(); + const { id } = req.params; + const dispensaryId = parseInt(id, 10); + + try { + const { rows } = await pool.query(` + SELECT d.*, dcp.id as profile_id + FROM dispensaries d + LEFT JOIN dispensary_crawler_profiles dcp ON dcp.dispensary_id = d.id AND dcp.enabled = true + WHERE d.id = $1 + `, [dispensaryId]); + + if (rows.length === 0) { + return res.status(404).json({ error: 'Dispensary not found' }); + } + + const disp = rows[0]; + + if (disp.stage !== 'failing') { + return res.status(400).json({ + error: `Cannot retry: current stage is '${disp.stage}', expected 'failing'`, + }); + } + + await pool.query(` + UPDATE dispensaries + SET stage = 'sandbox', stage_changed_at = CURRENT_TIMESTAMP + WHERE id = $1 + `, [dispensaryId]); + + await pool.query(` + UPDATE dispensary_crawler_profiles + SET status = 'sandbox', status_reason = 'Manual retry', consecutive_failures = 0, consecutive_successes = 0 + WHERE dispensary_id = $1 AND enabled = true + `, [dispensaryId]); + + await logTransition( + 'dispensary', dispensaryId, 'failing', 'sandbox', + 'api', '/api/pipeline/stores/:id/retry', true, + undefined, { name: disp.name }, Date.now() - startTime + ); + + await createAlert( + dispensaryId, disp.profile_id, 'stage_change', 'info', + `${disp.name} moved back to sandbox for retry`, +
'failing', 'sandbox' + ); + + res.json({ + success: true, + dispensaryId, + stage: 'sandbox', + name: disp.name, + }); + } catch (error: any) { + console.error('[Pipeline] Retry error:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +// ============================================================ +// PIPELINE STATS +// ============================================================ + +/** + * GET /api/pipeline/stats + * Get counts for each stage + */ +router.get('/stats', async (_req: Request, res: Response) => { + try { + // Discovery locations by stage + const { rows: discoveryStats } = await pool.query(` + SELECT stage, COUNT(*) as count + FROM dutchie_discovery_locations + WHERE active = true + GROUP BY stage + `); + + // Dispensaries by stage + const { rows: dispensaryStats } = await pool.query(` + SELECT stage, COUNT(*) as count + FROM dispensaries + WHERE crawl_enabled = true + GROUP BY stage + `); + + // By state for dispensaries + const { rows: byState } = await pool.query(` + SELECT state, stage, COUNT(*) as count + FROM dispensaries + WHERE crawl_enabled = true AND state IS NOT NULL + GROUP BY state, stage + ORDER BY state, stage + `); + + res.json({ + discovery: discoveryStats.reduce((acc: Record, r: any) => { + acc[r.stage || 'unknown'] = parseInt(r.count, 10); + return acc; + }, {}), + dispensaries: dispensaryStats.reduce((acc: Record, r: any) => { + acc[r.stage || 'unknown'] = parseInt(r.count, 10); + return acc; + }, {}), + byState: byState.map((r: any) => ({ + state: r.state, + stage: r.stage, + count: parseInt(r.count, 10), + })), + }); + } catch (error: any) { + console.error('[Pipeline] Stats error:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +/** + * GET /api/pipeline/transitions + * Get recent stage transitions + */ +router.get('/transitions', async (req: Request, res: Response) => { + try { + const { limit = '50', entityType, toStage } = req.query; + + let whereClause = 'WHERE 
1=1'; + const params: any[] = []; + let paramIndex = 1; + + if (entityType) { + whereClause += ` AND entity_type = $${paramIndex}`; + params.push(entityType); + paramIndex++; + } + + if (toStage) { + whereClause += ` AND to_stage = $${paramIndex}`; + params.push(toStage); + paramIndex++; + } + + params.push(parseInt(limit as string, 10)); + + const { rows } = await pool.query(` + SELECT st.*, + CASE + WHEN st.entity_type = 'dispensary' THEN (SELECT name FROM dispensaries WHERE id = st.entity_id) + WHEN st.entity_type = 'discovery_location' THEN (SELECT name FROM dutchie_discovery_locations WHERE id = st.entity_id) + END as entity_name + FROM stage_transitions st + ${whereClause} + ORDER BY st.created_at DESC + LIMIT $${paramIndex} + `, params); + + res.json({ + transitions: rows.map((r: any) => ({ + id: r.id, + entityType: r.entity_type, + entityId: r.entity_id, + entityName: r.entity_name, + fromStage: r.from_stage, + toStage: r.to_stage, + triggerType: r.trigger_type, + triggerEndpoint: r.trigger_endpoint, + success: r.success, + errorMessage: r.error_message, + metadata: r.metadata, + durationMs: r.duration_ms, + createdAt: r.created_at, + })), + }); + } catch (error: any) { + console.error('[Pipeline] Transitions error:', error.message); + res.status(500).json({ error: error.message }); + } +}); + +export default router; diff --git a/backend/src/routes/price-analytics.ts b/backend/src/routes/price-analytics.ts new file mode 100644 index 00000000..31cafb1b --- /dev/null +++ b/backend/src/routes/price-analytics.ts @@ -0,0 +1,472 @@ +/** + * Price Analytics API Routes + * + * Endpoints for price history, specials, and price comparison analytics + * Uses the new product_variants and product_variant_snapshots tables + */ + +import { Router, Request, Response } from 'express'; +import { pool } from '../db/pool'; + +const router = Router(); + +// ============================================================ +// PRICE HISTORY +// 
============================================================ + +/** + * GET /api/price-analytics/products/:id/history + * Get price and stock history for a product variant + * + * Query params: + * - days: Number of days to look back (default: 30, max: 90) + * - option: Specific variant option (e.g., "1g", "3.5g") + */ +router.get('/products/:id/history', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const { days = '30', option } = req.query; + const daysNum = Math.min(parseInt(days as string, 10) || 30, 90); + + // Get product info + const productResult = await pool.query(` + SELECT + sp.id, sp.name_raw as name, sp.brand_name_raw as brand, + sp.category_raw as category, sp.dispensary_id, + d.name as dispensary_name + FROM store_products sp + JOIN dispensaries d ON d.id = sp.dispensary_id + WHERE sp.id = $1 + `, [id]); + + if (productResult.rows.length === 0) { + return res.status(404).json({ error: 'Product not found' }); + } + + const product = productResult.rows[0]; + + // Get variant history + let variantQuery = ` + SELECT + pv.id as variant_id, + pv.option, + pvs.price_rec, + pvs.price_med, + pvs.price_rec_special, + pvs.price_med_special, + pvs.quantity, + pvs.in_stock, + pvs.is_on_special, + pvs.captured_at + FROM product_variant_snapshots pvs + JOIN product_variants pv ON pv.id = pvs.product_variant_id + WHERE pv.store_product_id = $1 + AND pvs.captured_at >= NOW() - ($2 || ' days')::INTERVAL + `; + const params: any[] = [id, daysNum]; + + if (option) { + variantQuery += ` AND pv.option = $3`; + params.push(option); + } + + variantQuery += ` ORDER BY pv.option, pvs.captured_at ASC`; + + const historyResult = await pool.query(variantQuery, params); + + // Get current variants + const currentResult = await pool.query(` + SELECT + id, option, price_rec, price_med, price_rec_special, price_med_special, + quantity, in_stock, is_on_special, last_price_change_at, last_stock_change_at + FROM product_variants + WHERE 
store_product_id = $1 + ORDER BY option + `, [id]); + + // Get sale stats using the function + const saleStatsResult = await pool.query(` + SELECT + pv.option, + (get_variant_sale_stats(pv.id, $2)).* + FROM product_variants pv + WHERE pv.store_product_id = $1 + `, [id, daysNum]); + + // Group history by variant + const historyByVariant: Record = {}; + for (const row of historyResult.rows) { + if (!historyByVariant[row.option]) { + historyByVariant[row.option] = []; + } + historyByVariant[row.option].push({ + price_rec: row.price_rec ? parseFloat(row.price_rec) : null, + price_med: row.price_med ? parseFloat(row.price_med) : null, + price_rec_special: row.price_rec_special ? parseFloat(row.price_rec_special) : null, + quantity: row.quantity, + in_stock: row.in_stock, + is_on_special: row.is_on_special, + captured_at: row.captured_at, + }); + } + + res.json({ + product: { + id: product.id, + name: product.name, + brand: product.brand, + category: product.category, + dispensary_id: product.dispensary_id, + dispensary_name: product.dispensary_name, + }, + current_variants: currentResult.rows.map((v: any) => ({ + ...v, + price_rec: v.price_rec ? parseFloat(v.price_rec) : null, + price_med: v.price_med ? parseFloat(v.price_med) : null, + price_rec_special: v.price_rec_special ? parseFloat(v.price_rec_special) : null, + })), + history: historyByVariant, + sale_stats: saleStatsResult.rows.reduce((acc: any, row: any) => { + acc[row.option] = { + total_snapshots: parseInt(row.total_snapshots), + times_on_special: parseInt(row.times_on_special), + special_frequency_pct: row.special_frequency_pct ? parseFloat(row.special_frequency_pct) : 0, + avg_discount_pct: row.avg_discount_pct ? parseFloat(row.avg_discount_pct) : null, + min_price: row.min_price ? parseFloat(row.min_price) : null, + max_price: row.max_price ? parseFloat(row.max_price) : null, + avg_price: row.avg_price ? 
parseFloat(row.avg_price) : null, + }; + return acc; + }, {}), + days: daysNum, + }); + } catch (error: any) { + console.error('Product history error:', error); + res.status(500).json({ error: 'Failed to fetch product history', message: error.message }); + } +}); + +// ============================================================ +// CURRENT SPECIALS +// ============================================================ + +/** + * GET /api/price-analytics/specials + * Get all products currently on special + * + * Query params: + * - state: Filter by state code + * - city: Filter by city + * - category: Filter by category + * - min_discount: Minimum discount percentage + * - limit: Max results (default: 100, max: 500) + * - offset: Pagination offset + */ +router.get('/specials', async (req: Request, res: Response) => { + try { + const { + state, + city, + category, + min_discount = '0', + limit = '100', + offset = '0', + } = req.query; + + const limitNum = Math.min(parseInt(limit as string, 10) || 100, 500); + const offsetNum = parseInt(offset as string, 10) || 0; + const minDiscountNum = parseFloat(min_discount as string) || 0; + + let whereClause = `WHERE pv.is_on_special = TRUE AND pv.in_stock = TRUE`; + const params: any[] = []; + let paramIndex = 1; + + if (state) { + whereClause += ` AND d.state = $${paramIndex}`; + params.push(state); + paramIndex++; + } + + if (city) { + whereClause += ` AND LOWER(d.city) LIKE LOWER($${paramIndex})`; + params.push(`%${city}%`); + paramIndex++; + } + + if (category) { + whereClause += ` AND LOWER(sp.category_raw) = LOWER($${paramIndex})`; + params.push(category); + paramIndex++; + } + + // Calculate discount and filter + const discountCalc = `ROUND(((pv.price_rec - pv.price_rec_special) / NULLIF(pv.price_rec, 0)) * 100, 1)`; + + if (minDiscountNum > 0) { + whereClause += ` AND ${discountCalc} >= $${paramIndex}`; + params.push(minDiscountNum); + paramIndex++; + } + + params.push(limitNum, offsetNum); + + const { rows: specials } = 
await pool.query(` + SELECT + pv.id as variant_id, + sp.id as product_id, + sp.name_raw as product_name, + sp.brand_name_raw as brand_name, + sp.category_raw as category, + sp.image_url, + d.id as dispensary_id, + d.name as dispensary_name, + d.city, + d.state, + pv.option, + pv.price_rec, + pv.price_rec_special, + ${discountCalc} as discount_percent, + pv.quantity, + pv.last_seen_at + FROM product_variants pv + JOIN store_products sp ON sp.id = pv.store_product_id + JOIN dispensaries d ON d.id = pv.dispensary_id + ${whereClause} + AND pv.price_rec_special IS NOT NULL + AND pv.price_rec_special < pv.price_rec + ORDER BY ${discountCalc} DESC + LIMIT $${paramIndex} OFFSET $${paramIndex + 1} + `, params); + + // Get count + const countParams = params.slice(0, -2); + const { rows: countRows } = await pool.query(` + SELECT COUNT(*) as total + FROM product_variants pv + JOIN store_products sp ON sp.id = pv.store_product_id + JOIN dispensaries d ON d.id = pv.dispensary_id + ${whereClause} + AND pv.price_rec_special IS NOT NULL + AND pv.price_rec_special < pv.price_rec + `, countParams); + + res.json({ + specials: specials.map((s: any) => ({ + ...s, + price_rec: s.price_rec ? parseFloat(s.price_rec) : null, + price_rec_special: s.price_rec_special ? parseFloat(s.price_rec_special) : null, + discount_percent: s.discount_percent ? 
parseFloat(s.discount_percent) : null, + })), + pagination: { + total: parseInt(countRows[0]?.total || '0', 10), + limit: limitNum, + offset: offsetNum, + has_more: offsetNum + specials.length < parseInt(countRows[0]?.total || '0', 10), + }, + }); + } catch (error: any) { + console.error('Specials error:', error); + res.status(500).json({ error: 'Failed to fetch specials', message: error.message }); + } +}); + +// ============================================================ +// PRICE COMPARISON +// ============================================================ + +/** + * GET /api/price-analytics/compare + * Compare prices for a product across stores + * + * Query params: + * - name: Product name to search + * - option: Variant option (e.g., "1g", "3.5g") + * - state: Filter by state + * - limit: Max results (default: 50) + */ +router.get('/compare', async (req: Request, res: Response) => { + try { + const { name, option, state, limit = '50' } = req.query; + + if (!name) { + return res.status(400).json({ error: 'Product name is required' }); + } + + const limitNum = Math.min(parseInt(limit as string, 10) || 50, 200); + + let whereClause = `WHERE sp.name_raw ILIKE $1 AND pv.in_stock = TRUE`; + const params: any[] = [`%${name}%`]; + let paramIndex = 2; + + if (option) { + whereClause += ` AND pv.option = $${paramIndex}`; + params.push(option); + paramIndex++; + } + + if (state) { + whereClause += ` AND d.state = $${paramIndex}`; + params.push(state); + paramIndex++; + } + + params.push(limitNum); + + const { rows } = await pool.query(` + SELECT + sp.id as product_id, + sp.name_raw as product_name, + sp.brand_name_raw as brand_name, + sp.category_raw as category, + sp.image_url, + d.id as dispensary_id, + d.name as dispensary_name, + d.city, + d.state, + pv.option, + pv.price_rec, + pv.price_rec_special, + pv.is_on_special, + pv.quantity, + COALESCE(pv.price_rec_special, pv.price_rec) as effective_price, + RANK() OVER (PARTITION BY pv.option ORDER BY 
COALESCE(pv.price_rec_special, pv.price_rec) ASC) as price_rank + FROM product_variants pv + JOIN store_products sp ON sp.id = pv.store_product_id + JOIN dispensaries d ON d.id = pv.dispensary_id + ${whereClause} + AND (pv.price_rec IS NOT NULL OR pv.price_rec_special IS NOT NULL) + ORDER BY pv.option, effective_price ASC + LIMIT $${paramIndex} + `, params); + + // Group by option + const byOption: Record = {}; + for (const row of rows) { + if (!byOption[row.option]) { + byOption[row.option] = []; + } + byOption[row.option].push({ + product_id: row.product_id, + product_name: row.product_name, + brand_name: row.brand_name, + category: row.category, + image_url: row.image_url, + dispensary_id: row.dispensary_id, + dispensary_name: row.dispensary_name, + city: row.city, + state: row.state, + price_rec: row.price_rec ? parseFloat(row.price_rec) : null, + price_rec_special: row.price_rec_special ? parseFloat(row.price_rec_special) : null, + effective_price: row.effective_price ? parseFloat(row.effective_price) : null, + is_on_special: row.is_on_special, + quantity: row.quantity, + price_rank: parseInt(row.price_rank), + }); + } + + // Calculate stats per option + const stats: Record = {}; + for (const [opt, items] of Object.entries(byOption)) { + const prices = items.map((i: any) => i.effective_price).filter((p: any) => p !== null); + stats[opt] = { + count: items.length, + min_price: Math.min(...prices), + max_price: Math.max(...prices), + avg_price: prices.reduce((a: number, b: number) => a + b, 0) / prices.length, + cheapest_store: items[0]?.dispensary_name, + on_special_count: items.filter((i: any) => i.is_on_special).length, + }; + } + + res.json({ + search_term: name, + results: byOption, + stats, + options: Object.keys(byOption), + }); + } catch (error: any) { + console.error('Price compare error:', error); + res.status(500).json({ error: 'Failed to compare prices', message: error.message }); + } +}); + +// 
============================================================ +// MARKET SUMMARY +// ============================================================ + +/** + * GET /api/price-analytics/market-summary + * Get overall market analytics summary + */ +router.get('/market-summary', async (req: Request, res: Response) => { + try { + const { state } = req.query; + + let stateFilter = ''; + const params: any[] = []; + if (state) { + stateFilter = 'WHERE d.state = $1'; + params.push(state); + } + + // Get variant counts + const variantStats = await pool.query(` + SELECT + COUNT(DISTINCT pv.id) as total_variants, + COUNT(DISTINCT pv.id) FILTER (WHERE pv.is_on_special) as on_special, + COUNT(DISTINCT pv.id) FILTER (WHERE pv.in_stock) as in_stock, + COUNT(DISTINCT pv.store_product_id) as total_products, + COUNT(DISTINCT pv.dispensary_id) as total_stores + FROM product_variants pv + JOIN dispensaries d ON d.id = pv.dispensary_id + ${stateFilter} + `, params); + + // Get category breakdown + const categoryStats = await pool.query(` + SELECT + sp.category_raw as category, + COUNT(DISTINCT pv.id) as variant_count, + AVG(COALESCE(pv.price_rec_special, pv.price_rec)) as avg_price, + COUNT(DISTINCT pv.id) FILTER (WHERE pv.is_on_special) as on_special_count + FROM product_variants pv + JOIN store_products sp ON sp.id = pv.store_product_id + JOIN dispensaries d ON d.id = pv.dispensary_id + ${stateFilter} + GROUP BY sp.category_raw + ORDER BY variant_count DESC + LIMIT 10 + `, params); + + // Get recent price changes (last 24h) + const recentChanges = await pool.query(` + SELECT COUNT(*) as price_changes_24h + FROM product_variants pv + JOIN dispensaries d ON d.id = pv.dispensary_id + ${stateFilter ? 
stateFilter + ' AND' : 'WHERE'} + pv.last_price_change_at >= NOW() - INTERVAL '24 hours' + `, params); + + res.json({ + summary: { + total_variants: parseInt(variantStats.rows[0]?.total_variants || '0'), + on_special: parseInt(variantStats.rows[0]?.on_special || '0'), + in_stock: parseInt(variantStats.rows[0]?.in_stock || '0'), + total_products: parseInt(variantStats.rows[0]?.total_products || '0'), + total_stores: parseInt(variantStats.rows[0]?.total_stores || '0'), + price_changes_24h: parseInt(recentChanges.rows[0]?.price_changes_24h || '0'), + }, + categories: categoryStats.rows.map((c: any) => ({ + category: c.category || 'Unknown', + variant_count: parseInt(c.variant_count), + avg_price: c.avg_price ? parseFloat(c.avg_price).toFixed(2) : null, + on_special_count: parseInt(c.on_special_count), + })), + }); + } catch (error: any) { + console.error('Market summary error:', error); + res.status(500).json({ error: 'Failed to fetch market summary', message: error.message }); + } +}); + +export default router; diff --git a/backend/src/routes/products.ts b/backend/src/routes/products.ts index 443e39a8..771be3cf 100755 --- a/backend/src/routes/products.ts +++ b/backend/src/routes/products.ts @@ -101,10 +101,27 @@ router.get('/', async (req, res) => { const sortDirection = (sort_order as string).toLowerCase() === 'asc' ? 
'ASC' : 'DESC'; let query = ` - SELECT p.*, s.name as store_name, c.name as category_name - FROM products p - LEFT JOIN stores s ON p.store_id = s.id - LEFT JOIN categories c ON p.category_id = c.id + SELECT + p.id, + p.dispensary_id as store_id, + p.name_raw as name, + p.brand_name_raw as brand, + p.category_raw as category_name, + p.subcategory_raw as subcategory, + p.description, + p.price_rec as price, + p.thc_percent as thc_percentage, + p.cbd_percent as cbd_percentage, + p.strain_type, + p.primary_image_url as image_url, + p.stock_status, + p.stock_status = 'in_stock' as in_stock, + p.created_at, + p.updated_at, + p.last_seen_at, + d.name as store_name + FROM store_products p + LEFT JOIN dispensaries d ON p.dispensary_id = d.id WHERE 1=1 `; const params: any[] = []; @@ -112,61 +129,60 @@ router.get('/', async (req, res) => { // Store filter if (store_id) { - query += ` AND p.store_id = $${paramCount}`; + query += ` AND p.dispensary_id = $${paramCount}`; params.push(store_id); paramCount++; } - // Category filter + // Category filter (uses category name now) if (category_id) { - query += ` AND p.category_id = $${paramCount}`; + query += ` AND p.category_raw = $${paramCount}`; params.push(category_id); paramCount++; } // Stock filter if (in_stock !== undefined) { - query += ` AND p.in_stock = $${paramCount}`; - params.push(in_stock === 'true'); - paramCount++; + const inStockVal = in_stock === 'true'; + query += inStockVal ? 
` AND p.stock_status = 'in_stock'` : ` AND p.stock_status != 'in_stock'`; } // Search filter if (search) { - query += ` AND (p.name ILIKE $${paramCount} OR p.brand ILIKE $${paramCount} OR p.description ILIKE $${paramCount})`; + query += ` AND (p.name_raw ILIKE $${paramCount} OR p.brand_name_raw ILIKE $${paramCount} OR p.description ILIKE $${paramCount})`; params.push(`%${search}%`); paramCount++; } // Brand filter if (brand) { - query += ` AND p.brand ILIKE $${paramCount}`; + query += ` AND p.brand_name_raw ILIKE $${paramCount}`; params.push(`%${brand}%`); paramCount++; } // Price range filter if (min_price) { - query += ` AND p.price >= $${paramCount}`; + query += ` AND p.price_rec >= $${paramCount}`; params.push(parseFloat(min_price as string)); paramCount++; } if (max_price) { - query += ` AND p.price <= $${paramCount}`; + query += ` AND p.price_rec <= $${paramCount}`; params.push(parseFloat(max_price as string)); paramCount++; } // THC range filter if (min_thc) { - query += ` AND p.thc_percentage >= $${paramCount}`; + query += ` AND p.thc_percent >= $${paramCount}`; params.push(parseFloat(min_thc as string)); paramCount++; } if (max_thc) { - query += ` AND p.thc_percentage <= $${paramCount}`; + query += ` AND p.thc_percent <= $${paramCount}`; params.push(parseFloat(max_thc as string)); paramCount++; } @@ -199,60 +215,59 @@ router.get('/', async (req, res) => { } // Get total count (reuse same filters) - let countQuery = `SELECT COUNT(*) FROM products p WHERE 1=1`; + let countQuery = `SELECT COUNT(*) FROM store_products p WHERE 1=1`; const countParams: any[] = []; let countParamCount = 1; if (store_id) { - countQuery += ` AND p.store_id = $${countParamCount}`; + countQuery += ` AND p.dispensary_id = $${countParamCount}`; countParams.push(store_id); countParamCount++; } if (category_id) { - countQuery += ` AND p.category_id = $${countParamCount}`; + countQuery += ` AND p.category_raw = $${countParamCount}`; countParams.push(category_id); countParamCount++; } if 
(in_stock !== undefined) { - countQuery += ` AND p.in_stock = $${countParamCount}`; - countParams.push(in_stock === 'true'); - countParamCount++; + const inStockVal = in_stock === 'true'; + countQuery += inStockVal ? ` AND p.stock_status = 'in_stock'` : ` AND p.stock_status != 'in_stock'`; } if (search) { - countQuery += ` AND (p.name ILIKE $${countParamCount} OR p.brand ILIKE $${countParamCount} OR p.description ILIKE $${countParamCount})`; + countQuery += ` AND (p.name_raw ILIKE $${countParamCount} OR p.brand_name_raw ILIKE $${countParamCount} OR p.description ILIKE $${countParamCount})`; countParams.push(`%${search}%`); countParamCount++; } if (brand) { - countQuery += ` AND p.brand ILIKE $${countParamCount}`; + countQuery += ` AND p.brand_name_raw ILIKE $${countParamCount}`; countParams.push(`%${brand}%`); countParamCount++; } if (min_price) { - countQuery += ` AND p.price >= $${countParamCount}`; + countQuery += ` AND p.price_rec >= $${countParamCount}`; countParams.push(parseFloat(min_price as string)); countParamCount++; } if (max_price) { - countQuery += ` AND p.price <= $${countParamCount}`; + countQuery += ` AND p.price_rec <= $${countParamCount}`; countParams.push(parseFloat(max_price as string)); countParamCount++; } if (min_thc) { - countQuery += ` AND p.thc_percentage >= $${countParamCount}`; + countQuery += ` AND p.thc_percent >= $${countParamCount}`; countParams.push(parseFloat(min_thc as string)); countParamCount++; } if (max_thc) { - countQuery += ` AND p.thc_percentage <= $${countParamCount}`; + countQuery += ` AND p.thc_percent <= $${countParamCount}`; countParams.push(parseFloat(max_thc as string)); countParamCount++; } @@ -271,7 +286,7 @@ router.get('/', async (req, res) => { if (store_id) { const storeResult = await pool.query( - 'SELECT id, name, last_scraped_at FROM stores WHERE id = $1', + 'SELECT id, name, last_crawled_at as last_scraped_at FROM dispensaries WHERE id = $1', [store_id] ); if (storeResult.rows.length > 0) { @@ -322,10 
+337,27 @@ router.get('/:id', async (req, res) => { const { fields } = req.query; const result = await pool.query(` - SELECT p.*, s.name as store_name, c.name as category_name - FROM products p - LEFT JOIN stores s ON p.store_id = s.id - LEFT JOIN categories c ON p.category_id = c.id + SELECT + p.id, + p.dispensary_id as store_id, + p.name_raw as name, + p.brand_name_raw as brand, + p.category_raw as category_name, + p.subcategory_raw as subcategory, + p.description, + p.price_rec as price, + p.thc_percent as thc_percentage, + p.cbd_percent as cbd_percentage, + p.strain_type, + p.primary_image_url as image_url, + p.stock_status, + p.stock_status = 'in_stock' as in_stock, + p.created_at, + p.updated_at, + p.last_seen_at, + d.name as store_name + FROM store_products p + LEFT JOIN dispensaries d ON p.dispensary_id = d.id WHERE p.id = $1 `, [id]); @@ -359,18 +391,18 @@ router.get('/meta/brands', async (req, res) => { const { store_id } = req.query; let query = ` - SELECT DISTINCT brand - FROM products - WHERE brand IS NOT NULL AND brand != '' + SELECT DISTINCT brand_name_raw as brand + FROM store_products + WHERE brand_name_raw IS NOT NULL AND brand_name_raw != '' `; const params: any[] = []; if (store_id) { - query += ' AND store_id = $1'; + query += ' AND dispensary_id = $1'; params.push(store_id); } - query += ' ORDER BY brand'; + query += ' ORDER BY brand_name_raw'; const result = await pool.query(query, params); const brands = result.rows.map((row: { brand: string }) => row.brand); @@ -389,16 +421,16 @@ router.get('/meta/price-range', async (req, res) => { let query = ` SELECT - MIN(price) as min_price, - MAX(price) as max_price, - AVG(price) as avg_price - FROM products - WHERE price IS NOT NULL + MIN(price_rec) as min_price, + MAX(price_rec) as max_price, + AVG(price_rec) as avg_price + FROM store_products + WHERE price_rec IS NOT NULL `; const params: any[] = []; if (store_id) { - query += ' AND store_id = $1'; + query += ' AND dispensary_id = $1'; 
params.push(store_id); } @@ -415,4 +447,133 @@ router.get('/meta/price-range', async (req, res) => { } }); +// Get product stats - inventory movement, price history, etc. +router.get('/:id/stats', async (req, res) => { + try { + const { id } = req.params; + + // Get current product info + const productResult = await pool.query(` + SELECT id, name_raw as name, stock_quantity, total_quantity_available, + price_rec, price_rec_special, price_med, price_med_special, + first_seen_at, last_seen_at + FROM store_products + WHERE id = $1 + `, [id]); + + if (productResult.rows.length === 0) { + return res.status(404).json({ error: 'Product not found' }); + } + + const product = productResult.rows[0]; + const currentQty = product.stock_quantity || product.total_quantity_available || 0; + const currentPrice = parseFloat(product.price_rec) || 0; + + // Get snapshot history for the last 30 days + const historyResult = await pool.query(` + SELECT + DATE(crawled_at) as date, + AVG(COALESCE(stock_quantity, total_quantity_available, 0)) as avg_quantity, + MIN(COALESCE(stock_quantity, total_quantity_available, 0)) as min_quantity, + MAX(COALESCE(stock_quantity, total_quantity_available, 0)) as max_quantity, + AVG(price_rec) as avg_price, + MIN(price_rec) as min_price, + MAX(price_rec) as max_price, + COUNT(*) as snapshot_count + FROM store_product_snapshots + WHERE store_product_id = $1 + AND crawled_at >= NOW() - INTERVAL '30 days' + GROUP BY DATE(crawled_at) + ORDER BY date DESC + `, [id]); + + // Calculate inventory movement stats + const history = historyResult.rows; + const today = history[0] || null; + const weekAgo = history.find((h: any) => { + const date = new Date(h.date); + const diff = (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24); + return diff >= 6 && diff <= 8; + }); + const monthAgo = history.find((h: any) => { + const date = new Date(h.date); + const diff = (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24); + return diff >= 27 && diff <= 31; + }); + + // 
Inventory movement calculations + const inventoryStats = { + current: currentQty, + daily: today ? { + change: currentQty - (parseFloat(today.avg_quantity) || 0), + start: parseFloat(today.avg_quantity) || 0, + end: currentQty + } : null, + weekly: weekAgo ? { + change: currentQty - (parseFloat(weekAgo.avg_quantity) || 0), + start: parseFloat(weekAgo.avg_quantity) || 0, + end: currentQty, + percent_change: weekAgo.avg_quantity > 0 + ? ((currentQty - parseFloat(weekAgo.avg_quantity)) / parseFloat(weekAgo.avg_quantity) * 100).toFixed(1) + : null + } : null, + monthly: monthAgo ? { + change: currentQty - (parseFloat(monthAgo.avg_quantity) || 0), + start: parseFloat(monthAgo.avg_quantity) || 0, + end: currentQty, + percent_change: monthAgo.avg_quantity > 0 + ? ((currentQty - parseFloat(monthAgo.avg_quantity)) / parseFloat(monthAgo.avg_quantity) * 100).toFixed(1) + : null + } : null + }; + + // Price movement calculations + const priceStats = { + current: currentPrice, + weekly: weekAgo ? { + change: currentPrice - (parseFloat(weekAgo.avg_price) || 0), + start: parseFloat(weekAgo.avg_price) || 0, + end: currentPrice, + percent_change: weekAgo.avg_price > 0 + ? ((currentPrice - parseFloat(weekAgo.avg_price)) / parseFloat(weekAgo.avg_price) * 100).toFixed(1) + : null + } : null, + monthly: monthAgo ? { + change: currentPrice - (parseFloat(monthAgo.avg_price) || 0), + start: parseFloat(monthAgo.avg_price) || 0, + end: currentPrice, + percent_change: monthAgo.avg_price > 0 + ? 
((currentPrice - parseFloat(monthAgo.avg_price)) / parseFloat(monthAgo.avg_price) * 100).toFixed(1) + : null + } : null + }; + + // Get total snapshots count + const snapshotCountResult = await pool.query(` + SELECT COUNT(*) as total_snapshots + FROM store_product_snapshots + WHERE store_product_id = $1 + `, [id]); + + res.json({ + product_id: parseInt(id), + product_name: product.name, + first_seen: product.first_seen_at, + last_seen: product.last_seen_at, + total_snapshots: parseInt(snapshotCountResult.rows[0].total_snapshots), + inventory: inventoryStats, + price: priceStats, + history: history.slice(0, 30).map((h: any) => ({ + date: h.date, + avg_quantity: parseFloat(h.avg_quantity) || 0, + avg_price: parseFloat(h.avg_price) || 0, + snapshots: parseInt(h.snapshot_count) + })) + }); + } catch (error) { + console.error('Error fetching product stats:', error); + res.status(500).json({ error: 'Failed to fetch product stats' }); + } +}); + export default router; diff --git a/backend/src/routes/public-api.ts b/backend/src/routes/public-api.ts index c4a6b6b1..f8736ca4 100644 --- a/backend/src/routes/public-api.ts +++ b/backend/src/routes/public-api.ts @@ -313,6 +313,8 @@ function getScopedDispensaryId(req: PublicApiRequest): { dispensaryId: number | * - dispensary_id: (internal keys only) Filter by specific dispensary * - sort_by: Sort field (name, price, thc, updated) (default: name) * - sort_dir: Sort direction (asc, desc) (default: asc) + * - pricing_type: Price type to return (rec, med, all) (default: rec) + * - include_variants: Include per-variant pricing/inventory (true/false) (default: false) */ router.get('/products', async (req: PublicApiRequest, res: Response) => { try { @@ -341,7 +343,9 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => { limit = '100', offset = '0', sort_by = 'name', - sort_dir = 'asc' + sort_dir = 'asc', + pricing_type = 'rec', + include_variants = 'false' } = req.query; // Build query @@ -367,9 +371,9 @@ 
router.get('/products', async (req: PublicApiRequest, res: Response) => { whereClause += ` AND p.stock_status = 'in_stock'`; } - // Filter by category (maps to 'type' in dutchie_az) + // Filter by category if (category) { - whereClause += ` AND LOWER(p.type) = LOWER($${paramIndex})`; + whereClause += ` AND LOWER(p.category) = LOWER($${paramIndex})`; params.push(category); paramIndex++; } @@ -390,19 +394,19 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => { // Filter by THC range if (min_thc) { - whereClause += ` AND CAST(NULLIF(p.thc, '') AS NUMERIC) >= $${paramIndex}`; + whereClause += ` AND p.thc_percent >= $${paramIndex}`; params.push(parseFloat(min_thc as string)); paramIndex++; } if (max_thc) { - whereClause += ` AND CAST(NULLIF(p.thc, '') AS NUMERIC) <= $${paramIndex}`; + whereClause += ` AND p.thc_percent <= $${paramIndex}`; params.push(parseFloat(max_thc as string)); paramIndex++; } // Filter by on special if (on_special === 'true' || on_special === '1') { - whereClause += ` AND s.special = TRUE`; + whereClause += ` AND s.is_on_special = TRUE`; } // Search by name or brand @@ -416,15 +420,16 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => { const limitNum = Math.min(parseInt(limit as string, 10) || 100, 500); const offsetNum = parseInt(offset as string, 10) || 0; - // Build ORDER BY clause + // Build ORDER BY clause (use pricing_type for price sorting) const sortDirection = sort_dir === 'desc' ? 'DESC' : 'ASC'; let orderBy = 'p.name ASC'; switch (sort_by) { case 'price': - orderBy = `s.rec_min_price_cents ${sortDirection} NULLS LAST`; + const sortPriceCol = pricing_type === 'med' ? 
's.price_med' : 's.price_rec'; + orderBy = `${sortPriceCol} ${sortDirection} NULLS LAST`; break; case 'thc': - orderBy = `CAST(NULLIF(p.thc, '') AS NUMERIC) ${sortDirection} NULLS LAST`; + orderBy = `p.thc_percent ${sortDirection} NULLS LAST`; break; case 'updated': orderBy = `p.updated_at ${sortDirection}`; @@ -436,80 +441,91 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => { params.push(limitNum, offsetNum); + // Determine which price column to use for filtering based on pricing_type + const priceColumn = pricing_type === 'med' ? 's.price_med' : 's.price_rec'; + // Query products with latest snapshot data - // Note: Price filters use HAVING clause since they reference the snapshot subquery + // Uses store_products + v_product_snapshots (canonical tables with raw_data) const { rows: products } = await pool.query(` SELECT p.id, p.dispensary_id, - p.external_product_id as dutchie_id, + p.provider_product_id as dutchie_id, p.name, p.brand_name as brand, - p.type as category, + p.category, p.subcategory, p.strain_type, p.stock_status, - p.thc, - p.cbd, - p.primary_image_url as image_url, - p.images, - p.effects, + p.thc_percent as thc, + p.cbd_percent as cbd, + p.image_url, p.created_at, p.updated_at, - s.rec_min_price_cents, - s.rec_max_price_cents, - s.rec_min_special_price_cents, - s.med_min_price_cents, - s.med_max_price_cents, - s.med_min_special_price_cents, - s.total_quantity_available, - s.options, - s.special, - s.crawled_at as snapshot_at - FROM dutchie_products p + s.price_rec, + s.price_med, + s.price_rec_special, + s.price_med_special, + s.stock_quantity as total_quantity_available, + s.is_on_special as special, + s.captured_at as snapshot_at, + ${include_variants === 'true' || include_variants === '1' ? 
"s.raw_data->'POSMetaData'->'children' as variants_raw" : 'NULL as variants_raw'} + FROM store_products p LEFT JOIN LATERAL ( - SELECT * FROM dutchie_product_snapshots - WHERE dutchie_product_id = p.id - ORDER BY crawled_at DESC + SELECT * FROM v_product_snapshots + WHERE store_product_id = p.id + ORDER BY captured_at DESC LIMIT 1 ) s ON true ${whereClause} - ${min_price ? `AND (s.rec_min_price_cents / 100.0) >= ${parseFloat(min_price as string)}` : ''} - ${max_price ? `AND (s.rec_min_price_cents / 100.0) <= ${parseFloat(max_price as string)}` : ''} + ${min_price ? `AND ${priceColumn} >= ${parseFloat(min_price as string)}` : ''} + ${max_price ? `AND ${priceColumn} <= ${parseFloat(max_price as string)}` : ''} ORDER BY ${orderBy} LIMIT $${paramIndex} OFFSET $${paramIndex + 1} `, params); // Get total count for pagination (include price filters if specified) const { rows: countRows } = await pool.query(` - SELECT COUNT(*) as total FROM dutchie_products p + SELECT COUNT(*) as total FROM store_products p LEFT JOIN LATERAL ( - SELECT rec_min_price_cents, special FROM dutchie_product_snapshots - WHERE dutchie_product_id = p.id - ORDER BY crawled_at DESC + SELECT price_rec, price_med, is_on_special FROM v_product_snapshots + WHERE store_product_id = p.id + ORDER BY captured_at DESC LIMIT 1 ) s ON true ${whereClause} - ${min_price ? `AND (s.rec_min_price_cents / 100.0) >= ${parseFloat(min_price as string)}` : ''} - ${max_price ? `AND (s.rec_min_price_cents / 100.0) <= ${parseFloat(max_price as string)}` : ''} + ${min_price ? `AND ${priceColumn} >= ${parseFloat(min_price as string)}` : ''} + ${max_price ? 
`AND ${priceColumn} <= ${parseFloat(max_price as string)}` : ''} `, params.slice(0, -2)); - // Transform products to backward-compatible format + // Helper to format variants from raw Dutchie data + const formatVariants = (variantsRaw: any[]) => { + if (!variantsRaw || !Array.isArray(variantsRaw)) return []; + return variantsRaw.map((v: any) => ({ + option: v.option || v.key || '', + price_rec: v.recPrice || v.price || null, + price_med: v.medPrice || null, + price_rec_special: v.recSpecialPrice || null, + price_med_special: v.medSpecialPrice || null, + quantity: v.quantityAvailable ?? v.quantity ?? null, + in_stock: (v.quantityAvailable ?? v.quantity ?? 0) > 0, + sku: v.canonicalSKU || null, + canonical_id: v.canonicalID || null, + })); + }; + + // Transform products with pricing_type support const transformedProducts = products.map((p) => { - let imageUrl = p.image_url; - if (!imageUrl && p.images && Array.isArray(p.images) && p.images.length > 0) { - const firstImage = p.images[0]; - imageUrl = typeof firstImage === 'string' ? firstImage : firstImage?.url; - } + // Select price based on pricing_type + const useRecPricing = pricing_type !== 'med'; + const regularPrice = useRecPricing + ? (p.price_rec ? parseFloat(p.price_rec).toFixed(2) : null) + : (p.price_med ? parseFloat(p.price_med).toFixed(2) : null); + const salePrice = useRecPricing + ? (p.price_rec_special ? parseFloat(p.price_rec_special).toFixed(2) : null) + : (p.price_med_special ? parseFloat(p.price_med_special).toFixed(2) : null); - const regularPrice = p.rec_min_price_cents - ? (p.rec_min_price_cents / 100).toFixed(2) - : null; - const salePrice = p.rec_min_special_price_cents - ? (p.rec_min_special_price_cents / 100).toFixed(2) - : null; - - return { + const result: any = { id: p.id, dispensary_id: p.dispensary_id, dutchie_id: p.dutchie_id, @@ -523,16 +539,36 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => { sale_price: salePrice, thc_percentage: p.thc ? 
parseFloat(p.thc) : null, cbd_percentage: p.cbd ? parseFloat(p.cbd) : null, - image_url: imageUrl || null, + image_url: p.image_url || null, in_stock: p.stock_status === 'in_stock', on_special: p.special || false, - effects: p.effects || [], - options: p.options || [], quantity_available: p.total_quantity_available || 0, created_at: p.created_at, updated_at: p.updated_at, - snapshot_at: p.snapshot_at + snapshot_at: p.snapshot_at, + pricing_type: pricing_type, }; + + // Include both pricing if pricing_type is 'all' + if (pricing_type === 'all') { + result.pricing = { + rec: { + price: p.price_rec ? parseFloat(p.price_rec).toFixed(2) : null, + special_price: p.price_rec_special ? parseFloat(p.price_rec_special).toFixed(2) : null, + }, + med: { + price: p.price_med ? parseFloat(p.price_med).toFixed(2) : null, + special_price: p.price_med_special ? parseFloat(p.price_med_special).toFixed(2) : null, + } + }; + } + + // Include variants if requested + if (include_variants === 'true' || include_variants === '1') { + result.variants = formatVariants(p.variants_raw); + } + + return result; }); res.json({ @@ -578,10 +614,10 @@ router.get('/products/:id', async (req: PublicApiRequest, res: Response) => { s.options, s.special, s.crawled_at as snapshot_at - FROM dutchie_products p + FROM v_products p LEFT JOIN LATERAL ( - SELECT * FROM dutchie_product_snapshots - WHERE dutchie_product_id = p.id + SELECT * FROM v_product_snapshots + WHERE store_product_id = p.id ORDER BY crawled_at DESC LIMIT 1 ) s ON true @@ -682,7 +718,7 @@ router.get('/categories', async (req: PublicApiRequest, res: Response) => { subcategory, COUNT(*) as product_count, COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock_count - FROM dutchie_products + FROM v_products ${whereClause} GROUP BY type, subcategory ORDER BY type, subcategory @@ -737,7 +773,7 @@ router.get('/brands', async (req: PublicApiRequest, res: Response) => { brand_name as brand, COUNT(*) as product_count, COUNT(*) FILTER (WHERE 
stock_status = 'in_stock') as in_stock_count - FROM dutchie_products + FROM v_products ${whereClause} GROUP BY brand_name ORDER BY product_count DESC @@ -813,10 +849,10 @@ router.get('/specials', async (req: PublicApiRequest, res: Response) => { s.options, p.updated_at, s.crawled_at as snapshot_at - FROM dutchie_products p + FROM v_products p INNER JOIN LATERAL ( - SELECT * FROM dutchie_product_snapshots - WHERE dutchie_product_id = p.id + SELECT * FROM v_product_snapshots + WHERE store_product_id = p.id ORDER BY crawled_at DESC LIMIT 1 ) s ON true @@ -829,10 +865,10 @@ router.get('/specials', async (req: PublicApiRequest, res: Response) => { const countParams = params.slice(0, -2); const { rows: countRows } = await pool.query(` SELECT COUNT(*) as total - FROM dutchie_products p + FROM v_products p INNER JOIN LATERAL ( - SELECT special FROM dutchie_product_snapshots - WHERE dutchie_product_id = p.id + SELECT special FROM v_product_snapshots + WHERE store_product_id = p.id ORDER BY crawled_at DESC LIMIT 1 ) s ON true @@ -934,7 +970,7 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => { COUNT(*) as product_count, COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock_count, MAX(updated_at) as last_updated - FROM dutchie_products + FROM v_products WHERE dispensary_id = d.id ) pc ON true WHERE d.id = $1 @@ -1041,7 +1077,7 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => { COUNT(*) as product_count, COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock_count, MAX(updated_at) as last_updated - FROM dutchie_products + FROM v_products WHERE dispensary_id = d.id ) pc ON true ${whereClause} @@ -1055,7 +1091,7 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => { FROM dispensaries d LEFT JOIN LATERAL ( SELECT COUNT(*) as product_count - FROM dutchie_products + FROM v_products WHERE dispensary_id = d.id ) pc ON true ${whereClause} @@ -1206,10 +1242,10 @@ router.get('/search', 
async (req: PublicApiRequest, res: Response) => { WHEN LOWER(p.brand_name) LIKE '%' || LOWER($${relevanceParamIndex}) || '%' THEN 60 ELSE 50 END as relevance - FROM dutchie_products p + FROM v_products p LEFT JOIN LATERAL ( - SELECT * FROM dutchie_product_snapshots - WHERE dutchie_product_id = p.id + SELECT * FROM v_product_snapshots + WHERE store_product_id = p.id ORDER BY crawled_at DESC LIMIT 1 ) s ON true @@ -1222,7 +1258,7 @@ router.get('/search', async (req: PublicApiRequest, res: Response) => { const countParams = params.slice(0, paramIndex - 3); // Remove relevance, limit, offset const { rows: countRows } = await pool.query(` SELECT COUNT(*) as total - FROM dutchie_products p + FROM v_products p ${whereClause} `, countParams); @@ -1306,7 +1342,7 @@ router.get('/menu', async (req: PublicApiRequest, res: Response) => { type as category, COUNT(*) as total, COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock - FROM dutchie_products + FROM v_products ${whereClause} AND type IS NOT NULL GROUP BY type ORDER BY total DESC @@ -1320,17 +1356,17 @@ router.get('/menu', async (req: PublicApiRequest, res: Response) => { COUNT(DISTINCT brand_name) as brand_count, COUNT(DISTINCT type) as category_count, MAX(updated_at) as last_updated - FROM dutchie_products + FROM v_products ${whereClause} `, params); // Get specials count const { rows: specialsCount } = await pool.query(` SELECT COUNT(*) as count - FROM dutchie_products p + FROM v_products p INNER JOIN LATERAL ( - SELECT special FROM dutchie_product_snapshots - WHERE dutchie_product_id = p.id + SELECT special FROM v_product_snapshots + WHERE store_product_id = p.id ORDER BY crawled_at DESC LIMIT 1 ) s ON true diff --git a/backend/src/routes/scraper-monitor.ts b/backend/src/routes/scraper-monitor.ts index 8f0025e8..53b3a0fd 100644 --- a/backend/src/routes/scraper-monitor.ts +++ b/backend/src/routes/scraper-monitor.ts @@ -92,9 +92,9 @@ router.get('/history', async (req, res) => { dcj.error_message, ( SELECT 
COUNT(*) - FROM products p - WHERE p.dispensary_id = d.id - AND p.last_seen_at >= NOW() - INTERVAL '7 days' + FROM store_products sp + WHERE sp.dispensary_id = d.id + AND sp.last_seen_at >= NOW() - INTERVAL '7 days' ) as product_count FROM dispensary_crawl_jobs dcj JOIN dispensaries d ON d.id = dcj.dispensary_id diff --git a/backend/src/routes/seo.ts b/backend/src/routes/seo.ts index 895a02d7..bfaaa77f 100644 --- a/backend/src/routes/seo.ts +++ b/backend/src/routes/seo.ts @@ -10,6 +10,25 @@ import { getPool } from '../db/pool'; import { authMiddleware } from '../auth/middleware'; import { ContentValidator } from '../utils/ContentValidator'; import { generateSeoPageWithClaude } from '../services/seoGenerator'; +import { + getAllSettings, + setSetting, + setMultipleSettings, + resetToDefaults, + ensureSettingsExist, + DEFAULT_SETTINGS, +} from '../seo/settings'; +import { + applyTemplateVariables, + getTemplateForPageType, + generatePreview, + generatePageContent, + regenerateContent, + getAllTemplates, + validateTemplate, + MOCK_DATA, + PageType, +} from '../seo/template-engine'; const router = Router(); @@ -160,10 +179,12 @@ router.get('/pages', authMiddleware, async (req: Request, res: Response) => { const metricsResult = await pool.query(` SELECT COUNT(DISTINCT d.id) as dispensary_count, COUNT(DISTINCT p.id) as product_count, - COUNT(DISTINCT p.brand_name) as brand_count + COUNT(DISTINCT p.brand_name_raw) as brand_count FROM dispensaries d - LEFT JOIN dutchie_products p ON p.dispensary_id = d.id + LEFT JOIN store_products p ON p.dispensary_id = d.id WHERE d.state = $1 + AND d.menu_type = 'dutchie' + AND d.platform_dispensary_id IS NOT NULL `, [stateCode]); const m = metricsResult.rows[0]; metrics = { @@ -199,11 +220,13 @@ router.post('/sync-state-pages', authMiddleware, async (req: Request, res: Respo try { const pool = getPool(); - // Get all states that have dispensaries + // Get all states that have active/crawlable dispensaries const statesResult = await 
pool.query(` SELECT DISTINCT state, COUNT(*) as dispensary_count FROM dispensaries WHERE state IS NOT NULL AND state != '' + AND menu_type = 'dutchie' + AND platform_dispensary_id IS NOT NULL GROUP BY state HAVING COUNT(*) > 0 ORDER BY state @@ -245,6 +268,45 @@ router.post('/sync-state-pages', authMiddleware, async (req: Request, res: Respo } }); +/** + * GET /api/seo/state-metrics - Get all state metrics for SEO dashboard + */ +router.get('/state-metrics', authMiddleware, async (req: Request, res: Response) => { + try { + const pool = getPool(); + + const result = await pool.query(` + SELECT + d.state as state_code, + COALESCE(s.name, d.state) as state_name, + COUNT(DISTINCT d.id) as dispensary_count, + COUNT(DISTINCT sp.id) as product_count, + COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL) as brand_count + FROM dispensaries d + LEFT JOIN states s ON d.state = s.code + LEFT JOIN store_products sp ON sp.dispensary_id = d.id + WHERE d.state IS NOT NULL AND d.state != '' + AND d.menu_type = 'dutchie' + AND d.platform_dispensary_id IS NOT NULL + GROUP BY d.state, s.name + ORDER BY dispensary_count DESC + `); + + const states = result.rows.map(row => ({ + stateCode: row.state_code, + stateName: row.state_name || row.state_code, + dispensaryCount: parseInt(row.dispensary_count, 10) || 0, + productCount: parseInt(row.product_count, 10) || 0, + brandCount: parseInt(row.brand_count, 10) || 0, + })); + + res.json({ states }); + } catch (error: any) { + console.error('[SEO] Error fetching state metrics:', error.message); + res.status(500).json({ error: 'Failed to fetch state metrics' }); + } +}); + /** * GET /api/seo/state/:stateCode - State SEO data with metrics */ @@ -257,16 +319,20 @@ router.get('/state/:stateCode', async (req: Request, res: Response) => { const metricsResult = await pool.query(` SELECT COUNT(DISTINCT d.id) as dispensary_count, COUNT(DISTINCT p.id) as product_count, - COUNT(DISTINCT p.brand_name) as brand_count + 
COUNT(DISTINCT p.brand_name_raw) as brand_count FROM dispensaries d - LEFT JOIN dutchie_products p ON p.dispensary_id = d.id - WHERE d.state = $1`, [code]); + LEFT JOIN store_products p ON p.dispensary_id = d.id + WHERE d.state = $1 + AND d.menu_type = 'dutchie' + AND d.platform_dispensary_id IS NOT NULL`, [code]); const brandsResult = await pool.query(` - SELECT brand_name, COUNT(*) as product_count - FROM dutchie_products p JOIN dispensaries d ON p.dispensary_id = d.id - WHERE d.state = $1 AND p.brand_name IS NOT NULL - GROUP BY brand_name ORDER BY product_count DESC LIMIT 10`, [code]); + SELECT brand_name_raw as brand_name, COUNT(*) as product_count + FROM store_products p JOIN dispensaries d ON p.dispensary_id = d.id + WHERE d.state = $1 AND p.brand_name_raw IS NOT NULL + AND d.menu_type = 'dutchie' + AND d.platform_dispensary_id IS NOT NULL + GROUP BY brand_name_raw ORDER BY product_count DESC LIMIT 10`, [code]); const metrics = metricsResult.rows[0]; const response = ContentValidator.sanitizeContent({ @@ -359,4 +425,259 @@ router.get('/public/content', async (req: Request, res: Response) => { } }); +// ============================================================================ +// SEO Settings Endpoints +// ============================================================================ + +/** + * GET /api/seo/settings - Get all SEO settings + */ +router.get('/settings', authMiddleware, async (req: Request, res: Response) => { + try { + // Ensure settings exist on first access + await ensureSettingsExist(); + + const settings = await getAllSettings(); + res.json({ settings }); + } catch (error: any) { + console.error('[SEO] Error fetching settings:', error.message); + res.status(500).json({ error: 'Failed to fetch SEO settings' }); + } +}); + +/** + * POST /api/seo/settings - Save a single setting + */ +router.post('/settings', authMiddleware, async (req: Request, res: Response) => { + try { + const { key, value } = req.body; + + if (!key || typeof key !== 
'string') { + return res.status(400).json({ error: 'key is required' }); + } + + if (value === undefined) { + return res.status(400).json({ error: 'value is required' }); + } + + await setSetting(key, value); + + res.json({ success: true, key, value }); + } catch (error: any) { + console.error('[SEO] Error saving setting:', error.message); + res.status(500).json({ error: 'Failed to save SEO setting' }); + } +}); + +/** + * POST /api/seo/settings/bulk - Save multiple settings at once + */ +router.post('/settings/bulk', authMiddleware, async (req: Request, res: Response) => { + try { + const { settings } = req.body; + + if (!settings || typeof settings !== 'object') { + return res.status(400).json({ error: 'settings object is required' }); + } + + await setMultipleSettings(settings); + + res.json({ success: true, count: Object.keys(settings).length }); + } catch (error: any) { + console.error('[SEO] Error saving bulk settings:', error.message); + res.status(500).json({ error: 'Failed to save SEO settings' }); + } +}); + +/** + * POST /api/seo/settings/reset - Reset all settings to defaults + */ +router.post('/settings/reset', authMiddleware, async (req: Request, res: Response) => { + try { + const settings = await resetToDefaults(); + + res.json({ + success: true, + message: 'Settings reset to defaults', + settings, + }); + } catch (error: any) { + console.error('[SEO] Error resetting settings:', error.message); + res.status(500).json({ error: 'Failed to reset SEO settings' }); + } +}); + +/** + * GET /api/seo/settings/defaults - Get default settings (without modifying DB) + */ +router.get('/settings/defaults', authMiddleware, async (req: Request, res: Response) => { + res.json({ settings: DEFAULT_SETTINGS }); +}); + +/** + * GET /api/seo/settings/preview - Preview merged prompt with sample variables + */ +router.post('/settings/preview', authMiddleware, async (req: Request, res: Response) => { + try { + const { template, variables } = req.body; + + if (!template || 
typeof template !== 'string') { + return res.status(400).json({ error: 'template is required' }); + } + + // Sample variables for preview + const sampleVariables: Record = { + page_type: 'state', + subject: 'Arizona Dispensaries', + focus_areas: 'local stores, product variety, pricing', + tone: 'informational', + length: 'medium', + state_name: 'Arizona', + state_code: 'AZ', + state_code_lower: 'az', + dispensary_count: '150', + improvement_areas: 'SEO keywords, local relevance', + ...variables, + }; + + let preview = template; + for (const [key, value] of Object.entries(sampleVariables)) { + preview = preview.replace(new RegExp(`{{${key}}}`, 'g'), value); + } + + res.json({ preview, variables: sampleVariables }); + } catch (error: any) { + console.error('[SEO] Error generating preview:', error.message); + res.status(500).json({ error: 'Failed to generate preview' }); + } +}); + +// ============================================================================ +// Template Library Endpoints +// ============================================================================ + +/** + * GET /api/seo/templates - Get all templates with metadata + */ +router.get('/templates', authMiddleware, async (req: Request, res: Response) => { + try { + const templates = await getAllTemplates(); + res.json({ templates }); + } catch (error: any) { + console.error('[SEO] Error fetching templates:', error.message); + res.status(500).json({ error: 'Failed to fetch templates' }); + } +}); + +/** + * POST /api/seo/templates/preview - Preview a template with mock data by page type + */ +router.post('/templates/preview', authMiddleware, async (req: Request, res: Response) => { + try { + const { pageType, customTemplate } = req.body; + + if (!pageType || typeof pageType !== 'string') { + return res.status(400).json({ error: 'pageType is required' }); + } + + const result = await generatePreview(pageType, customTemplate); + res.json(result); + } catch (error: any) { + console.error('[SEO] Error 
generating template preview:', error.message); + res.status(500).json({ error: 'Failed to generate template preview' }); + } +}); + +/** + * POST /api/seo/templates/validate - Validate a template string + */ +router.post('/templates/validate', authMiddleware, async (req: Request, res: Response) => { + try { + const { template } = req.body; + + if (!template || typeof template !== 'string') { + return res.status(400).json({ error: 'template is required' }); + } + + const validation = validateTemplate(template); + res.json(validation); + } catch (error: any) { + console.error('[SEO] Error validating template:', error.message); + res.status(500).json({ error: 'Failed to validate template' }); + } +}); + +/** + * POST /api/seo/templates/generate - Generate content using a template + */ +router.post('/templates/generate', authMiddleware, async (req: Request, res: Response) => { + try { + const { pageType, data } = req.body; + + if (!pageType || typeof pageType !== 'string') { + return res.status(400).json({ error: 'pageType is required' }); + } + + if (!data || typeof data !== 'object') { + return res.status(400).json({ error: 'data object is required' }); + } + + const result = await generatePageContent(pageType, data); + res.json(result); + } catch (error: any) { + console.error('[SEO] Error generating from template:', error.message); + res.status(500).json({ error: 'Failed to generate content from template' }); + } +}); + +/** + * POST /api/seo/templates/regenerate - Regenerate content with improvements + */ +router.post('/templates/regenerate', authMiddleware, async (req: Request, res: Response) => { + try { + const { pageType, originalContent, newData, improvementAreas } = req.body; + + if (!pageType || typeof pageType !== 'string') { + return res.status(400).json({ error: 'pageType is required' }); + } + + if (!originalContent || typeof originalContent !== 'string') { + return res.status(400).json({ error: 'originalContent is required' }); + } + + const result = 
await regenerateContent( + pageType, + originalContent, + newData || {}, + improvementAreas + ); + + res.json(result); + } catch (error: any) { + console.error('[SEO] Error regenerating content:', error.message); + res.status(500).json({ error: 'Failed to regenerate content' }); + } +}); + +/** + * GET /api/seo/templates/variables/:pageType - Get available variables for a page type + */ +router.get('/templates/variables/:pageType', authMiddleware, async (req: Request, res: Response) => { + try { + const { pageType } = req.params; + const normalizedType = (pageType?.toLowerCase().trim() || 'state') as PageType; + + const mockData = MOCK_DATA[normalizedType] || MOCK_DATA.state; + + res.json({ + pageType: normalizedType, + variables: Object.keys(mockData), + sampleValues: mockData, + }); + } catch (error: any) { + console.error('[SEO] Error fetching template variables:', error.message); + res.status(500).json({ error: 'Failed to fetch template variables' }); + } +}); + export default router; diff --git a/backend/src/routes/settings.ts b/backend/src/routes/settings.ts index ecc6242e..4eadb8c3 100755 --- a/backend/src/routes/settings.ts +++ b/backend/src/routes/settings.ts @@ -78,6 +78,60 @@ router.put('/:key', requireRole('superadmin', 'admin'), async (req, res) => { } }); +// Test AI provider connection +router.post('/test-ai', requireRole('superadmin', 'admin'), async (req, res) => { + try { + const { provider, apiKey } = req.body; + + if (!provider || !apiKey) { + return res.status(400).json({ success: false, error: 'Provider and API key required' }); + } + + if (provider === 'anthropic') { + // Test Anthropic API + const response = await fetch('https://api.anthropic.com/v1/messages', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-api-key': apiKey, + 'anthropic-version': '2023-06-01' + }, + body: JSON.stringify({ + model: 'claude-3-haiku-20240307', + max_tokens: 10, + messages: [{ role: 'user', content: 'Hi' }] + }) + }); + + if 
(response.ok) { + res.json({ success: true, model: 'claude-3-haiku-20240307' }); + } else { + const error = await response.json().catch(() => ({ error: { message: 'Unknown error' } })); + res.json({ success: false, error: error.error?.message || 'Invalid API key' }); + } + } else if (provider === 'openai') { + // Test OpenAI API + const response = await fetch('https://api.openai.com/v1/models', { + headers: { + 'Authorization': `Bearer ${apiKey}` + } + }); + + if (response.ok) { + res.json({ success: true, model: 'gpt-4' }); + } else { + const error = await response.json().catch(() => ({ error: { message: 'Unknown error' } })); + res.json({ success: false, error: error.error?.message || 'Invalid API key' }); + } + } else { + res.status(400).json({ success: false, error: 'Unknown provider' }); + } + } catch (error: any) { + console.error('Error testing AI connection:', error); + res.json({ success: false, error: error.message || 'Connection failed' }); + } +}); + // Update multiple settings at once router.put('/', requireRole('superadmin', 'admin'), async (req, res) => { try { diff --git a/backend/src/routes/stores.ts b/backend/src/routes/stores.ts index 491ad3f7..b6240591 100755 --- a/backend/src/routes/stores.ts +++ b/backend/src/routes/stores.ts @@ -429,28 +429,49 @@ router.delete('/:id', requireRole('superadmin'), async (req, res) => { } }); -// Get products for a store (uses dutchie_products table) +// Get products for a store (uses store_products via v_products view with snapshot pricing) router.get('/:id/products', async (req, res) => { try { const { id } = req.params; const result = await pool.query(` SELECT - id, - name, - brand_name, - type, - subcategory, - stock_status, - thc_content, - cbd_content, - primary_image_url, - external_product_id, - created_at, - updated_at - FROM dutchie_products - WHERE dispensary_id = $1 - ORDER BY name + p.id, + p.name, + p.brand_name, + p.type, + p.subcategory, + p.strain_type, + p.stock_status, + p.thc as thc_content, + 
p.cbd as cbd_content, + sp.description, + sp.total_quantity_available as quantity, + p.primary_image_url, + p.external_product_id, + p.created_at, + p.updated_at, + COALESCE(snap.rec_min_price_cents, 0)::numeric / 100.0 as regular_price, + CASE WHEN snap.rec_min_special_price_cents > 0 + THEN snap.rec_min_special_price_cents::numeric / 100.0 + ELSE NULL END as sale_price, + COALESCE(snap.med_min_price_cents, 0)::numeric / 100.0 as med_price, + CASE WHEN snap.med_min_special_price_cents > 0 + THEN snap.med_min_special_price_cents::numeric / 100.0 + ELSE NULL END as med_sale_price, + snap.special as on_special + FROM v_products p + JOIN store_products sp ON sp.id = p.id + LEFT JOIN LATERAL ( + SELECT rec_min_price_cents, rec_min_special_price_cents, + med_min_price_cents, med_min_special_price_cents, special + FROM v_product_snapshots vps + WHERE vps.store_product_id = p.id + ORDER BY vps.crawled_at DESC + LIMIT 1 + ) snap ON true + WHERE p.dispensary_id = $1 + ORDER BY p.name `, [id]); res.json({ products: result.rows }); @@ -460,6 +481,55 @@ router.get('/:id/products', async (req, res) => { } }); +// Get specials for a store (products with sale prices or on_special flag) +router.get('/:id/specials', async (req, res) => { + try { + const { id } = req.params; + + const result = await pool.query(` + SELECT + p.id, + p.name, + p.brand_name, + p.type, + p.subcategory, + p.strain_type, + p.stock_status, + p.thc as thc_content, + p.cbd as cbd_content, + sp.description, + sp.total_quantity_available as quantity, + p.primary_image_url, + p.external_product_id, + p.created_at, + p.updated_at, + COALESCE(snap.rec_min_price_cents, 0)::numeric / 100.0 as regular_price, + snap.rec_min_special_price_cents::numeric / 100.0 as sale_price, + COALESCE(snap.med_min_price_cents, 0)::numeric / 100.0 as med_price, + snap.med_min_special_price_cents::numeric / 100.0 as med_sale_price, + true as on_special + FROM v_products p + JOIN store_products sp ON sp.id = p.id + INNER JOIN LATERAL ( 
+ SELECT rec_min_price_cents, rec_min_special_price_cents, + med_min_price_cents, med_min_special_price_cents, special + FROM v_product_snapshots vps + WHERE vps.store_product_id = p.id + AND (vps.special = true OR vps.rec_min_special_price_cents > 0 OR vps.med_min_special_price_cents > 0) + ORDER BY vps.crawled_at DESC + LIMIT 1 + ) snap ON true + WHERE p.dispensary_id = $1 + ORDER BY p.name + `, [id]); + + res.json({ specials: result.rows }); + } catch (error) { + console.error('Error fetching store specials:', error); + res.status(500).json({ error: 'Failed to fetch specials' }); + } +}); + // Get brands for a store router.get('/:id/brands', async (req, res) => { try { @@ -467,7 +537,7 @@ router.get('/:id/brands', async (req, res) => { const result = await pool.query(` SELECT DISTINCT brand_name as name, COUNT(*) as product_count - FROM dutchie_products + FROM v_products WHERE dispensary_id = $1 AND brand_name IS NOT NULL GROUP BY brand_name ORDER BY product_count DESC, brand_name diff --git a/backend/src/routes/workers.ts b/backend/src/routes/workers.ts index dc6d7e7a..f55a5553 100644 --- a/backend/src/routes/workers.ts +++ b/backend/src/routes/workers.ts @@ -24,6 +24,95 @@ import { pool } from '../db/pool'; const router = Router(); +// ============================================================ +// STATIC ROUTES (must come before parameterized routes) +// ============================================================ + +/** + * GET /api/workers/roles - List available worker roles + */ +router.get('/roles', async (_req: Request, res: Response) => { + const roles = [ + { id: 'product_sync', name: 'Product Sync', description: 'Crawls products from dispensary menus' }, + { id: 'store_discovery', name: 'Store Discovery', description: 'Discovers new dispensary locations' }, + { id: 'entry_point_finder', name: 'Entry Point Finder', description: 'Detects menu providers and resolves platform IDs' }, + { id: 'analytics_refresh', name: 'Analytics Refresh', description: 
'Refreshes materialized views and analytics' }, + { id: 'price_monitor', name: 'Price Monitor', description: 'Monitors price changes and triggers alerts' }, + { id: 'inventory_sync', name: 'Inventory Sync', description: 'Syncs inventory levels' }, + { id: 'image_processor', name: 'Image Processor', description: 'Downloads and processes product images' }, + { id: 'data_validator', name: 'Data Validator', description: 'Validates data integrity' }, + { id: 'custom', name: 'Custom', description: 'Custom worker role' }, + ]; + + res.json({ success: true, roles }); +}); + +/** + * GET /api/workers/states - List available states for assignment + */ +router.get('/states', async (_req: Request, res: Response) => { + try { + const { rows } = await pool.query(` + SELECT state_code, state_name, dispensary_count + FROM states + WHERE active = true + ORDER BY state_name ASC + `); + res.json({ success: true, states: rows }); + } catch (error: any) { + // Fallback if states table doesn't exist + res.json({ success: true, states: [ + { state_code: 'AZ', state_name: 'Arizona', dispensary_count: 0 }, + { state_code: 'CA', state_name: 'California', dispensary_count: 0 }, + { state_code: 'CO', state_name: 'Colorado', dispensary_count: 0 }, + { state_code: 'MI', state_name: 'Michigan', dispensary_count: 0 }, + { state_code: 'NV', state_name: 'Nevada', dispensary_count: 0 }, + ]}); + } +}); + +/** + * GET /api/workers/dispensaries - List dispensaries for assignment (paginated search) + */ +router.get('/dispensaries', async (req: Request, res: Response) => { + try { + const search = (req.query.search as string) || ''; + const limit = parseInt(req.query.limit as string) || 50; + + const { rows } = await pool.query(` + SELECT id, name, city, state_code + FROM dispensaries + WHERE ($1 = '' OR name ILIKE $2) + ORDER BY name ASC + LIMIT $3 + `, [search, `%${search}%`, limit]); + + res.json({ success: true, dispensaries: rows }); + } catch (error: any) { + console.error('[Workers] Error 
fetching dispensaries:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * GET /api/workers/chains - List chains for assignment + */ +router.get('/chains', async (_req: Request, res: Response) => { + try { + const { rows } = await pool.query(` + SELECT DISTINCT chain_id as id, chain_name as name, COUNT(*) as dispensary_count + FROM dispensaries + WHERE chain_id IS NOT NULL AND chain_name IS NOT NULL + GROUP BY chain_id, chain_name + ORDER BY chain_name ASC + `); + res.json({ success: true, chains: rows }); + } catch (error: any) { + // Fallback if chain columns don't exist + res.json({ success: true, chains: [] }); + } +}); + // ============================================================ // WORKER TYPES // ============================================================ @@ -32,6 +121,7 @@ interface Worker { id: number; worker_name: string; run_role: string; + job_name?: string; scope: string[]; description: string; enabled: boolean; @@ -40,6 +130,8 @@ interface Worker { next_run_at: string | null; last_run_at: string | null; last_status: string | null; + last_error_message?: string | null; + last_duration_ms?: number | null; last_seen: string | null; visibility_lost: number; visibility_restored: number; @@ -124,15 +216,20 @@ router.get('/', async (_req: Request, res: Response) => { next_run_at, last_run_at, last_status, - job_config + last_error_message, + last_duration_ms, + job_config, + worker_name, + worker_role FROM job_schedules ORDER BY enabled DESC, last_run_at DESC NULLS LAST `); const workers: Worker[] = rows.map((row: any) => ({ id: row.id, - worker_name: extractWorkerName(row.job_name, row.job_config), - run_role: extractRunRole(row.job_name, row.job_config), + worker_name: row.worker_name || extractWorkerName(row.job_name, row.job_config), + run_role: row.worker_role || extractRunRole(row.job_name, row.job_config), + job_name: row.job_name, scope: parseScope(row.job_config), description: row.description || 
row.job_name, enabled: row.enabled, @@ -141,6 +238,8 @@ router.get('/', async (_req: Request, res: Response) => { next_run_at: row.next_run_at?.toISOString() || null, last_run_at: row.last_run_at?.toISOString() || null, last_status: row.last_status, + last_error_message: row.last_error_message, + last_duration_ms: row.last_duration_ms, last_seen: row.last_run_at?.toISOString() || null, visibility_lost: 0, visibility_restored: 0, @@ -619,4 +718,323 @@ router.get('/summary', async (req: Request, res: Response) => { } }); +// ============================================================ +// WORKER CRUD ROUTES (using new workers table) +// ============================================================ + +/** + * GET /api/workers/definitions - List all worker definitions from workers table + */ +router.get('/definitions', async (_req: Request, res: Response) => { + try { + const { rows } = await pool.query(` + SELECT + w.*, + (SELECT COUNT(*) FROM dispensary_crawl_jobs j WHERE j.assigned_worker_id = w.id AND j.status = 'pending') as pending_jobs, + (SELECT COUNT(*) FROM dispensary_crawl_jobs j WHERE j.assigned_worker_id = w.id AND j.status = 'running') as running_jobs + FROM workers w + ORDER BY w.enabled DESC, w.name ASC + `); + + res.json({ success: true, workers: rows }); + } catch (error: any) { + console.error('[Workers] Error listing worker definitions:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * POST /api/workers/definitions - Create a new worker definition + */ +router.post('/definitions', async (req: Request, res: Response) => { + try { + const { + name, + role, + description, + enabled = true, + schedule_type = 'interval', + interval_minutes = 240, + cron_expression, + jitter_minutes = 30, + assignment_type = 'all', + assigned_state_codes, + assigned_dispensary_ids, + assigned_chain_ids, + job_type = 'dutchie_product_crawl', + job_config = {}, + priority = 0, + max_concurrent = 1 + } = req.body; + + if (!name || 
!role) { + return res.status(400).json({ success: false, error: 'name and role are required' }); + } + + const { rows } = await pool.query(` + INSERT INTO workers ( + name, role, description, enabled, + schedule_type, interval_minutes, cron_expression, jitter_minutes, + assignment_type, assigned_state_codes, assigned_dispensary_ids, assigned_chain_ids, + job_type, job_config, priority, max_concurrent + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16) + RETURNING * + `, [ + name, role, description, enabled, + schedule_type, interval_minutes, cron_expression, jitter_minutes, + assignment_type, assigned_state_codes, assigned_dispensary_ids, assigned_chain_ids, + job_type, job_config, priority, max_concurrent + ]); + + // Also create a job_schedule entry for backwards compatibility + await pool.query(` + INSERT INTO job_schedules (job_name, description, enabled, base_interval_minutes, jitter_minutes, worker_name, worker_role, job_config) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + ON CONFLICT (job_name) DO UPDATE SET + description = EXCLUDED.description, + enabled = EXCLUDED.enabled, + base_interval_minutes = EXCLUDED.base_interval_minutes, + jitter_minutes = EXCLUDED.jitter_minutes, + worker_name = EXCLUDED.worker_name, + worker_role = EXCLUDED.worker_role, + updated_at = NOW() + `, [ + `worker_${name.toLowerCase().replace(/\s+/g, '_')}`, + description || `Worker: ${name}`, + enabled, + interval_minutes, + jitter_minutes, + name, + role, + job_config + ]); + + res.json({ success: true, worker: rows[0], message: 'Worker created' }); + } catch (error: any) { + console.error('[Workers] Error creating worker:', error); + if (error.code === '23505') { // unique violation + return res.status(400).json({ success: false, error: 'Worker name already exists' }); + } + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * PUT /api/workers/definitions/:id - Update a worker definition + */ 
+router.put('/definitions/:id', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const { + name, + role, + description, + enabled, + schedule_type, + interval_minutes, + cron_expression, + jitter_minutes, + assignment_type, + assigned_state_codes, + assigned_dispensary_ids, + assigned_chain_ids, + job_type, + job_config, + priority, + max_concurrent + } = req.body; + + const { rows } = await pool.query(` + UPDATE workers SET + name = COALESCE($1, name), + role = COALESCE($2, role), + description = COALESCE($3, description), + enabled = COALESCE($4, enabled), + schedule_type = COALESCE($5, schedule_type), + interval_minutes = COALESCE($6, interval_minutes), + cron_expression = COALESCE($7, cron_expression), + jitter_minutes = COALESCE($8, jitter_minutes), + assignment_type = COALESCE($9, assignment_type), + assigned_state_codes = COALESCE($10, assigned_state_codes), + assigned_dispensary_ids = COALESCE($11, assigned_dispensary_ids), + assigned_chain_ids = COALESCE($12, assigned_chain_ids), + job_type = COALESCE($13, job_type), + job_config = COALESCE($14, job_config), + priority = COALESCE($15, priority), + max_concurrent = COALESCE($16, max_concurrent), + updated_at = NOW() + WHERE id = $17 + RETURNING * + `, [ + name, role, description, enabled, + schedule_type, interval_minutes, cron_expression, jitter_minutes, + assignment_type, assigned_state_codes, assigned_dispensary_ids, assigned_chain_ids, + job_type, job_config, priority, max_concurrent, + id + ]); + + if (rows.length === 0) { + return res.status(404).json({ success: false, error: 'Worker not found' }); + } + + res.json({ success: true, worker: rows[0], message: 'Worker updated' }); + } catch (error: any) { + console.error('[Workers] Error updating worker:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * DELETE /api/workers/definitions/:id - Delete a worker definition + */ +router.delete('/definitions/:id', async (req: Request, 
res: Response) => { + try { + const { id } = req.params; + + const { rows } = await pool.query(` + DELETE FROM workers WHERE id = $1 RETURNING name + `, [id]); + + if (rows.length === 0) { + return res.status(404).json({ success: false, error: 'Worker not found' }); + } + + res.json({ success: true, message: `Worker "${rows[0].name}" deleted` }); + } catch (error: any) { + console.error('[Workers] Error deleting worker:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * POST /api/workers/definitions/:id/toggle - Enable/disable worker + */ +router.post('/definitions/:id/toggle', async (req: Request, res: Response) => { + try { + const { id } = req.params; + + const { rows } = await pool.query(` + UPDATE workers SET enabled = NOT enabled, updated_at = NOW() + WHERE id = $1 + RETURNING id, name, enabled + `, [id]); + + if (rows.length === 0) { + return res.status(404).json({ success: false, error: 'Worker not found' }); + } + + res.json({ success: true, worker: rows[0], message: `Worker ${rows[0].enabled ? 
'enabled' : 'disabled'}` }); + } catch (error: any) { + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * POST /api/workers/definitions/:id/assign-dispensary - Assign dispensary to worker + */ +router.post('/definitions/:id/assign-dispensary', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const { dispensary_id } = req.body; + + if (!dispensary_id) { + return res.status(400).json({ success: false, error: 'dispensary_id is required' }); + } + + const { rows } = await pool.query(` + UPDATE workers SET + assigned_dispensary_ids = array_append( + COALESCE(assigned_dispensary_ids, ARRAY[]::integer[]), + $1::integer + ), + assignment_type = 'dispensary', + updated_at = NOW() + WHERE id = $2 AND NOT ($1 = ANY(COALESCE(assigned_dispensary_ids, ARRAY[]::integer[]))) + RETURNING id, name, assigned_dispensary_ids + `, [dispensary_id, id]); + + if (rows.length === 0) { + // Check if dispensary was already assigned + const existing = await pool.query(` + SELECT assigned_dispensary_ids FROM workers WHERE id = $1 + `, [id]); + + if (existing.rows.length === 0) { + return res.status(404).json({ success: false, error: 'Worker not found' }); + } + + return res.json({ success: true, message: 'Dispensary already assigned', worker: existing.rows[0] }); + } + + res.json({ success: true, worker: rows[0], message: 'Dispensary assigned to worker' }); + } catch (error: any) { + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * DELETE /api/workers/definitions/:id/assign-dispensary/:dispensaryId - Remove dispensary from worker + */ +router.delete('/definitions/:id/assign-dispensary/:dispensaryId', async (req: Request, res: Response) => { + try { + const { id, dispensaryId } = req.params; + + const { rows } = await pool.query(` + UPDATE workers SET + assigned_dispensary_ids = array_remove(assigned_dispensary_ids, $1::integer), + updated_at = NOW() + WHERE id = $2 + RETURNING id, name, 
assigned_dispensary_ids + `, [dispensaryId, id]); + + if (rows.length === 0) { + return res.status(404).json({ success: false, error: 'Worker not found' }); + } + + res.json({ success: true, worker: rows[0], message: 'Dispensary removed from worker' }); + } catch (error: any) { + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * PUT /api/workers/:id/schedule - Update worker schedule (for job_schedules table) + */ +router.put('/:id/schedule', async (req: Request, res: Response) => { + try { + const { id } = req.params; + const { + worker_name, + worker_role, + description, + enabled, + base_interval_minutes, + jitter_minutes, + job_config + } = req.body; + + const { rows } = await pool.query(` + UPDATE job_schedules SET + worker_name = COALESCE($1, worker_name), + worker_role = COALESCE($2, worker_role), + description = COALESCE($3, description), + enabled = COALESCE($4, enabled), + base_interval_minutes = COALESCE($5, base_interval_minutes), + jitter_minutes = COALESCE($6, jitter_minutes), + job_config = COALESCE($7, job_config), + updated_at = NOW() + WHERE id = $8 + RETURNING * + `, [worker_name, worker_role, description, enabled, base_interval_minutes, jitter_minutes, job_config, id]); + + if (rows.length === 0) { + return res.status(404).json({ success: false, error: 'Schedule not found' }); + } + + res.json({ success: true, schedule: rows[0], message: 'Schedule updated' }); + } catch (error: any) { + res.status(500).json({ success: false, error: error.message }); + } +}); + export default router; diff --git a/backend/src/scraper-v2/canonical-pipeline.ts b/backend/src/scraper-v2/canonical-pipeline.ts new file mode 100644 index 00000000..c76ccafe --- /dev/null +++ b/backend/src/scraper-v2/canonical-pipeline.ts @@ -0,0 +1,353 @@ +/** + * Canonical Database Pipeline + * + * Writes scraped products to the canonical tables: + * - store_products (current state) + * - store_product_snapshots (historical) + * - product_variants 
(per-weight pricing) + * - product_variant_snapshots (variant history) + * + * This replaces the legacy DatabasePipeline that wrote to `products` table. + */ + +import { ItemPipeline, Product } from './types'; +import { logger } from '../services/logger'; +import { pool } from '../db/pool'; +import { v4 as uuidv4 } from 'uuid'; + +interface VariantData { + option: string; + priceRec: number | null; + priceMed: number | null; + priceRecSpecial: number | null; + priceMedSpecial: number | null; + quantity: number | null; + inStock: boolean; + isOnSpecial: boolean; +} + +/** + * Parse weight string like "1g", "3.5g", "1/8oz" into value and unit + */ +function parseWeight(option: string): { value: number | null; unit: string | null } { + if (!option) return { value: null, unit: null }; + + // Match patterns like "1g", "3.5g", "1/8oz", "100mg" + const match = option.match(/^([\d.\/]+)\s*(g|oz|mg|ml|each|pk|ct)?$/i); + if (!match) return { value: null, unit: null }; + + let value: number | null = null; + const rawValue = match[1]; + const unit = match[2]?.toLowerCase() || null; + + // Handle fractions like "1/8" + if (rawValue.includes('/')) { + const [num, denom] = rawValue.split('/'); + value = parseFloat(num) / parseFloat(denom); + } else { + value = parseFloat(rawValue); + } + + if (isNaN(value)) value = null; + + return { value, unit }; +} + +/** + * Canonical Database Pipeline - saves items to canonical tables + * + * TABLES: + * - store_products: Current product state per store + * - store_product_snapshots: Historical snapshot per crawl + * - product_variants: Current variant state (per-weight pricing) + * - product_variant_snapshots: Historical variant snapshots + */ +export class CanonicalDatabasePipeline implements ItemPipeline { + name = 'CanonicalDatabasePipeline'; + priority = 10; // Low priority - runs last + + private crawlRunId: number | null = null; + + setCrawlRunId(id: number): void { + this.crawlRunId = id; + } + + async process(item: Product, spider: 
string): Promise { + const client = await pool.connect(); + + try { + // Extract metadata set by spider + const dispensaryId = (item as any).dispensaryId; + const categoryName = (item as any).categoryName; + const variants: VariantData[] = (item as any).variants || []; + + if (!dispensaryId) { + logger.error('pipeline', `Missing dispensaryId for ${item.name}`); + return null; + } + + const externalProductId = item.dutchieProductId || null; + const provider = 'dutchie'; + + // Determine stock status + const isInStock = (item as any).inStock !== false; + const stockQuantity = (item as any).stockQuantity || null; + + // Extract pricing + const priceRec = item.price || null; + const priceMed = (item as any).priceMed || null; + + let storeProductId: number | null = null; + let isNewProduct = false; + + // ============================================================ + // UPSERT store_products + // ============================================================ + + const upsertResult = await client.query(` + INSERT INTO store_products ( + dispensary_id, provider, provider_product_id, + name_raw, brand_name_raw, category_raw, + price_rec, price_med, + thc_percent, cbd_percent, + is_in_stock, stock_quantity, + image_url, source_url, + raw_data, + first_seen_at, last_seen_at, + created_at, updated_at + ) VALUES ( + $1, $2, $3, + $4, $5, $6, + $7, $8, + $9, $10, + $11, $12, + $13, $14, + $15, + NOW(), NOW(), + NOW(), NOW() + ) + ON CONFLICT (dispensary_id, provider, provider_product_id) + DO UPDATE SET + name_raw = EXCLUDED.name_raw, + brand_name_raw = EXCLUDED.brand_name_raw, + category_raw = EXCLUDED.category_raw, + price_rec = EXCLUDED.price_rec, + price_med = EXCLUDED.price_med, + thc_percent = EXCLUDED.thc_percent, + cbd_percent = EXCLUDED.cbd_percent, + is_in_stock = EXCLUDED.is_in_stock, + stock_quantity = EXCLUDED.stock_quantity, + image_url = COALESCE(EXCLUDED.image_url, store_products.image_url), + source_url = EXCLUDED.source_url, + raw_data = EXCLUDED.raw_data, + 
last_seen_at = NOW(), + updated_at = NOW() + RETURNING id, (xmax = 0) as is_new + `, [ + dispensaryId, provider, externalProductId, + item.name, item.brand || null, categoryName || null, + priceRec, priceMed, + item.thcPercentage || null, item.cbdPercentage || null, + isInStock, stockQuantity, + item.imageUrl || null, item.dutchieUrl || null, + JSON.stringify(item.metadata || {}), + ]); + + storeProductId = upsertResult.rows[0].id; + isNewProduct = upsertResult.rows[0].is_new; + + logger.debug('pipeline', `${isNewProduct ? 'Inserted' : 'Updated'} canonical product: ${item.name} (ID: ${storeProductId})`); + + // ============================================================ + // INSERT store_product_snapshots + // ============================================================ + + await client.query(` + INSERT INTO store_product_snapshots ( + store_product_id, dispensary_id, crawl_run_id, + price_rec, price_med, + is_in_stock, stock_quantity, + is_present_in_feed, + captured_at, created_at + ) VALUES ( + $1, $2, $3, + $4, $5, + $6, $7, + TRUE, + NOW(), NOW() + ) + ON CONFLICT (store_product_id, crawl_run_id) WHERE crawl_run_id IS NOT NULL + DO UPDATE SET + price_rec = EXCLUDED.price_rec, + price_med = EXCLUDED.price_med, + is_in_stock = EXCLUDED.is_in_stock, + stock_quantity = EXCLUDED.stock_quantity + `, [ + storeProductId, dispensaryId, this.crawlRunId, + priceRec, priceMed, + isInStock, stockQuantity, + ]); + + // ============================================================ + // UPSERT product_variants (if variants exist) + // ============================================================ + + if (variants.length > 0) { + for (const variant of variants) { + const { value: weightValue, unit: weightUnit } = parseWeight(variant.option); + + const variantResult = await client.query(` + INSERT INTO product_variants ( + store_product_id, dispensary_id, + option, + price_rec, price_med, price_rec_special, price_med_special, + quantity, quantity_available, in_stock, 
is_on_special, + weight_value, weight_unit, + first_seen_at, last_seen_at, + created_at, updated_at + ) VALUES ( + $1, $2, + $3, + $4, $5, $6, $7, + $8, $8, $9, $10, + $11, $12, + NOW(), NOW(), + NOW(), NOW() + ) + ON CONFLICT (store_product_id, option) + DO UPDATE SET + price_rec = EXCLUDED.price_rec, + price_med = EXCLUDED.price_med, + price_rec_special = EXCLUDED.price_rec_special, + price_med_special = EXCLUDED.price_med_special, + quantity = EXCLUDED.quantity, + quantity_available = EXCLUDED.quantity_available, + in_stock = EXCLUDED.in_stock, + is_on_special = EXCLUDED.is_on_special, + weight_value = EXCLUDED.weight_value, + weight_unit = EXCLUDED.weight_unit, + last_seen_at = NOW(), + last_price_change_at = CASE + WHEN product_variants.price_rec IS DISTINCT FROM EXCLUDED.price_rec + OR product_variants.price_rec_special IS DISTINCT FROM EXCLUDED.price_rec_special + THEN NOW() + ELSE product_variants.last_price_change_at + END, + last_stock_change_at = CASE + WHEN product_variants.in_stock IS DISTINCT FROM EXCLUDED.in_stock + THEN NOW() + ELSE product_variants.last_stock_change_at + END, + updated_at = NOW() + RETURNING id + `, [ + storeProductId, dispensaryId, + variant.option, + variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial, + variant.quantity, variant.inStock, variant.isOnSpecial, + weightValue, weightUnit, + ]); + + const variantId = variantResult.rows[0].id; + + // Insert variant snapshot + await client.query(` + INSERT INTO product_variant_snapshots ( + product_variant_id, store_product_id, dispensary_id, crawl_run_id, + option, + price_rec, price_med, price_rec_special, price_med_special, + quantity, in_stock, is_on_special, + is_present_in_feed, + captured_at, created_at + ) VALUES ( + $1, $2, $3, $4, + $5, + $6, $7, $8, $9, + $10, $11, $12, + TRUE, + NOW(), NOW() + ) + `, [ + variantId, storeProductId, dispensaryId, this.crawlRunId, + variant.option, + variant.priceRec, variant.priceMed, variant.priceRecSpecial, 
variant.priceMedSpecial, + variant.quantity, variant.inStock, variant.isOnSpecial, + ]); + } + + logger.debug('pipeline', `Upserted ${variants.length} variants for ${item.name}`); + } + + // Attach metadata for stats tracking + (item as any).isNewProduct = isNewProduct; + (item as any).storeProductId = storeProductId; + + return item; + + } catch (error) { + logger.error('pipeline', `Failed to save canonical product ${item.name}: ${error}`); + return null; + } finally { + client.release(); + } + } +} + +/** + * Create a crawl run record before starting crawl + */ +export async function createCrawlRun( + dispensaryId: number, + provider: string = 'dutchie', + triggerType: string = 'manual' +): Promise { + const result = await pool.query(` + INSERT INTO crawl_runs ( + dispensary_id, provider, + started_at, status, trigger_type + ) VALUES ($1, $2, NOW(), 'running', $3) + RETURNING id + `, [dispensaryId, provider, triggerType]); + + return result.rows[0].id; +} + +/** + * Complete a crawl run with stats + */ +export async function completeCrawlRun( + crawlRunId: number, + stats: { + productsFound: number; + productsNew: number; + productsUpdated: number; + snapshotsWritten: number; + variantsUpserted?: number; + status?: 'completed' | 'failed' | 'partial'; + error?: string; + } +): Promise { + await pool.query(` + UPDATE crawl_runs SET + finished_at = NOW(), + status = $2, + products_found = $3, + products_new = $4, + products_updated = $5, + snapshots_written = $6, + metadata = jsonb_build_object( + 'variants_upserted', $7, + 'error', $8 + ) + WHERE id = $1 + `, [ + crawlRunId, + stats.status || 'completed', + stats.productsFound, + stats.productsNew, + stats.productsUpdated, + stats.snapshotsWritten, + stats.variantsUpserted || 0, + stats.error || null, + ]); +} diff --git a/backend/src/scraper-v2/engine.ts b/backend/src/scraper-v2/engine.ts index 2bf194f2..c01eff71 100644 --- a/backend/src/scraper-v2/engine.ts +++ b/backend/src/scraper-v2/engine.ts @@ -2,6 +2,7 @@ 
import { RequestScheduler } from './scheduler'; import { Downloader } from './downloader'; import { MiddlewareEngine, UserAgentMiddleware, ProxyMiddleware, RateLimitMiddleware, RetryMiddleware, BotDetectionMiddleware, StealthMiddleware } from './middlewares'; import { PipelineEngine, ValidationPipeline, SanitizationPipeline, DeduplicationPipeline, ImagePipeline, DatabasePipeline, StatsPipeline } from './pipelines'; +import { CanonicalDatabasePipeline, createCrawlRun, completeCrawlRun } from './canonical-pipeline'; import { ScraperRequest, ScraperResponse, ParseResult, Product, ScraperStats } from './types'; import { logger } from '../services/logger'; import { pool } from '../db/pool'; @@ -65,6 +66,9 @@ export class ScraperEngine { this.pipelineEngine.use(new DeduplicationPipeline()); this.pipelineEngine.use(new ImagePipeline()); this.pipelineEngine.use(new StatsPipeline()); + // Use canonical pipeline for writing to store_products/product_variants + this.pipelineEngine.use(new CanonicalDatabasePipeline()); + // Keep legacy pipeline for backwards compatibility with existing stores table this.pipelineEngine.use(new DatabasePipeline()); } diff --git a/backend/src/scraper-v2/index.ts b/backend/src/scraper-v2/index.ts index 4053f9db..e44c74b9 100644 --- a/backend/src/scraper-v2/index.ts +++ b/backend/src/scraper-v2/index.ts @@ -39,6 +39,11 @@ export { DatabasePipeline, StatsPipeline } from './pipelines'; +export { + CanonicalDatabasePipeline, + createCrawlRun, + completeCrawlRun +} from './canonical-pipeline'; export * from './types'; // Main API functions diff --git a/backend/src/scripts/discover-all-states.ts b/backend/src/scripts/discover-all-states.ts new file mode 100644 index 00000000..4d921a5a --- /dev/null +++ b/backend/src/scripts/discover-all-states.ts @@ -0,0 +1,385 @@ +#!/usr/bin/env npx tsx +/** + * Discover All States - Sequential State-by-State Dutchie Discovery + * + * This script discovers all Dutchie dispensaries for every US state, + * processing one 
state at a time with delays between states. + * + * Progress is automatically saved to /tmp/discovery-progress.json + * so the script can resume from where it left off if interrupted. + * + * Usage: + * DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts + * DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --dry-run + * DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --start-from CA + * DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --resume + * DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --reset # Clear progress, start fresh + * + * Options: + * --dry-run Don't save to database, just show what would happen + * --start-from Start from a specific state (skip earlier states) + * --states Comma-separated list of specific states to run (e.g., AZ,CA,CO) + * --verbose Show detailed output + * --resume Auto-resume from last saved progress (default if progress file exists) + * --reset Clear progress file and start fresh + */ + +import { Pool } from 'pg'; +import * as fs from 'fs'; +import * as path from 'path'; + +const PROGRESS_FILE = '/tmp/discovery-progress.json'; + +interface ProgressData { + lastCompletedState: string | null; + lastCompletedIndex: number; + startedAt: string; + updatedAt: string; + completedStates: string[]; +} + +function loadProgress(): ProgressData | null { + try { + if (fs.existsSync(PROGRESS_FILE)) { + const data = JSON.parse(fs.readFileSync(PROGRESS_FILE, 'utf-8')); + return data; + } + } catch (e) { + console.warn('[Progress] Could not load progress file:', e); + } + return null; +} + +function saveProgress(progress: ProgressData): void { + try { + progress.updatedAt = new Date().toISOString(); + fs.writeFileSync(PROGRESS_FILE, JSON.stringify(progress, null, 2)); + } catch (e) { + console.warn('[Progress] Could not save progress:', e); + } +} + +function clearProgress(): void { + try { + if (fs.existsSync(PROGRESS_FILE)) { + fs.unlinkSync(PROGRESS_FILE); + 
console.log('[Progress] Cleared progress file'); + } + } catch (e) { + console.warn('[Progress] Could not clear progress:', e); + } +} +import { discoverState } from '../discovery'; + +// US states with legal cannabis (medical or recreational) +// Ordered roughly by market size / likelihood of Dutchie presence +const US_STATES = [ + 'AZ', // Arizona + 'CA', // California + 'CO', // Colorado + 'FL', // Florida + 'IL', // Illinois + 'MA', // Massachusetts + 'MI', // Michigan + 'NV', // Nevada + 'NJ', // New Jersey + 'NY', // New York + 'OH', // Ohio + 'OR', // Oregon + 'PA', // Pennsylvania + 'WA', // Washington + 'MD', // Maryland + 'MO', // Missouri + 'CT', // Connecticut + 'NM', // New Mexico + 'ME', // Maine + 'VT', // Vermont + 'MT', // Montana + 'AK', // Alaska + 'OK', // Oklahoma + 'AR', // Arkansas + 'ND', // North Dakota + 'SD', // South Dakota + 'MN', // Minnesota + 'NH', // New Hampshire + 'RI', // Rhode Island + 'DE', // Delaware + 'HI', // Hawaii + 'WV', // West Virginia + 'LA', // Louisiana + 'UT', // Utah + 'VA', // Virginia + 'DC', // District of Columbia +]; + +interface DiscoveryResult { + stateCode: string; + citiesCrawled: number; + locationsFound: number; + locationsUpserted: number; + durationMs: number; + errors: string[]; +} + +function parseArgs() { + const args = process.argv.slice(2); + const flags: Record = {}; + + for (let i = 0; i < args.length; i++) { + const arg = args[i]; + if (arg.startsWith('--')) { + const [key, value] = arg.slice(2).split('='); + if (value !== undefined) { + flags[key] = value; + } else if (args[i + 1] && !args[i + 1].startsWith('--')) { + flags[key] = args[i + 1]; + i++; + } else { + flags[key] = true; + } + } + } + + return flags; +} + +async function main() { + const flags = parseArgs(); + const dryRun = Boolean(flags['dry-run']); + const verbose = Boolean(flags.verbose); + const reset = Boolean(flags.reset); + const resume = Boolean(flags.resume); + let startFrom = flags['start-from'] as string | undefined; + 
const specificStates = flags.states + ? (flags.states as string).split(',').map((s) => s.trim().toUpperCase()) + : null; + + // Handle reset flag + if (reset) { + clearProgress(); + } + + // Determine which states to process + let statesToProcess = specificStates || US_STATES; + + // Check for saved progress (auto-resume unless --reset or --start-from specified) + const savedProgress = loadProgress(); + if (savedProgress && !reset && !startFrom && !specificStates) { + const nextIndex = savedProgress.lastCompletedIndex + 1; + if (nextIndex < US_STATES.length) { + startFrom = US_STATES[nextIndex]; + console.log(`[Progress] Resuming from saved progress`); + console.log(`[Progress] Last completed: ${savedProgress.lastCompletedState} (${savedProgress.completedStates.length} states done)`); + console.log(`[Progress] Started at: ${savedProgress.startedAt}`); + console.log(`[Progress] Last update: ${savedProgress.updatedAt}`); + console.log(''); + } else { + console.log(`[Progress] All states already completed! Use --reset to start over.`); + process.exit(0); + } + } + + if (startFrom) { + const startIndex = statesToProcess.indexOf(startFrom.toUpperCase()); + if (startIndex === -1) { + console.error(`ERROR: State ${startFrom} not found in list`); + process.exit(1); + } + statesToProcess = statesToProcess.slice(startIndex); + console.log(`Starting from ${startFrom}, ${statesToProcess.length} states remaining`); + } + + // Initialize progress tracking + let progress: ProgressData = savedProgress || { + lastCompletedState: null, + lastCompletedIndex: -1, + startedAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + completedStates: [], + }; + + console.log('='.repeat(70)); + console.log('DUTCHIE ALL-STATES DISCOVERY'); + console.log('='.repeat(70)); + console.log(`Mode: ${dryRun ? 
'DRY RUN' : 'LIVE'}`); + console.log(`States to process: ${statesToProcess.length}`); + console.log(`States: ${statesToProcess.join(', ')}`); + console.log(''); + + // Create database pool + const connectionString = process.env.DATABASE_URL; + if (!connectionString) { + console.error('ERROR: DATABASE_URL environment variable is required'); + process.exit(1); + } + const pool = new Pool({ connectionString }); + + const results: DiscoveryResult[] = []; + const startTime = Date.now(); + + try { + for (let i = 0; i < statesToProcess.length; i++) { + const stateCode = statesToProcess[i]; + + console.log(''); + console.log('─'.repeat(70)); + console.log(`[${i + 1}/${statesToProcess.length}] Discovering ${stateCode}...`); + console.log('─'.repeat(70)); + + try { + const result = await discoverState(pool, stateCode, { + dryRun, + verbose, + cityLimit: 200, // Allow up to 200 cities per state + }); + + const discoveryResult: DiscoveryResult = { + stateCode, + citiesCrawled: result.locations.length, + locationsFound: result.totalLocationsFound, + locationsUpserted: result.totalLocationsUpserted, + durationMs: result.durationMs, + errors: [], + }; + + // Collect errors from city results + result.locations.forEach((loc) => { + if (loc.errors && loc.errors.length > 0) { + discoveryResult.errors.push(...loc.errors); + } + }); + + results.push(discoveryResult); + + // Save progress after each successful state + const stateIndex = US_STATES.indexOf(stateCode); + progress.lastCompletedState = stateCode; + progress.lastCompletedIndex = stateIndex; + if (!progress.completedStates.includes(stateCode)) { + progress.completedStates.push(stateCode); + } + saveProgress(progress); + + console.log(`\n[${stateCode}] COMPLETE:`); + console.log(` Cities crawled: ${discoveryResult.citiesCrawled}`); + console.log(` Locations found: ${discoveryResult.locationsFound}`); + console.log(` Locations upserted: ${discoveryResult.locationsUpserted}`); + console.log(` Duration: 
${(discoveryResult.durationMs / 1000).toFixed(1)}s`); + console.log(` Progress saved (${progress.completedStates.length}/${US_STATES.length} states)`); + + if (discoveryResult.errors.length > 0) { + console.log(` Errors: ${discoveryResult.errors.length}`); + } + + // Delay between states to avoid rate limiting + if (i < statesToProcess.length - 1) { + const delaySeconds = 5; + console.log(`\n Waiting ${delaySeconds}s before next state...`); + await new Promise((r) => setTimeout(r, delaySeconds * 1000)); + } + } catch (error: any) { + console.error(`\n[${stateCode}] ERROR: ${error.message}`); + results.push({ + stateCode, + citiesCrawled: 0, + locationsFound: 0, + locationsUpserted: 0, + durationMs: 0, + errors: [error.message], + }); + + // Continue to next state even on error + await new Promise((r) => setTimeout(r, 3000)); + } + } + + // Print summary + const totalDuration = Date.now() - startTime; + const totalLocations = results.reduce((sum, r) => sum + r.locationsFound, 0); + const totalUpserted = results.reduce((sum, r) => sum + r.locationsUpserted, 0); + const totalCities = results.reduce((sum, r) => sum + r.citiesCrawled, 0); + const statesWithErrors = results.filter((r) => r.errors.length > 0); + + console.log(''); + console.log('='.repeat(70)); + console.log('DISCOVERY COMPLETE - SUMMARY'); + console.log('='.repeat(70)); + console.log(`Total states processed: ${results.length}`); + console.log(`Total cities crawled: ${totalCities}`); + console.log(`Total locations found: ${totalLocations}`); + console.log(`Total locations upserted: ${totalUpserted}`); + console.log(`Total duration: ${(totalDuration / 1000 / 60).toFixed(1)} minutes`); + console.log(''); + + if (statesWithErrors.length > 0) { + console.log('States with errors:'); + statesWithErrors.forEach((r) => { + console.log(` ${r.stateCode}: ${r.errors.length} error(s)`); + }); + console.log(''); + } + + // Print per-state breakdown + console.log('Per-state results:'); + console.log('-'.repeat(70)); + 
console.log('State\tCities\tFound\tUpserted\tDuration\tStatus'); + console.log('-'.repeat(70)); + + results.forEach((r) => { + const status = r.errors.length > 0 ? 'ERRORS' : 'OK'; + const duration = (r.durationMs / 1000).toFixed(1) + 's'; + console.log( + `${r.stateCode}\t${r.citiesCrawled}\t${r.locationsFound}\t${r.locationsUpserted}\t\t${duration}\t\t${status}` + ); + }); + + // Final count from database + console.log(''); + console.log('='.repeat(70)); + console.log('DATABASE TOTALS'); + console.log('='.repeat(70)); + + const { rows: locationCounts } = await pool.query(` + SELECT + state_code, + COUNT(*) as count, + COUNT(CASE WHEN status = 'discovered' THEN 1 END) as discovered, + COUNT(CASE WHEN status = 'promoted' THEN 1 END) as promoted + FROM dutchie_discovery_locations + WHERE active = TRUE + GROUP BY state_code + ORDER BY count DESC + `); + + console.log('State\tTotal\tDiscovered\tPromoted'); + console.log('-'.repeat(50)); + locationCounts.forEach((row: any) => { + console.log(`${row.state_code || 'N/A'}\t${row.count}\t${row.discovered}\t\t${row.promoted}`); + }); + + const { rows: totalRow } = await pool.query(` + SELECT COUNT(*) as total FROM dutchie_discovery_locations WHERE active = TRUE + `); + console.log('-'.repeat(50)); + console.log(`TOTAL: ${totalRow[0].total} locations in discovery table`); + + const { rows: dispRow } = await pool.query(` + SELECT COUNT(*) as total FROM dispensaries WHERE menu_type = 'dutchie' + `); + console.log(`DISPENSARIES: ${dispRow[0].total} Dutchie dispensaries in main table`); + + // Clear progress file on successful completion of all states + if (results.length === US_STATES.length || (savedProgress && progress.completedStates.length === US_STATES.length)) { + clearProgress(); + console.log('\n[Progress] All states completed! 
Progress file cleared.'); + } + + } finally { + await pool.end(); + } +} + +main().catch((error) => { + console.error('Fatal error:', error); + process.exit(1); +}); diff --git a/backend/src/scripts/estimate-bandwidth.ts b/backend/src/scripts/estimate-bandwidth.ts new file mode 100644 index 00000000..9bbd2683 --- /dev/null +++ b/backend/src/scripts/estimate-bandwidth.ts @@ -0,0 +1,173 @@ +import axios from 'axios'; +import { Pool } from 'pg'; + +const DUTCHIE_GRAPHQL_URL = 'https://dutchie.com/graphql'; + +const MENU_PRODUCTS_QUERY = ` + query FilteredProducts($productsFilter: ProductFilterInput!) { + filteredProducts(productsFilter: $productsFilter) { + products { + id + name + brand + category + subcategory + strainType + description + image + images { + id + url + } + posId + potencyCbd { + formatted + range + unit + } + potencyThc { + formatted + range + unit + } + variants { + id + option + price + priceMed + priceRec + quantity + specialPrice + } + status + } + } + } +`; + +function formatBytes(bytes: number): string { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(2)} KB`; + if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(2)} MB`; + return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`; +} + +async function measureRequest(dispensaryId: string, mode: 'A' | 'B') { + const variables: any = { + productsFilter: { + dispensaryId, + pricingType: 'rec', + Status: mode === 'A' ? 
'Active' : null, + } + }; + + const requestBody = JSON.stringify({ + query: MENU_PRODUCTS_QUERY, + variables, + }); + + const requestSize = Buffer.byteLength(requestBody, 'utf8'); + + try { + const response = await axios.post(DUTCHIE_GRAPHQL_URL, requestBody, { + headers: { + 'Content-Type': 'application/json', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', + 'Origin': 'https://dutchie.com', + }, + timeout: 30000, + }); + + const responseSize = Buffer.byteLength(JSON.stringify(response.data), 'utf8'); + const productCount = response.data?.data?.filteredProducts?.products?.length || 0; + + // Debug: show what we got + if (productCount === 0) { + console.log(` Response preview: ${JSON.stringify(response.data).slice(0, 300)}...`); + } + + return { requestSize, responseSize, productCount }; + } catch (error: any) { + console.error(` Error: ${error.message}`); + if (error.response) { + console.error(` Status: ${error.response.status}`); + console.error(` Data: ${JSON.stringify(error.response.data).slice(0, 200)}`); + } + return { requestSize, responseSize: 0, productCount: 0, error: error.message }; + } +} + +async function main() { + const pool = new Pool({ connectionString: process.env.DATABASE_URL }); + + // Get one store with products (use a known good ID) + const { rows } = await pool.query(` + SELECT d.platform_dispensary_id, d.name, COUNT(sp.id) as product_count + FROM dispensaries d + LEFT JOIN store_products sp ON d.id = sp.dispensary_id + WHERE d.platform_dispensary_id IS NOT NULL + GROUP BY d.id + ORDER BY product_count DESC + LIMIT 1 + `); + + if (rows.length === 0) { + console.log('No crawlable stores found'); + await pool.end(); + return; + } + + const store = rows[0]; + console.log('=== Dutchie GraphQL Bandwidth for One Store ===\n'); + console.log(`Store: ${store.name}`); + console.log(`Platform ID: ${store.platform_dispensary_id}`); + console.log(`Products in DB: ${store.product_count || 'unknown'}\n`); + + // Mode A 
(Active products with pricing) + console.log('Fetching Mode A (Active products)...'); + const modeA = await measureRequest(store.platform_dispensary_id, 'A'); + + // Mode B (All products) + console.log('Fetching Mode B (All products)...'); + const modeB = await measureRequest(store.platform_dispensary_id, 'B'); + + console.log('\n=== Results for ONE STORE ==='); + console.log('\nMode A (Active products with pricing):'); + console.log(` Request size: ${formatBytes(modeA.requestSize)}`); + console.log(` Response size: ${formatBytes(modeA.responseSize)}`); + console.log(` Products: ${modeA.productCount}`); + if (modeA.productCount > 0) { + console.log(` Per product: ${formatBytes(modeA.responseSize / modeA.productCount)}`); + } + + console.log('\nMode B (All products incl. OOS):'); + console.log(` Request size: ${formatBytes(modeB.requestSize)}`); + console.log(` Response size: ${formatBytes(modeB.responseSize)}`); + console.log(` Products: ${modeB.productCount}`); + if (modeB.productCount > 0) { + console.log(` Per product: ${formatBytes(modeB.responseSize / modeB.productCount)}`); + } + + console.log('\nDual-Mode Crawl (what we actually do):'); + const totalRequest = modeA.requestSize + modeB.requestSize; + const totalResponse = modeA.responseSize + modeB.responseSize; + const totalBandwidth = totalRequest + totalResponse; + console.log(` Total request: ${formatBytes(totalRequest)}`); + console.log(` Total response: ${formatBytes(totalResponse)}`); + console.log(` TOTAL BANDWIDTH: ${formatBytes(totalBandwidth)}`); + + // Per-product average + const avgProducts = Math.max(modeA.productCount, modeB.productCount); + const bytesPerProduct = avgProducts > 0 ? 
totalResponse / avgProducts : 0; + + console.log('\n=== Quick Reference ==='); + console.log(`Average bytes per product: ~${formatBytes(bytesPerProduct)}`); + console.log(`\nTypical store sizes:`); + console.log(` Small (100 products): ~${formatBytes(bytesPerProduct * 100 + totalRequest)}`); + console.log(` Medium (300 products): ~${formatBytes(bytesPerProduct * 300 + totalRequest)}`); + console.log(` Large (500 products): ~${formatBytes(bytesPerProduct * 500 + totalRequest)}`); + + await pool.end(); +} + +main().catch(console.error); diff --git a/backend/src/scripts/retry-platform-ids.ts b/backend/src/scripts/retry-platform-ids.ts new file mode 100644 index 00000000..d16e9c69 --- /dev/null +++ b/backend/src/scripts/retry-platform-ids.ts @@ -0,0 +1,137 @@ +#!/usr/bin/env npx tsx +/** + * Retry resolving platform IDs for Dutchie stores that have menu_url but no platform_dispensary_id + * + * Usage: + * npx tsx src/scripts/retry-platform-ids.ts + */ + +import { Pool } from 'pg'; +import dotenv from 'dotenv'; +import { resolveDispensaryIdWithDetails } from '../platforms/dutchie/queries'; + +dotenv.config(); + +const pool = new Pool({ + connectionString: process.env.DATABASE_URL || + `postgresql://${process.env.CANNAIQ_DB_USER || 'dutchie'}:${process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass'}@${process.env.CANNAIQ_DB_HOST || 'localhost'}:${process.env.CANNAIQ_DB_PORT || '54320'}/${process.env.CANNAIQ_DB_NAME || 'dutchie_menus'}` +}); + +interface DispensaryRow { + id: number; + name: string; + menu_url: string; +} + +function extractSlugFromUrl(menuUrl: string): string | null { + // Extract slug from Dutchie URLs like: + // https://dutchie.com/stores/Nirvana-North-Phoenix + // https://dutchie.com/dispensary/curaleaf-dispensary-peoria + // https://dutchie.com/embedded-menu/some-slug + + const patterns = [ + /dutchie\.com\/stores\/([^/?]+)/i, + /dutchie\.com\/dispensary\/([^/?]+)/i, + /dutchie\.com\/embedded-menu\/([^/?]+)/i, + ]; + + for (const pattern of patterns) { 
+ const match = menuUrl.match(pattern); + if (match) { + return match[1]; + } + } + + return null; +} + +async function main() { + console.log('='.repeat(60)); + console.log('Retry Platform ID Resolution'); + console.log('='.repeat(60)); + console.log(''); + + // Get Dutchie dispensaries with menu_url but no platform_dispensary_id + const result = await pool.query(` + SELECT id, name, menu_url + FROM dispensaries + WHERE menu_type = 'dutchie' + AND menu_url IS NOT NULL AND menu_url != '' + AND (platform_dispensary_id IS NULL OR platform_dispensary_id = '') + ORDER BY name + `); + + console.log(`Found ${result.rows.length} stores to retry\n`); + + if (result.rows.length === 0) { + console.log('No stores need platform ID resolution.'); + await pool.end(); + return; + } + + const successes: { id: number; name: string; platformId: string }[] = []; + const failures: { id: number; name: string; slug: string | null; error: string }[] = []; + + for (const row of result.rows) { + console.log(`\n[${row.id}] ${row.name}`); + console.log(` URL: ${row.menu_url}`); + + const slug = extractSlugFromUrl(row.menu_url); + if (!slug) { + console.log(` ❌ Could not extract slug from URL`); + failures.push({ id: row.id, name: row.name, slug: null, error: 'Could not extract slug' }); + continue; + } + + console.log(` Slug: ${slug}`); + + try { + const resolveResult = await resolveDispensaryIdWithDetails(slug); + + if (resolveResult.dispensaryId) { + console.log(` βœ… Resolved: ${resolveResult.dispensaryId}`); + + // Update database + await pool.query( + 'UPDATE dispensaries SET platform_dispensary_id = $1 WHERE id = $2', + [resolveResult.dispensaryId, row.id] + ); + console.log(` πŸ’Ύ Updated database`); + + successes.push({ id: row.id, name: row.name, platformId: resolveResult.dispensaryId }); + } else { + const errorMsg = resolveResult.error || 'Unknown error'; + console.log(` ❌ Failed: ${errorMsg}`); + failures.push({ id: row.id, name: row.name, slug, error: errorMsg }); + } + } catch 
(error: any) { + console.log(` ❌ Error: ${error.message}`); + failures.push({ id: row.id, name: row.name, slug, error: error.message }); + } + + // Small delay between requests + await new Promise(r => setTimeout(r, 500)); + } + + console.log('\n' + '='.repeat(60)); + console.log('SUMMARY'); + console.log('='.repeat(60)); + + console.log(`\nβœ… Successes (${successes.length}):`); + for (const s of successes) { + console.log(` [${s.id}] ${s.name} -> ${s.platformId}`); + } + + console.log(`\n❌ Failures (${failures.length}):`); + for (const f of failures) { + console.log(` [${f.id}] ${f.name} (slug: ${f.slug || 'N/A'})`); + console.log(` ${f.error}`); + } + + await pool.end(); +} + +main().catch(e => { + console.error('Fatal error:', e); + process.exit(1); +}); diff --git a/backend/src/scripts/run-discovery.ts b/backend/src/scripts/run-discovery.ts index f14e473f..72ba0522 100644 --- a/backend/src/scripts/run-discovery.ts +++ b/backend/src/scripts/run-discovery.ts @@ -30,8 +30,8 @@ import { discoverState, getDiscoveryStats, seedKnownCities, - ARIZONA_CITIES, } from '../discovery'; +import { getCitiesForState } from '../discovery/location-discovery'; // Parse command line arguments function parseArgs() { @@ -204,16 +204,22 @@ async function main() { process.exit(1); } - let cities: any[] = []; - if (stateCode.toUpperCase() === 'AZ') { - cities = ARIZONA_CITIES; - } else { - console.error(`No predefined cities for state: ${stateCode}`); - console.error('Add cities to city-discovery.ts ARIZONA_CITIES array (or add new state arrays)'); + // Dynamically fetch cities from Dutchie + console.log(`\nFetching cities for ${stateCode} from Dutchie...\n`); + const cityNames = await getCitiesForState(stateCode.toUpperCase()); + + if (cityNames.length === 0) { + console.error(`No cities found for state: ${stateCode}`); process.exit(1); } - console.log(`\nSeeding ${cities.length} cities for ${stateCode}...\n`); + const cities = cityNames.map(name => ({ + name, + slug: 
name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''), + stateCode: stateCode.toUpperCase(), + })); + + console.log(`Seeding ${cities.length} cities for ${stateCode}...\n`); const result = await seedKnownCities(pool, cities); console.log(`Created: ${result.created} new cities`); console.log(`Updated: ${result.updated} existing cities`); diff --git a/backend/src/scripts/test-crawl-to-canonical.ts b/backend/src/scripts/test-crawl-to-canonical.ts new file mode 100644 index 00000000..752b9e4b --- /dev/null +++ b/backend/src/scripts/test-crawl-to-canonical.ts @@ -0,0 +1,271 @@ +#!/usr/bin/env npx tsx +/** + * Test Script: Crawl a single dispensary and write to canonical tables + * + * This script: + * 1. Fetches products from Dutchie GraphQL + * 2. Normalizes via DutchieNormalizer + * 3. Writes to store_products, product_variants, snapshots via hydrateToCanonical + * + * Usage: + * npx tsx src/scripts/test-crawl-to-canonical.ts + * npx tsx src/scripts/test-crawl-to-canonical.ts 235 + */ + +import { Pool } from 'pg'; +import dotenv from 'dotenv'; +import { + executeGraphQL, + GRAPHQL_HASHES, + DUTCHIE_CONFIG, +} from '../platforms/dutchie'; +import { + DutchieNormalizer, + hydrateToCanonical, +} from '../hydration'; + +dotenv.config(); + +// ============================================================ +// DATABASE CONNECTION +// ============================================================ + +function getConnectionString(): string { + if (process.env.CANNAIQ_DB_URL) { + return process.env.CANNAIQ_DB_URL; + } + if (process.env.DATABASE_URL) { + return process.env.DATABASE_URL; + } + const host = process.env.CANNAIQ_DB_HOST || 'localhost'; + const port = process.env.CANNAIQ_DB_PORT || '54320'; + const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus'; + const user = process.env.CANNAIQ_DB_USER || 'dutchie'; + const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass'; + return `postgresql://${user}:${pass}@${host}:${port}/${name}`; +} + +const pool = 
new Pool({ connectionString: getConnectionString() }); + +// ============================================================ +// FETCH PRODUCTS FROM DUTCHIE +// ============================================================ + +interface FetchResult { + products: any[]; + totalPages: number; + totalProducts: number; +} + +async function fetchAllProducts(platformDispensaryId: string, cName: string): Promise { + const allProducts: any[] = []; + let page = 0; + let totalPages = 1; + let totalProducts = 0; + + console.log(`[Fetch] Starting fetch for ${platformDispensaryId} (cName: ${cName})`); + + while (page < totalPages && page < DUTCHIE_CONFIG.maxPages) { + const variables = { + includeEnterpriseSpecials: false, + productsFilter: { + dispensaryId: platformDispensaryId, + pricingType: 'rec', + Status: 'Active', // 'Active' = in-stock products with pricing + types: [], + useCache: true, + isDefaultSort: true, + sortBy: 'popularSortIdx', + sortDirection: 1, + bypassOnlineThresholds: true, + isKioskMenu: false, + removeProductsBelowOptionThresholds: false, + }, + page, + perPage: DUTCHIE_CONFIG.perPage, + }; + + try { + const result = await executeGraphQL( + 'FilteredProducts', + variables, + GRAPHQL_HASHES.FilteredProducts, + { cName, maxRetries: 3 } + ); + + const data = result?.data?.filteredProducts; + if (!data) { + console.error(`[Fetch] No data returned for page ${page}`); + break; + } + + const products = data.products || []; + totalProducts = data.queryInfo?.totalCount || 0; + totalPages = Math.ceil(totalProducts / DUTCHIE_CONFIG.perPage); + + allProducts.push(...products); + console.log(`[Fetch] Page ${page + 1}/${totalPages}: ${products.length} products (total so far: ${allProducts.length})`); + + page++; + + if (page < totalPages) { + await new Promise(r => setTimeout(r, DUTCHIE_CONFIG.pageDelayMs)); + } + } catch (error: any) { + console.error(`[Fetch] Error on page ${page}: ${error.message}`); + break; + } + } + + return { products: allProducts, totalPages, 
totalProducts }; +} + +// ============================================================ +// MAIN +// ============================================================ + +async function main() { + const dispensaryId = parseInt(process.argv[2], 10); + + if (!dispensaryId) { + console.error('Usage: npx tsx src/scripts/test-crawl-to-canonical.ts '); + console.error('Example: npx tsx src/scripts/test-crawl-to-canonical.ts 235'); + process.exit(1); + } + + console.log('============================================================'); + console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`); + console.log('============================================================\n'); + + try { + // Step 1: Get dispensary info + console.log('[Step 1] Getting dispensary info...'); + const dispResult = await pool.query(` + SELECT id, name, platform_dispensary_id, menu_url + FROM dispensaries + WHERE id = $1 + `, [dispensaryId]); + + if (dispResult.rows.length === 0) { + throw new Error(`Dispensary ${dispensaryId} not found`); + } + + const disp = dispResult.rows[0]; + console.log(` Name: ${disp.name}`); + console.log(` Platform ID: ${disp.platform_dispensary_id}`); + console.log(` Menu URL: ${disp.menu_url}`); + + if (!disp.platform_dispensary_id) { + throw new Error('Dispensary does not have a platform_dispensary_id'); + } + + // Extract cName from menu_url + const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/); + const cName = cNameMatch ? cNameMatch[1] : 'dispensary'; + console.log(` cName: ${cName}\n`); + + // Step 2: Fetch products from Dutchie + console.log('[Step 2] Fetching products from Dutchie GraphQL...'); + const fetchResult = await fetchAllProducts(disp.platform_dispensary_id, cName); + console.log(` Total products fetched: ${fetchResult.products.length}\n`); + + if (fetchResult.products.length === 0) { + console.log('No products fetched. 
Exiting.'); + process.exit(0); + } + + // Step 3: Normalize + console.log('[Step 3] Normalizing products...'); + const normalizer = new DutchieNormalizer(); + + // Construct a RawPayload structure that the normalizer expects + // The normalizer.normalize() expects: { raw_json, dispensary_id, ... } + const rawPayloadForValidation = { + products: fetchResult.products, + queryInfo: { + totalCount: fetchResult.totalProducts, + }, + }; + + const validation = normalizer.validatePayload(rawPayloadForValidation); + if (!validation.valid) { + console.error(` Validation failed: ${validation.errors?.join(', ')}`); + process.exit(1); + } + console.log(` Validation: PASS`); + + // Build proper RawPayload for normalize() + const rawPayload = { + id: `test-${Date.now()}`, + dispensary_id: dispensaryId, + crawl_run_id: null, + platform: 'dutchie', + payload_version: 1, + raw_json: rawPayloadForValidation, + product_count: fetchResult.totalProducts, + pricing_type: 'rec', + crawl_mode: 'active', + fetched_at: new Date(), + processed: false, + normalized_at: null, + hydration_error: null, + hydration_attempts: 0, + created_at: new Date(), + }; + + const normResult = normalizer.normalize(rawPayload); + console.log(` Normalized products: ${normResult.products.length}`); + console.log(` Brands extracted: ${normResult.brands.length}`); + console.log(` Sample product: ${normResult.products[0]?.name}\n`); + + // Step 4: Write to canonical tables + console.log('[Step 4] Writing to canonical tables via hydrateToCanonical...'); + const hydrateResult = await hydrateToCanonical( + pool, + dispensaryId, + normResult, + null // no crawl_run_id for this test + ); + + console.log(` Products upserted: ${hydrateResult.productsUpserted}`); + console.log(` Products new: ${hydrateResult.productsNew}`); + console.log(` Snapshots created: ${hydrateResult.snapshotsCreated}`); + console.log(` Variants upserted: ${hydrateResult.variantsUpserted}`); + console.log(` Brands created: 
${hydrateResult.brandsCreated}\n`); + + // Step 5: Verify + console.log('[Step 5] Verifying data in canonical tables...'); + + const productCount = await pool.query(` + SELECT COUNT(*) as count FROM store_products WHERE dispensary_id = $1 + `, [dispensaryId]); + console.log(` store_products count: ${productCount.rows[0].count}`); + + const variantCount = await pool.query(` + SELECT COUNT(*) as count FROM product_variants WHERE dispensary_id = $1 + `, [dispensaryId]); + console.log(` product_variants count: ${variantCount.rows[0].count}`); + + const snapshotCount = await pool.query(` + SELECT COUNT(*) as count FROM store_product_snapshots WHERE dispensary_id = $1 + `, [dispensaryId]); + console.log(` store_product_snapshots count: ${snapshotCount.rows[0].count}`); + + console.log('\n============================================================'); + console.log('SUCCESS - Crawl and hydration complete!'); + console.log('============================================================'); + + } catch (error: any) { + console.error('\n============================================================'); + console.error('ERROR:', error.message); + console.error('============================================================'); + if (error.stack) { + console.error(error.stack); + } + process.exit(1); + } finally { + await pool.end(); + } +} + +main(); diff --git a/backend/src/seo/settings.ts b/backend/src/seo/settings.ts new file mode 100644 index 00000000..6ded6369 --- /dev/null +++ b/backend/src/seo/settings.ts @@ -0,0 +1,521 @@ +/** + * SEO Settings Helper Module + * + * Provides functions for managing SEO configuration stored in seo_settings table. + */ + +import { getPool } from '../db/pool'; + +// Default settings - used when table is empty or for reset +export const DEFAULT_SETTINGS: Record = { + // Section 1: Global Content Generation Settings + primary_prompt_template: `You are a cannabis industry content expert creating SEO-optimized content for {{page_type}} pages. 
+ +Topic: {{subject}} +Focus Areas: {{focus_areas}} +Tone: {{tone}} +Length: {{length}} + +Generate engaging, informative content that: +1. Uses natural keyword placement +2. Provides value to cannabis consumers +3. Maintains compliance with industry standards +4. Includes relevant local market data +5. Avoids technical jargon about data collection + +Write content that feels authentic and helpful, not automated.`, + + regeneration_template: `You are improving existing SEO content for a {{page_type}} page. + +=== ORIGINAL CONTENT === +{{original_content}} + +=== IMPROVEMENT AREAS === +{{improvement_areas}} + +=== FRESH DATA === +{{fresh_data}} + +=== REQUIREMENTS === +- Tone: {{tone}} +- Length: {{length}} +- Preserve accurate information from original +- Update outdated statistics with fresh data +- Improve SEO keyword density naturally +- Enhance readability and engagement +- Maintain compliance with cannabis industry standards +- Keep the same content structure unless improvement is needed + +Generate the improved version, preserving what works while addressing the improvement areas.`, + + default_content_length: 'medium', + tone_voice: 'informational', + + // ============================================================================ + // TEMPLATE LIBRARY - Complete Page Type Templates + // ============================================================================ + + state_page_template: `# {{state_name}} Dispensaries - Your Cannabis Guide + +Explore **{{dispensary_count}} licensed dispensaries** across {{state_name}}. Our comprehensive directory features {{product_count}}+ products from {{brand_count}} trusted brands, with real-time menu updates and pricing. + +## Why Shop Cannabis in {{state_name}}? + +{{state_name}} offers a thriving cannabis market with diverse product selections and competitive pricing. Whether you're looking for premium flower, convenient vapes, or precisely dosed edibles, you'll find options to match your preferences. 
+ +## Top Cannabis Brands in {{state_name}} + +{{top_brands}} + +These brands are available at dispensaries across the state, known for quality, consistency, and consumer trust. + +## Popular Product Categories + +{{top_categories}} + +Find everything from traditional flower to innovative concentrates and wellness-focused CBD products. + +## {{state_name}} Cannabis Market Overview + +| Metric | Value | +|--------|-------| +| Licensed Dispensaries | {{dispensary_count}} | +| Products Available | {{product_count}}+ | +| Active Brands | {{brand_count}} | +| Average Price | \${{avg_price}} | + +## Finding the Right Dispensary + +Use our search tools to filter by location, product availability, and store hours. Compare menus across dispensaries to find the best selection for your needs. + +--- +*Market data continuously updated. Last refresh: {{last_updated}}*`, + + city_page_template: `# {{city_name}}, {{state_code}} Cannabis Dispensaries + +Discover **{{dispensary_count}} dispensaries** in {{city_name}}, {{state_name}}. Browse {{product_count}} products from {{brand_count}} local and national brands. + +## Cannabis Shopping in {{city_name}} + +{{city_name}} offers convenient access to quality cannabis products through licensed retail locations. Our directory helps you find the perfect dispensary based on location, selection, and reviews. + +## Featured Dispensaries in {{city_name}} + +{{popular_dispensaries}} + +## Explore Nearby Cities + +Looking for more options? Check out dispensaries in these nearby areas: + +{{nearby_cities}} + +## {{city_name}} Market Snapshot + +- **Local Stores**: {{dispensary_count}} +- **Products Available**: {{product_count}} +- **Average Price**: \${{avg_price}} + +## What to Expect + +{{city_name}} dispensaries offer a range of experiences from boutique shops to high-volume retail stores. First-time visitors should bring valid ID and check store hours before visiting. 
+ +--- +*Find your local dispensary and start shopping today.*`, + + category_page_template: `# {{category_name}} Products in {{state_name}} + +Explore **{{product_count}} {{category_name}} products** from {{brand_count}} trusted brands across {{state_name}} dispensaries. + +## About {{category_name}} + +{{category_name}} remains one of the most popular cannabis product categories, offering options for every preference and experience level. + +## Popular {{category_name}} Varieties + +{{top_strains}} + +## Browse by Type + +{{subcategories}} + +## {{category_name}} Pricing in {{state_name}} + +- **Average Price**: \${{avg_price}} +- **Budget Options**: Starting under $25 +- **Premium Selection**: $50+ + +## How to Choose {{category_name}} + +Consider potency levels, terpene profiles, and intended effects when selecting {{category_name}} products. Our filters help you narrow down options by THC/CBD content, brand, and price range. + +## Shop {{category_name}} Near You + +Find {{category_name}} products at dispensaries across {{state_name}}. Use our location search to find stores with current inventory.`, + + brand_page_template: `# {{brand_name}} - Cannabis Products & Store Locator + +{{description}} + +## Where to Find {{brand_name}} + +{{brand_name}} products are available at **{{store_count}} dispensaries** across multiple states: + +{{state_presence}} + +## {{brand_name}} Product Categories + +{{categories}} + +## Brand Statistics + +| Metric | Value | +|--------|-------| +| Total Products | {{product_count}} | +| Retail Partners | {{store_count}} | +| Average Price | \${{avg_price}} | + +## Why Choose {{brand_name}}? + +Consumers trust {{brand_name}} for consistent quality, transparent lab testing, and innovative product development. Whether you're a long-time fan or discovering them for the first time, explore their full lineup at dispensaries near you. + +## Shop {{brand_name}} Products + +Find {{brand_name}} at a dispensary near you. 
Compare prices and availability across stores to get the best deal.`, + + product_page_template: `# {{product_name}} + +**{{brand_name}}** | {{category}} + +## Product Details + +| Attribute | Value | +|-----------|-------| +| THC Content | {{thc_percent}}% | +| CBD Content | {{cbd_percent}}% | +| Category | {{category}} | +| Brand | {{brand_name}} | + +## Availability + +{{#if in_stock}} +**In Stock** at {{dispensary_name}} +{{else}} +**Currently Unavailable** at {{dispensary_name}} +{{/if}} + +πŸ“ {{dispensary_city}}, {{state_name}} + +## Pricing + +**\${{price}}** + +*Prices may vary by location. Check dispensary menu for current pricing.* + +## About This Product + +{{product_name}} from {{brand_name}} offers a quality {{category}} experience. Visit {{dispensary_name}} to learn more about this product and explore similar options. + +## Find More {{brand_name}} Products + +Browse the complete {{brand_name}} lineup and find products at dispensaries across {{state_name}}.`, + + search_results_template: `# Search Results: "{{query}}" + +Found **{{result_count}} results** across {{state_name}} dispensaries. + +## Results Overview + +| Category | Count | +|----------|-------| +| Products | {{product_results}} | +| Dispensaries | {{dispensary_results}} | +| Brands | {{brand_results}} | + +## Top Categories for "{{query}}" + +{{top_categories}} + +## Refine Your Search + +Use our filters to narrow results by: +- **Category**: Flower, Vape, Edibles, Concentrates, and more +- **Price Range**: Budget-friendly to premium options +- **Brand**: Shop your favorite brands +- **Location**: Find nearby dispensaries + +## Popular Related Searches + +Explore related products and categories to find exactly what you're looking for. + +--- +*Can't find what you need? 
Try broadening your search terms or browse by category.*`, + + // ============================================================================ + // Section 2: Automatic Refresh Rules + // ============================================================================ + auto_refresh_interval: 'weekly', + trigger_pct_product_change: true, + trigger_pct_brand_change: true, + trigger_new_stores: true, + trigger_market_shift: false, + webhook_url: '', + notify_on_trigger: false, + + // Section 3: Page-Level Defaults + default_title_template: '{{state_name}} Dispensaries | Find Cannabis Near You | CannaiQ', + default_meta_description_template: 'Discover the best dispensaries in {{state_name}}. Browse {{dispensary_count}}+ licensed retailers, compare prices, and find cannabis products near you.', + default_slug_template: 'dispensaries-{{state_code_lower}}', + default_og_image_template: '/images/seo/og-{{state_code_lower}}.jpg', + enable_ai_images: false, + + // Section 4: Crawl / Dataset Configuration + primary_data_provider: 'cannaiq', + fallback_data_provider: 'dutchie', + min_data_freshness_hours: 24, + stale_data_behavior: 'allow_with_warning', +}; + +/** + * Get a single setting by key + */ +export async function getSetting(key: string): Promise { + const pool = getPool(); + + try { + const result = await pool.query( + 'SELECT value FROM seo_settings WHERE key = $1', + [key] + ); + + if (result.rows.length === 0) { + // Return default if not found + return DEFAULT_SETTINGS[key] ?? null; + } + + return result.rows[0].value; + } catch (error: any) { + console.error(`[SEO Settings] Error getting setting "${key}":`, error.message); + // Return default on error + return DEFAULT_SETTINGS[key] ?? 
null; + } +} + +/** + * Set a single setting + */ +export async function setSetting(key: string, value: any): Promise { + const pool = getPool(); + + try { + await pool.query( + `INSERT INTO seo_settings (key, value, updated_at) + VALUES ($1, $2, NOW()) + ON CONFLICT (key) DO UPDATE SET + value = EXCLUDED.value, + updated_at = NOW()`, + [key, JSON.stringify(value)] + ); + } catch (error: any) { + console.error(`[SEO Settings] Error setting "${key}":`, error.message); + throw error; + } +} + +/** + * Get all settings as a key/value object + */ +export async function getAllSettings(): Promise> { + const pool = getPool(); + + try { + const result = await pool.query('SELECT key, value FROM seo_settings'); + + // Start with defaults + const settings: Record = { ...DEFAULT_SETTINGS }; + + // Override with stored values + for (const row of result.rows) { + settings[row.key] = row.value; + } + + return settings; + } catch (error: any) { + console.error('[SEO Settings] Error getting all settings:', error.message); + // Return defaults on error + return { ...DEFAULT_SETTINGS }; + } +} + +/** + * Set multiple settings at once + */ +export async function setMultipleSettings(settings: Record): Promise { + const pool = getPool(); + + try { + // Use a transaction for bulk updates + await pool.query('BEGIN'); + + for (const [key, value] of Object.entries(settings)) { + await pool.query( + `INSERT INTO seo_settings (key, value, updated_at) + VALUES ($1, $2, NOW()) + ON CONFLICT (key) DO UPDATE SET + value = EXCLUDED.value, + updated_at = NOW()`, + [key, JSON.stringify(value)] + ); + } + + await pool.query('COMMIT'); + } catch (error: any) { + await pool.query('ROLLBACK'); + console.error('[SEO Settings] Error setting multiple settings:', error.message); + throw error; + } +} + +/** + * Reset all settings to defaults + */ +export async function resetToDefaults(): Promise> { + const pool = getPool(); + + try { + await pool.query('BEGIN'); + + // Delete all existing settings + await 
pool.query('DELETE FROM seo_settings'); + + // Insert all defaults + for (const [key, value] of Object.entries(DEFAULT_SETTINGS)) { + await pool.query( + `INSERT INTO seo_settings (key, value, created_at, updated_at) + VALUES ($1, $2, NOW(), NOW())`, + [key, JSON.stringify(value)] + ); + } + + await pool.query('COMMIT'); + + return { ...DEFAULT_SETTINGS }; + } catch (error: any) { + await pool.query('ROLLBACK'); + console.error('[SEO Settings] Error resetting to defaults:', error.message); + throw error; + } +} + +/** + * Ensure settings table exists and has defaults + * Call this on app startup + */ +export async function ensureSettingsExist(): Promise { + const pool = getPool(); + + try { + // Check if table exists + const tableCheck = await pool.query(` + SELECT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = 'seo_settings' + ) + `); + + if (!tableCheck.rows[0].exists) { + // Create table + await pool.query(` + CREATE TABLE IF NOT EXISTS seo_settings ( + id SERIAL PRIMARY KEY, + key TEXT UNIQUE NOT NULL, + value JSONB NOT NULL, + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() + ) + `); + } + + // Check if settings exist + const countResult = await pool.query('SELECT COUNT(*) FROM seo_settings'); + const count = parseInt(countResult.rows[0].count, 10); + + if (count === 0) { + // Seed with defaults + for (const [key, value] of Object.entries(DEFAULT_SETTINGS)) { + await pool.query( + `INSERT INTO seo_settings (key, value) + VALUES ($1, $2) + ON CONFLICT (key) DO NOTHING`, + [key, JSON.stringify(value)] + ); + } + console.log('[SEO Settings] Seeded default settings'); + } + } catch (error: any) { + console.error('[SEO Settings] Error ensuring settings exist:', error.message); + } +} + +/** + * Build a prompt using settings and template variables + */ +export function buildPrompt( + template: string, + variables: Record +): string { + let result = template; + + for (const [key, value] of Object.entries(variables)) { + 
result = result.replace(new RegExp(`{{${key}}}`, 'g'), value); + } + + return result; +} + +/** + * Get content generation settings as a structured object + */ +export async function getContentGenerationSettings(): Promise<{ + promptTemplate: string; + regenerationTemplate: string; + contentLength: 'short' | 'medium' | 'long'; + tone: 'neutral' | 'informational' | 'consumer' | 'authoritative'; +}> { + const settings = await getAllSettings(); + + return { + promptTemplate: settings.primary_prompt_template, + regenerationTemplate: settings.regeneration_prompt_template, + contentLength: settings.default_content_length, + tone: settings.tone_voice, + }; +} + +/** + * Check if data is stale based on settings + */ +export async function checkDataFreshness(lastCrawlAt: Date | null): Promise<{ + isFresh: boolean; + behavior: 'block_generation' | 'allow_with_warning' | 'auto_trigger_crawl'; + hoursStale: number; +}> { + const settings = await getAllSettings(); + const maxHours = settings.min_data_freshness_hours || 24; + const behavior = settings.stale_data_behavior || 'allow_with_warning'; + + if (!lastCrawlAt) { + return { + isFresh: false, + behavior, + hoursStale: Infinity, + }; + } + + const hoursStale = (Date.now() - lastCrawlAt.getTime()) / (1000 * 60 * 60); + + return { + isFresh: hoursStale <= maxHours, + behavior, + hoursStale: Math.round(hoursStale), + }; +} diff --git a/backend/src/seo/template-engine.ts b/backend/src/seo/template-engine.ts new file mode 100644 index 00000000..3bab1957 --- /dev/null +++ b/backend/src/seo/template-engine.ts @@ -0,0 +1,369 @@ +/** + * SEO Template Engine + * + * Handles template selection, variable injection, and content generation + * for different page types (state, city, category, brand, product, search). 
/* end of the module header comment that opens above this chunk */

import { getAllSettings, getSetting } from './settings';

// NOTE(review): this module was recovered from a whitespace-collapsed patch in which generic
// type arguments (`Record<...>`, `Promise<...>`) had been stripped. They are restored below
// from how the values are used - confirm against the repository.

/** Page types supported by the template engine. */
export type PageType = 'state' | 'city' | 'category' | 'brand' | 'product' | 'search';

/** Open-ended bag of template variables (page data, mock data, regeneration context). */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- values are arbitrary caller-supplied JSON
type TemplateData = Record<string, any>;

/** Metadata describing one stored template, as returned by getAllTemplates(). */
interface TemplateInfo {
  key: string;
  template: string;
  description: string;
  availableVariables: string[];
}

/** Settings key under which the template for each page type is stored. */
export const TEMPLATE_KEYS: Record<PageType, string> = {
  state: 'state_page_template',
  city: 'city_page_template',
  category: 'category_page_template',
  brand: 'brand_page_template',
  product: 'product_page_template',
  search: 'search_results_template',
};

/** Sample mock data used to render previews, one record per page type. */
export const MOCK_DATA: Record<PageType, TemplateData> = {
  state: {
    state_name: 'Arizona',
    state_code: 'AZ',
    state_code_lower: 'az',
    dispensary_count: 156,
    product_count: 12450,
    brand_count: 287,
    category_count: 8,
    top_brands: ['Raw Garden', 'Stiiizy', 'Select', 'Pax', 'Bloom'],
    top_categories: ['Flower', 'Vape', 'Edibles', 'Concentrate', 'Pre-rolls'],
    avg_price: 42.50,
    // Evaluated once, when the module is first loaded.
    last_updated: new Date().toISOString().split('T')[0],
  },
  city: {
    city_name: 'Phoenix',
    state_name: 'Arizona',
    state_code: 'AZ',
    dispensary_count: 45,
    product_count: 3200,
    brand_count: 120,
    nearby_cities: ['Scottsdale', 'Tempe', 'Mesa', 'Glendale'],
    popular_dispensaries: ['Harvest', 'Curaleaf', 'Zen Leaf'],
    avg_price: 40.00,
  },
  category: {
    category_name: 'Flower',
    category_slug: 'flower',
    product_count: 4500,
    brand_count: 95,
    state_name: 'Arizona',
    avg_price: 35.00,
    top_strains: ['Blue Dream', 'OG Kush', 'Girl Scout Cookies'],
    subcategories: ['Indica', 'Sativa', 'Hybrid'],
  },
  brand: {
    brand_name: 'Raw Garden',
    brand_slug: 'raw-garden',
    product_count: 156,
    state_presence: ['AZ', 'CA', 'NV', 'CO'],
    store_count: 89,
    avg_price: 45.00,
    categories: ['Concentrate', 'Vape', 'Live Resin'],
    description: 'Premium cannabis products from California',
  },
  product: {
    product_name: 'Blue Dream Cartridge',
    brand_name: 'Select',
    category: 'Vape',
    thc_percent: 85.5,
    cbd_percent: 0.5,
    price: 45.00,
    dispensary_name: 'Harvest HOC',
    dispensary_city: 'Phoenix',
    state_name: 'Arizona',
    in_stock: true,
  },
  search: {
    query: 'live resin',
    result_count: 245,
    product_results: 180,
    dispensary_results: 45,
    brand_results: 20,
    state_name: 'Arizona',
    top_categories: ['Concentrate', 'Vape'],
  },
};

/** Page type used whenever the requested one is missing or unknown. */
const DEFAULT_PAGE_TYPE: PageType = 'state';

/** Variables that regenerateContent() adds on top of the page-specific data. */
const REGENERATION_VARIABLES = [
  'original_content',
  'page_type',
  'improvement_areas',
  'tone',
  'length',
] as const;

// Matches {{variable}} placeholders. Only used with replace()/matchAll(), never with
// exec()/test(), so no lastIndex state leaks between calls.
const VARIABLE_PATTERN = /\{\{(\w+)\}\}/g;

/** True when `key` is an own property of `obj`; inherited members such as `constructor` do not count. */
function hasOwn(obj: object, key: string): boolean {
  return Object.prototype.hasOwnProperty.call(obj, key);
}

/** Normalizes a requested page type (case/whitespace-insensitive); undefined when it is not a known type. */
function resolvePageType(pageType: string | null | undefined): PageType | undefined {
  const normalized = String(pageType ?? '').toLowerCase().trim();
  return hasOwn(TEMPLATE_KEYS, normalized) ? (normalized as PageType) : undefined;
}

/** Converts a template value to the text inserted into the rendered output. */
function formatTemplateValue(value: unknown): string {
  if (value === undefined || value === null) return '';
  if (Array.isArray(value)) return value.join(', ');
  if (typeof value === 'object') return JSON.stringify(value) ?? '';
  return String(value);
}

/** Lists the distinct variable names used in a template, in order of first appearance. */
function extractTemplateVariables(template: string): string[] {
  const names = new Set<string>();
  for (const match of (template ?? '').matchAll(VARIABLE_PATTERN)) {
    names.add(match[1]);
  }
  return [...names];
}

/**
 * Apply template variables to a template string.
 * Replaces {{variable}} with values from the data object.
 *
 * Rules:
 * - Every {{variable}} whose name is an own key of `data` is replaced
 * - Unknown variables are left unchanged
 * - undefined/null values become an empty string
 * - Arrays are joined with ", "; other objects are JSON-encoded
 *
 * Substitution is a single pass over the template: text inserted for one variable is
 * never scanned again, so a value that itself contains "{{name}}" is emitted literally.
 *
 * @param template - Template text; a falsy template yields an empty string
 * @param data - Variable values keyed by variable name; a missing object is treated as empty
 * @returns The rendered text
 */
export function applyTemplateVariables(
  template: string,
  data: TemplateData
): string {
  if (!template) return '';

  const values: TemplateData = data ?? {};

  return template.replace(VARIABLE_PATTERN, (placeholder: string, name: string): string =>
    hasOwn(values, name) ? formatTemplateValue(values[name]) : placeholder
  );
}

/**
 * Get the stored template for a page type.
 * Matching ignores case and surrounding whitespace; an unknown page type logs a
 * warning and falls back to the state template.
 *
 * @param pageType - Requested page type
 * @returns The template read through getSetting()
 */
export async function getTemplateForPageType(pageType: string): Promise<string> {
  const resolved = resolvePageType(pageType);

  if (!resolved) {
    console.warn(`[TemplateEngine] Unknown page type: ${pageType}, falling back to state template`);
    return getSetting(TEMPLATE_KEYS[DEFAULT_PAGE_TYPE]);
  }

  return getSetting(TEMPLATE_KEYS[resolved]);
}

/**
 * Get the regeneration template.
 */
export async function getRegenerationTemplate(): Promise<string> {
  return getSetting('regeneration_template');
}

/**
 * Generate content for a page using the template for its page type.
 *
 * @param pageType - Requested page type (unknown types use the state template)
 * @param data - Variable values for the page
 * @returns The rendered content, the settings key of the template that was used, and the
 *          template variables for which `data` supplied a key
 */
export async function generatePageContent(
  pageType: string,
  data: TemplateData
): Promise<{
  content: string;
  templateUsed: string;
  variablesApplied: string[];
}> {
  const template = await getTemplateForPageType(pageType);
  const values: TemplateData = data ?? {};

  return {
    content: applyTemplateVariables(template, values),
    // Resolved the same way as the template lookup above, so the two always agree.
    templateUsed: TEMPLATE_KEYS[resolvePageType(pageType) ?? DEFAULT_PAGE_TYPE],
    variablesApplied: extractTemplateVariables(template).filter(name => hasOwn(values, name)),
  };
}

/**
 * Generate a preview by rendering a template with the mock data for a page type.
 *
 * @param pageType - Requested page type; missing or unknown types preview as "state"
 * @param customTemplate - Template to preview instead of the stored one (ignored when empty)
 * @returns The rendered preview, the template that was rendered, the mock data used, and
 *          the variable names that mock data provides
 */
export async function generatePreview(
  pageType: string,
  customTemplate?: string
): Promise<{
  preview: string;
  template: string;
  mockData: TemplateData;
  availableVariables: string[];
}> {
  const requested = String(pageType ?? '').trim() || DEFAULT_PAGE_TYPE;
  const template = customTemplate || (await getTemplateForPageType(requested));
  const mockData = MOCK_DATA[resolvePageType(requested) ?? DEFAULT_PAGE_TYPE];

  return {
    preview: applyTemplateVariables(template, mockData),
    template,
    mockData,
    availableVariables: Object.keys(mockData),
  };
}

/**
 * Regenerate content for a page.
 *
 * Renders two things: the regeneration template filled with the new data plus the
 * regeneration context (original content, page type, improvement areas, tone, length),
 * and the regular page template filled with the new data only.
 *
 * @param pageType - Requested page type
 * @param originalContent - Existing content, exposed to the regeneration template as `original_content`
 * @param newData - Fresh variable values for the page; a missing object is treated as empty
 * @param improvementAreas - Areas to improve; a default list is used when empty or omitted
 * @returns The re-rendered page content and the rendered regeneration prompt
 */
export async function regenerateContent(
  pageType: string,
  originalContent: string,
  newData: TemplateData,
  improvementAreas?: string[]
): Promise<{
  content: string;
  regenerationPrompt: string;
}> {
  // The two lookups are independent of each other, so fetch them together.
  const [regenerationTemplate, settings] = await Promise.all([
    getRegenerationTemplate(),
    getAllSettings(),
  ]);

  const pageData: TemplateData = newData ?? {};

  // Context keys are spread last so they win over same-named keys in the page data.
  const regenerationData: TemplateData = {
    ...pageData,
    original_content: originalContent,
    page_type: pageType,
    improvement_areas: improvementAreas?.join(', ') || 'SEO keywords, local relevance, data freshness',
    tone: settings.tone_voice || 'informational',
    length: settings.default_content_length || 'medium',
  };

  const regenerationPrompt = applyTemplateVariables(regenerationTemplate, regenerationData);

  // The new content itself comes from the regular page template.
  const pageTemplate = await getTemplateForPageType(pageType);
  const content = applyTemplateVariables(pageTemplate, pageData);

  return {
    content,
    regenerationPrompt,
  };
}

/**
 * Get all available templates and their metadata.
 *
 * @returns One entry per page type plus a "regeneration" entry; a template that is not
 *          set in the settings is reported as an empty string
 */
export async function getAllTemplates(): Promise<Record<string, TemplateInfo>> {
  const settings = await getAllSettings();

  return {
    state: {
      key: 'state_page_template',
      template: settings.state_page_template || '',
      description: 'Template for state landing pages (e.g., "Arizona Dispensaries")',
      availableVariables: Object.keys(MOCK_DATA.state),
    },
    city: {
      key: 'city_page_template',
      template: settings.city_page_template || '',
      description: 'Template for city landing pages (e.g., "Phoenix Dispensaries")',
      availableVariables: Object.keys(MOCK_DATA.city),
    },
    category: {
      key: 'category_page_template',
      template: settings.category_page_template || '',
      description: 'Template for category pages (e.g., "Flower", "Edibles")',
      availableVariables: Object.keys(MOCK_DATA.category),
    },
    brand: {
      key: 'brand_page_template',
      template: settings.brand_page_template || '',
      description: 'Template for brand pages (e.g., "Raw Garden Products")',
      availableVariables: Object.keys(MOCK_DATA.brand),
    },
    product: {
      key: 'product_page_template',
      template: settings.product_page_template || '',
      description: 'Template for individual product pages',
      availableVariables: Object.keys(MOCK_DATA.product),
    },
    search: {
      key: 'search_results_template',
      template: settings.search_results_template || '',
      description: 'Template for search results pages',
      availableVariables: Object.keys(MOCK_DATA.search),
    },
    regeneration: {
      key: 'regeneration_template',
      template: settings.regeneration_template || '',
      description: 'Template used when regenerating/improving existing content',
      availableVariables: [...REGENERATION_VARIABLES, '...page-specific variables'],
    },
  };
}

/**
 * Validate a template string.
 *
 * Reports an error when the numbers of "{{" and "}}" differ or when an empty
 * placeholder "{{}}" is present. Variables that appear in neither the mock data of any
 * page type nor the regeneration context are listed in `unknownVariables`; they do not
 * make the template invalid.
 *
 * @param template - Template text to check
 * @returns Validity flag, distinct variables found, the unknown ones among them, and error messages
 */
export function validateTemplate(template: string): {
  valid: boolean;
  variables: string[];
  unknownVariables: string[];
  errors: string[];
} {
  const errors: string[] = [];
  const variables = extractTemplateVariables(template);

  // Check for unclosed brackets
  const openBrackets = (template.match(/\{\{/g) || []).length;
  const closeBrackets = (template.match(/\}\}/g) || []).length;
  if (openBrackets !== closeBrackets) {
    errors.push('Mismatched template brackets: {{ and }} counts do not match');
  }

  // Check for empty variable names
  if (template.includes('{{}}')) {
    errors.push('Empty variable name found: {{}}');
  }

  // Known variables: every key of every mock record, plus the regeneration context.
  // Built per call so that it reflects the current contents of MOCK_DATA.
  const allKnownVariables = new Set<string>([
    ...Object.values(MOCK_DATA).flatMap(sample => Object.keys(sample)),
    ...REGENERATION_VARIABLES,
  ]);

  const unknownVariables = variables.filter(name => !allKnownVariables.has(name));

  return {
    valid: errors.length === 0,
    variables,
    unknownVariables,
    errors,
  };
}

// The last source line of this span ends with the opening tokens of the next file's diff
// header; they are kept here, commented out so the module stays valid TypeScript.
// diff --git
a/backend/src/services/LegalStateService.ts b/backend/src/services/LegalStateService.ts index b18c5b35..df7f76e2 100644 --- a/backend/src/services/LegalStateService.ts +++ b/backend/src/services/LegalStateService.ts @@ -115,7 +115,7 @@ export class LegalStateService { } /** - * Get all states with dispensary counts + * Get all states with dispensary counts (active/crawlable dispensaries only) */ async getAllStatesWithDispensaryCounts(): Promise { const { rows } = await this.pool.query(` @@ -127,6 +127,8 @@ export class LegalStateService { SELECT state_id, COUNT(*) AS cnt FROM dispensaries WHERE state_id IS NOT NULL + AND menu_type = 'dutchie' + AND platform_dispensary_id IS NOT NULL GROUP BY state_id ) d ON d.state_id = s.id ORDER BY s.name ASC @@ -324,6 +326,8 @@ export class LegalStateService { SELECT state_id, COUNT(*) AS cnt FROM dispensaries WHERE state_id IS NOT NULL + AND menu_type = 'dutchie' + AND platform_dispensary_id IS NOT NULL GROUP BY state_id ) d ON d.state_id = s.id ORDER BY s.name ASC diff --git a/backend/src/system/services/sync-orchestrator.ts b/backend/src/system/services/sync-orchestrator.ts index 4af427e8..f6e8917e 100644 --- a/backend/src/system/services/sync-orchestrator.ts +++ b/backend/src/system/services/sync-orchestrator.ts @@ -17,6 +17,7 @@ import { Pool } from 'pg'; import { MetricsService } from './metrics'; import { DLQService } from './dlq'; import { AlertService } from './alerts'; +import { DutchieNormalizer, hydrateToCanonical } from '../../hydration'; export type OrchestratorStatus = 'RUNNING' | 'SLEEPING' | 'LOCKED' | 'PAUSED' | 'ERROR'; @@ -90,6 +91,7 @@ export class SyncOrchestrator { private workerId: string; private isRunning: boolean = false; private pollInterval: NodeJS.Timeout | null = null; + private normalizer: DutchieNormalizer; constructor( pool: Pool, @@ -103,6 +105,7 @@ export class SyncOrchestrator { this.dlq = dlq; this.alerts = alerts; this.workerId = workerId || `orchestrator-${process.env.HOSTNAME || 
process.pid}`; + this.normalizer = new DutchieNormalizer(); } /** @@ -503,7 +506,7 @@ export class SyncOrchestrator { } /** - * Process a single payload + * Process a single payload - now uses canonical tables via hydration pipeline */ private async processPayload( payload: any, @@ -518,25 +521,52 @@ export class SyncOrchestrator { // Parse products from raw JSON const rawData = payload.raw_json; - const products = this.extractProducts(rawData); - if (!products || products.length === 0) { + // Validate the payload using normalizer + const validation = this.normalizer.validatePayload(rawData); + if (!validation.valid) { // Mark as processed with warning await this.pool.query(` UPDATE raw_payloads SET processed = TRUE, normalized_at = NOW(), - hydration_error = 'No products found in payload' + hydration_error = $2 + WHERE id = $1 + `, [payload.id, validation.errors.join('; ')]); + + return { productsUpserted: 0, productsInserted: 0, productsUpdated: 0, snapshotsCreated: 0 }; + } + + // Normalize the payload using the hydration normalizer + const normResult = this.normalizer.normalize(rawData); + + if (normResult.products.length === 0) { + // Mark as processed with warning + await this.pool.query(` + UPDATE raw_payloads + SET processed = TRUE, + normalized_at = NOW(), + hydration_error = 'No products found in payload after normalization' WHERE id = $1 `, [payload.id]); return { productsUpserted: 0, productsInserted: 0, productsUpdated: 0, snapshotsCreated: 0 }; } - // Upsert products to canonical table - const result = await this.upsertProducts(payload.dispensary_id, products); + // Get or create crawl_run for this payload + const crawlRunId = await this.getOrCreateCrawlRun(payload.dispensary_id, payload.id); - // Create snapshots + // Use canonical hydration to write to store_products, product_variants, etc. 
+ const hydrateResult = await hydrateToCanonical( + this.pool, + payload.dispensary_id, + normResult, + crawlRunId + ); + + // Also write to legacy tables for backwards compatibility + const products = this.extractProducts(rawData); + await this.upsertProducts(payload.dispensary_id, products); const snapshotsCreated = await this.createSnapshots(payload.dispensary_id, products, payload.id); // Calculate latency @@ -551,14 +581,32 @@ export class SyncOrchestrator { WHERE id = $1 `, [payload.id]); + // Return combined metrics (canonical + legacy) return { - productsUpserted: result.upserted, - productsInserted: result.inserted, - productsUpdated: result.updated, - snapshotsCreated, + productsUpserted: hydrateResult.productsUpserted, + productsInserted: hydrateResult.productsNew, + productsUpdated: hydrateResult.productsUpdated, + snapshotsCreated: hydrateResult.snapshotsCreated + snapshotsCreated, }; } + /** + * Get or create a crawl_run record for tracking + */ + private async getOrCreateCrawlRun(dispensaryId: number, payloadId: string): Promise { + try { + const result = await this.pool.query(` + INSERT INTO crawl_runs (dispensary_id, provider, started_at, status, trigger_type, metadata) + VALUES ($1, 'dutchie', NOW(), 'running', 'hydration', jsonb_build_object('payload_id', $2)) + RETURNING id + `, [dispensaryId, payloadId]); + return result.rows[0].id; + } catch (error) { + console.warn('[SyncOrchestrator] Could not create crawl_run:', error); + return null; + } + } + /** * Extract products from raw payload */ diff --git a/cannaiq/dist/index.html b/cannaiq/dist/index.html index 803fd5fc..b6578530 100644 --- a/cannaiq/dist/index.html +++ b/cannaiq/dist/index.html @@ -7,7 +7,7 @@ CannaIQ - Cannabis Menu Intelligence Platform - + diff --git a/cannaiq/src/App.tsx b/cannaiq/src/App.tsx index 31496b7a..529e754e 100755 --- a/cannaiq/src/App.tsx +++ b/cannaiq/src/App.tsx @@ -15,6 +15,8 @@ import { Categories } from './pages/Categories'; import { Campaigns } from 
'./pages/Campaigns'; import { Analytics } from './pages/Analytics'; import { ClickAnalytics } from './pages/ClickAnalytics'; +import { Specials } from './pages/Specials'; +import { PriceCompare } from './pages/PriceCompare'; import { Settings } from './pages/Settings'; import { Proxies } from './pages/Proxies'; import { Logs } from './pages/Logs'; @@ -23,6 +25,7 @@ import { ScraperSchedule } from './pages/ScraperSchedule'; import { ScraperTools } from './pages/ScraperTools'; import { ChangeApproval } from './pages/ChangeApproval'; import { ApiPermissions } from './pages/ApiPermissions'; +import { AISettings } from './pages/AISettings'; import { CrawlSchedulePage } from './pages/CrawlSchedulePage'; import { StoresListPage } from './pages/StoresListPage'; import { StoreDetailPage } from './pages/StoreDetailPage'; @@ -40,8 +43,10 @@ import { SyncInfoPanel } from './pages/SyncInfoPanel'; import NationalDashboard from './pages/NationalDashboard'; import StateHeatmap from './pages/StateHeatmap'; import CrossStateCompare from './pages/CrossStateCompare'; +import StateDetail from './pages/StateDetail'; import { Discovery } from './pages/Discovery'; import { WorkersDashboard } from './pages/WorkersDashboard'; +import { JobQueue } from './pages/JobQueue'; import { ScraperOverviewDashboard } from './pages/ScraperOverviewDashboard'; import { SeoOrchestrator } from './pages/admin/seo/SeoOrchestrator'; import { StatePage } from './pages/public/StatePage'; @@ -67,6 +72,8 @@ export default function App() { } /> } /> } /> + } /> + } /> } /> } /> } /> @@ -84,12 +91,14 @@ export default function App() { } /> } /> } /> + } /> } /> } /> {/* National / Multi-State routes */} } /> } /> } /> + } /> {/* Admin routes */} } /> } /> @@ -113,6 +122,8 @@ export default function App() { } /> {/* Workers Dashboard */} } /> + {/* Job Queue Management */} + } /> {/* Scraper Overview Dashboard (new primary) */} } /> } /> diff --git a/cannaiq/src/components/Layout.tsx 
b/cannaiq/src/components/Layout.tsx index 2fad5b8b..ad5878f5 100755 --- a/cannaiq/src/components/Layout.tsx +++ b/cannaiq/src/components/Layout.tsx @@ -19,7 +19,10 @@ import { FileText, Menu, X, - Users + Users, + ListOrdered, + Key, + Bot } from 'lucide-react'; interface LayoutProps { @@ -150,8 +153,11 @@ export function Layout({ children }: LayoutProps) { } label="Orchestrator" isActive={isActive('/admin/orchestrator')} /> } label="Workers" isActive={isActive('/workers')} /> + } label="Job Queue" isActive={isActive('/job-queue')} /> } label="SEO Pages" isActive={isActive('/admin/seo')} /> } label="Proxies" isActive={isActive('/proxies')} /> + } label="API Keys" isActive={isActive('/api-permissions')} /> + } label="AI Settings" isActive={isActive('/ai-settings')} /> } label="Settings" isActive={isActive('/settings')} /> diff --git a/cannaiq/src/components/StateSelector.tsx b/cannaiq/src/components/StateSelector.tsx index e6be6dd3..fcaca6df 100644 --- a/cannaiq/src/components/StateSelector.tsx +++ b/cannaiq/src/components/StateSelector.tsx @@ -31,20 +31,19 @@ export function StateSelector({ className = '', showLabel = true }: StateSelecto const fetchStates = async () => { setLoading(true); try { - // Use /api/states/legal which includes dispensary_count - const response = await api.get('/api/states/legal'); - // Response: { success, count, states: [{ code, name, dispensary_count, recreational, medical }] } + // Use /api/states which returns ALL states with dispensary counts + const response = await api.get('/api/states'); + // Response: { success, count, states: [{ code, name, dispensary_count, ... }] } const data = response.data; if (data?.states && Array.isArray(data.states)) { - // Map to { code, name } format, filtering to states with dispensaries - const statesWithData = data.states - .filter((s: { dispensary_count?: number }) => (s.dispensary_count ?? 
0) > 0) - .map((s: { code: string; name: string; dispensary_count: number }) => ({ + // Map to { code, name } format - include ALL states (not filtering by dispensary_count) + const allStates = data.states + .map((s: { code: string; name: string }) => ({ code: s.code, name: s.name, })) .sort((a: { name: string }, b: { name: string }) => a.name.localeCompare(b.name)); - setAvailableStates(statesWithData); + setAvailableStates(allStates); } } catch (error) { console.error('Failed to fetch states:', error); diff --git a/cannaiq/src/components/StoreOrchestratorPanel.tsx b/cannaiq/src/components/StoreOrchestratorPanel.tsx index 2768a7f3..7f271522 100644 --- a/cannaiq/src/components/StoreOrchestratorPanel.tsx +++ b/cannaiq/src/components/StoreOrchestratorPanel.tsx @@ -36,16 +36,17 @@ interface StoreInfo { provider_raw?: string | null; provider_display?: string; platformDispensaryId: string | null; - status: string; + // 7-stage pipeline: discovered, validated, promoted, sandbox, hydrating, production, failing + stage: string; + stageChangedAt: string | null; + firstCrawlAt: string | null; profileId: number | null; profileKey: string | null; - sandboxAttempts?: number; - nextRetryAt?: string | null; + consecutiveSuccesses: number; + consecutiveFailures: number; lastCrawlAt: string | null; lastSuccessAt: string | null; lastFailureAt: string | null; - failedAt?: string | null; - consecutiveFailures?: number; productCount: number; } @@ -346,13 +347,15 @@ export function StoreOrchestratorPanel({ }; const renderControlTab = () => { - const getStatusColor = (status: string) => { - switch (status) { + const getStageColor = (stage: string) => { + switch (stage) { + case 'discovered': return 'text-gray-600 bg-gray-100'; + case 'validated': return 'text-blue-600 bg-blue-100'; + case 'promoted': return 'text-yellow-600 bg-yellow-100'; + case 'sandbox': return 'text-orange-600 bg-orange-100'; + case 'hydrating': return 'text-purple-600 bg-purple-100'; case 'production': return 
'text-green-600 bg-green-100'; - case 'sandbox': return 'text-yellow-600 bg-yellow-100'; - case 'needs_manual': return 'text-orange-600 bg-orange-100'; - case 'disabled': return 'text-gray-600 bg-gray-100'; - case 'legacy': return 'text-blue-600 bg-blue-100'; + case 'failing': return 'text-red-600 bg-red-100'; default: return 'text-gray-600 bg-gray-100'; } }; @@ -368,8 +371,8 @@ export function StoreOrchestratorPanel({

Status

- - {store.status?.toUpperCase() || 'UNKNOWN'} + + {store.stage?.toUpperCase() || 'UNKNOWN'}
@@ -381,8 +384,8 @@ export function StoreOrchestratorPanel({

{store.provider_display || store.provider || '-'}

-

Sandbox Attempts

-

{store.sandboxAttempts || 0}

+

Consecutive Successes

+

{store.consecutiveSuccesses || 0}

Last Success

@@ -430,9 +433,9 @@ export function StoreOrchestratorPanel({
- {store.status !== 'production' && ( + {store.stage !== 'production' && (

Production crawl only available when store is in production status.

@@ -526,8 +529,8 @@ export function StoreOrchestratorPanel({

Store ID: {store.id}

Platform ID: {store.platformDispensaryId || 'Not set'}

Profile ID: {store.profileId || 'Not set'}

- {store.nextRetryAt && ( -

Next Retry: {formatTimestamp(store.nextRetryAt)}

+ {store.stageChangedAt && ( +

Stage Changed: {formatTimestamp(store.stageChangedAt)}

)}
diff --git a/cannaiq/src/lib/api.ts b/cannaiq/src/lib/api.ts index 1c88496c..a19afe58 100755 --- a/cannaiq/src/lib/api.ts +++ b/cannaiq/src/lib/api.ts @@ -241,6 +241,14 @@ class ApiClient { }); } + // AI Settings + async testAIConnection(provider: string, apiKey: string) { + return this.request<{ success: boolean; error?: string; model?: string }>('/api/settings/test-ai', { + method: 'POST', + body: JSON.stringify({ provider, apiKey }), + }); + } + // Proxies async getProxies() { return this.request<{ proxies: any[] }>('/api/proxies'); @@ -464,6 +472,36 @@ class ApiClient { return this.request<{ specials: any[] }>(`/api/dispensaries/${slug}/specials`); } + // Store Products/Brands by ID (uses dutchie_products table with actual data) + async getStoreProductsById(dispensaryId: number) { + return this.request<{ products: any[] }>(`/api/stores/${dispensaryId}/products`); + } + + async getStoreBrandsById(dispensaryId: number) { + return this.request<{ brands: string[]; details: Array<{ name: string; product_count: number }> }>(`/api/stores/${dispensaryId}/brands`); + } + + async getMarketStoreProductsById(dispensaryId: number, params?: { + stockStatus?: string; + type?: string; + subcategory?: string; + brandName?: string; + search?: string; + limit?: number; + offset?: number; + }) { + const searchParams = new URLSearchParams(); + if (params?.stockStatus) searchParams.append('stockStatus', params.stockStatus); + if (params?.type) searchParams.append('type', params.type); + if (params?.subcategory) searchParams.append('subcategory', params.subcategory); + if (params?.brandName) searchParams.append('brandName', params.brandName); + if (params?.search) searchParams.append('search', params.search); + if (params?.limit) searchParams.append('limit', params.limit.toString()); + if (params?.offset) searchParams.append('offset', params.offset.toString()); + const qs = searchParams.toString(); + return this.request<{ products: any[]; total: number 
}>(`/api/markets/stores/${dispensaryId}/products${qs ? `?${qs}` : ''}`); + } + // API Permissions async getApiPermissions() { return this.request<{ permissions: any[] }>('/api/api-permissions'); @@ -1200,6 +1238,17 @@ class ApiClient { total_stores: number; market_sentiment: string; market_direction: string; + // 7-stage pipeline counts + stages: { + discovered: number; + validated: number; + promoted: number; + sandbox: number; + hydrating: number; + production: number; + failing: number; + }; + // Legacy compatibility healthy_count: number; sandbox_count: number; needs_manual_count: number; @@ -1230,17 +1279,22 @@ class ApiClient { city: string; state: string; provider: string; + provider_raw?: string | null; + provider_display?: string; platformDispensaryId: string | null; - status: string; + crawlEnabled?: boolean; + // 6-stage pipeline + stage: string; + stageChangedAt: string | null; + firstCrawlAt: string | null; + lastSuccessfulCrawlAt: string | null; + consecutiveSuccesses: number; + consecutiveFailures: number; profileId: number | null; profileKey: string | null; - sandboxAttempts: number; - nextRetryAt: string | null; lastCrawlAt: string | null; lastSuccessAt: string | null; lastFailureAt: string | null; - failedAt: string | null; - consecutiveFailures: number; productCount: number; }>; total: number; @@ -1998,6 +2052,28 @@ class ApiClient { }>(`/api/admin/debug/products/${productId}/raw-payload`); } + async getProductStats(productId: number) { + return this.request<{ + product_id: number; + product_name: string; + first_seen: string | null; + last_seen: string | null; + total_snapshots: number; + inventory: { + current: number; + daily: { change: number; start: number; end: number } | null; + weekly: { change: number; start: number; end: number; percent_change: string | null } | null; + monthly: { change: number; start: number; end: number; percent_change: string | null } | null; + }; + price: { + current: number; + weekly: { change: number; start: 
number; end: number; percent_change: string | null } | null; + monthly: { change: number; start: number; end: number; percent_change: string | null } | null; + }; + history: Array<{ date: string; avg_quantity: number; avg_price: number; snapshots: number }>; + }>(`/api/products/${productId}/stats`); + } + async getStoreSnapshots(dispensaryId: number, params?: { limit?: number; offset?: number }) { const searchParams = new URLSearchParams(); if (params?.limit) searchParams.append('limit', params.limit.toString()); @@ -2566,6 +2642,112 @@ class ApiClient { `/api/seo/public/content?slug=${encodeURIComponent(slug)}` ); } + + // SEO Settings + async getSeoSettings() { + return this.request<{ settings: Record }>('/api/seo/settings'); + } + + async saveSeoSetting(key: string, value: any) { + return this.request<{ success: boolean }>('/api/seo/settings', { + method: 'POST', + body: JSON.stringify({ key, value }) + }); + } + + async saveSeoSettingsBulk(settings: Record) { + return this.request<{ success: boolean; count: number }>('/api/seo/settings/bulk', { + method: 'POST', + body: JSON.stringify({ settings }) + }); + } + + async resetSeoSettings() { + return this.request<{ success: boolean; settings: Record }>('/api/seo/settings/reset', { + method: 'POST' + }); + } + + async getSeoSettingsDefaults() { + return this.request<{ settings: Record }>('/api/seo/settings/defaults'); + } + + async previewSeoPrompt(template: string, variables?: Record) { + return this.request<{ preview: string; variables: Record }>('/api/seo/settings/preview', { + method: 'POST', + body: JSON.stringify({ template, variables }) + }); + } + + // SEO Template Library + async getSeoTemplates() { + return this.request<{ + templates: Record; + }>('/api/seo/templates'); + } + + async previewSeoTemplate(pageType: string, customTemplate?: string) { + return this.request<{ + preview: string; + template: string; + mockData: Record; + availableVariables: string[]; + }>('/api/seo/templates/preview', { + method: 
'POST', + body: JSON.stringify({ pageType, customTemplate }) + }); + } + + async validateSeoTemplate(template: string) { + return this.request<{ + valid: boolean; + variables: string[]; + unknownVariables: string[]; + errors: string[]; + }>('/api/seo/templates/validate', { + method: 'POST', + body: JSON.stringify({ template }) + }); + } + + async generateFromTemplate(pageType: string, data: Record) { + return this.request<{ + content: string; + templateUsed: string; + variablesApplied: string[]; + }>('/api/seo/templates/generate', { + method: 'POST', + body: JSON.stringify({ pageType, data }) + }); + } + + async regenerateFromTemplate( + pageType: string, + originalContent: string, + newData?: Record, + improvementAreas?: string[] + ) { + return this.request<{ + content: string; + regenerationPrompt: string; + }>('/api/seo/templates/regenerate', { + method: 'POST', + body: JSON.stringify({ pageType, originalContent, newData, improvementAreas }) + }); + } + + async getTemplateVariables(pageType: string) { + return this.request<{ + pageType: string; + variables: string[]; + sampleValues: Record; + }>(`/api/seo/templates/variables/${encodeURIComponent(pageType)}`); + } } -export const api = new ApiClient(API_URL); +export const api = new ApiClient(API_URL); diff --git a/cannaiq/src/pages/AISettings.tsx b/cannaiq/src/pages/AISettings.tsx new file mode 100644 index 00000000..7739d590 --- /dev/null +++ b/cannaiq/src/pages/AISettings.tsx @@ -0,0 +1,350 @@ +import { useEffect, useState } from 'react'; +import { Layout } from '../components/Layout'; +import { api } from '../lib/api'; +import { Toast } from '../components/Toast'; +import { Bot, Key, Cpu, Check, X, Eye, EyeOff, Plus, Trash2 } from 'lucide-react'; + +interface AIProvider { + id: string; + name: string; + description: string; + models: string[]; + icon: string; + settingKey: string; +} + +const AI_PROVIDERS: AIProvider[] = [ + { + id: 'anthropic', + name: 'Anthropic', + description: 'Claude AI models for 
intelligent content generation', + models: [ + 'claude-opus-4-20250514', + 'claude-sonnet-4-20250514', + 'claude-3-7-sonnet-20250219', + 'claude-3-5-sonnet-20241022', + 'claude-3-5-haiku-20241022', + 'claude-3-opus-20240229', + 'claude-3-sonnet-20240229', + 'claude-3-haiku-20240307' + ], + icon: 'anthropic', + settingKey: 'anthropic_api_key' + }, + { + id: 'openai', + name: 'OpenAI / ChatGPT', + description: 'GPT models for text generation and analysis', + models: [ + // GPT-5 Series (Flagship) + 'gpt-5.1', + 'gpt-5.1-mini', + 'gpt-5', + 'gpt-5-mini', + 'gpt-5-nano', + // Reasoning Models (o-Series) + 'o4-mini', + 'o3', + 'o3-mini', + 'o3-deep-research', + 'o4-mini-deep-research', + // GPT-4.1 Family (Coding optimized) + 'gpt-4.1', + 'gpt-4.1-mini', + 'gpt-4.1-nano', + // Legacy Models + 'gpt-4o', + 'gpt-4o-mini', + 'gpt-4-turbo', + 'gpt-3.5-turbo' + ], + icon: 'openai', + settingKey: 'openai_api_key' + } +]; + +interface ProviderConfig { + apiKey: string; + selectedModel: string; + isEnabled: boolean; +} + +export function AISettings() { + const [configs, setConfigs] = useState>({}); + const [loading, setLoading] = useState(true); + const [saving, setSaving] = useState(null); + const [showKeys, setShowKeys] = useState>({}); + const [notification, setNotification] = useState<{ message: string; type: 'success' | 'error' | 'info' } | null>(null); + const [testingProvider, setTestingProvider] = useState(null); + + useEffect(() => { + loadSettings(); + }, []); + + const loadSettings = async () => { + setLoading(true); + try { + const data = await api.getSettings(); + const settings = data.settings || []; + + const newConfigs: Record = {}; + + for (const provider of AI_PROVIDERS) { + const apiKeySetting = settings.find((s: any) => s.key === provider.settingKey); + const modelSetting = settings.find((s: any) => s.key === `${provider.id}_model`); + const enabledSetting = settings.find((s: any) => s.key === `${provider.id}_enabled`); + + newConfigs[provider.id] = { + 
apiKey: apiKeySetting?.value || '', + selectedModel: modelSetting?.value || provider.models[0], + isEnabled: enabledSetting?.value === 'true' || !!apiKeySetting?.value + }; + } + + setConfigs(newConfigs); + } catch (error) { + console.error('Failed to load settings:', error); + setNotification({ message: 'Failed to load AI settings', type: 'error' }); + } finally { + setLoading(false); + } + }; + + const handleSaveProvider = async (providerId: string) => { + const config = configs[providerId]; + if (!config) return; + + setSaving(providerId); + try { + const provider = AI_PROVIDERS.find(p => p.id === providerId); + if (!provider) return; + + await api.updateSettings([ + { key: provider.settingKey, value: config.apiKey }, + { key: `${providerId}_model`, value: config.selectedModel }, + { key: `${providerId}_enabled`, value: config.isEnabled ? 'true' : 'false' } + ]); + + setNotification({ message: `${provider.name} settings saved successfully`, type: 'success' }); + } catch (error: any) { + setNotification({ message: `Failed to save ${providerId} settings: ${error.message}`, type: 'error' }); + } finally { + setSaving(null); + } + }; + + const handleTestConnection = async (providerId: string) => { + const config = configs[providerId]; + if (!config?.apiKey) { + setNotification({ message: 'Please enter an API key first', type: 'error' }); + return; + } + + setTestingProvider(providerId); + try { + const response = await api.testAIConnection(providerId, config.apiKey); + if (response.success) { + setNotification({ message: `${providerId} connection successful!`, type: 'success' }); + } else { + setNotification({ message: `Connection failed: ${response.error}`, type: 'error' }); + } + } catch (error: any) { + setNotification({ message: `Test failed: ${error.message}`, type: 'error' }); + } finally { + setTestingProvider(null); + } + }; + + const updateConfig = (providerId: string, updates: Partial) => { + setConfigs(prev => ({ + ...prev, + [providerId]: { 
...prev[providerId], ...updates } + })); + }; + + const toggleShowKey = (providerId: string) => { + setShowKeys(prev => ({ ...prev, [providerId]: !prev[providerId] })); + }; + + const maskApiKey = (key: string) => { + if (!key) return ''; + if (key.length <= 8) return '*'.repeat(key.length); + return key.substring(0, 4) + '*'.repeat(key.length - 8) + key.substring(key.length - 4); + }; + + if (loading) { + return ( + +
+
Loading AI settings...
+
+
+ ); + } + + return ( + +
+ {notification && ( + setNotification(null)} + /> + )} + +
+

+ + AI Settings +

+

+ Configure AI providers for content generation, SEO descriptions, and intelligent features. +

+
+ +
+ {AI_PROVIDERS.map(provider => { + const config = configs[provider.id] || { apiKey: '', selectedModel: provider.models[0], isEnabled: false }; + const isConfigured = !!config.apiKey; + + return ( +
+ {/* Header */} +
+
+
+ {provider.id === 'anthropic' ? ( + A + ) : ( + O + )} +
+
+

{provider.name}

+

{provider.description}

+
+
+
+ {isConfigured ? ( + + + Configured + + ) : ( + + + Not configured + + )} +
+
+ + {/* Body */} +
+ {/* API Key */} +
+ +
+ updateConfig(provider.id, { apiKey: e.target.value })} + placeholder={`Enter your ${provider.name} API key`} + className="w-full px-4 py-2.5 pr-24 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-emerald-500 focus:border-transparent font-mono text-sm" + /> + +
+

+ {provider.id === 'anthropic' + ? 'Get your API key from console.anthropic.com' + : 'Get your API key from platform.openai.com'} +

+
+ + {/* Model Selection */} +
+ + +
+ + {/* Enable/Disable */} +
+
+ Enable {provider.name} +

Use this provider for AI-powered features

+
+ +
+
+ + {/* Footer */} +
+ + +
+
+ ); + })} +
+ + {/* Usage Info */} +
+

How AI is used in CannaIQ

+
    +
  • - Generate SEO-optimized product descriptions
  • +
  • - Create category and brand summaries
  • +
  • - Analyze pricing trends and market data
  • +
  • - Power intelligent search and recommendations
  • +
+
+
+
+ ); +} diff --git a/cannaiq/src/pages/ApiPermissions.tsx b/cannaiq/src/pages/ApiPermissions.tsx index b4c794db..e58925d0 100644 --- a/cannaiq/src/pages/ApiPermissions.tsx +++ b/cannaiq/src/pages/ApiPermissions.tsx @@ -1,7 +1,8 @@ -import { useEffect, useState } from 'react'; +import { useEffect, useState, useRef } from 'react'; import { Layout } from '../components/Layout'; import { api } from '../lib/api'; import { Toast } from '../components/Toast'; +import { Key, Plus, Copy, Check, X, Trash2, Power, PowerOff, Store, Globe, Shield, Clock, Eye, EyeOff, Search, ChevronDown } from 'lucide-react'; interface ApiPermission { id: number; @@ -14,6 +15,7 @@ interface ApiPermission { last_used_at: string | null; store_id: number | null; store_name: string | null; + request_count?: number; } interface Dispensary { @@ -21,11 +23,138 @@ interface Dispensary { name: string; } +// Searchable Dropdown Component +function SearchableSelect({ + options, + value, + onChange, + placeholder = "Select...", + required = false +}: { + options: Dispensary[]; + value: string; + onChange: (value: string) => void; + placeholder?: string; + required?: boolean; +}) { + const [isOpen, setIsOpen] = useState(false); + const [search, setSearch] = useState(''); + const dropdownRef = useRef(null); + const inputRef = useRef(null); + + const selectedOption = options.find(o => o.id.toString() === value); + + const filteredOptions = options.filter(option => + option.name.toLowerCase().includes(search.toLowerCase()) + ); + + // Close dropdown when clicking outside + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if (dropdownRef.current && !dropdownRef.current.contains(event.target as Node)) { + setIsOpen(false); + setSearch(''); + } + }; + document.addEventListener('mousedown', handleClickOutside); + return () => document.removeEventListener('mousedown', handleClickOutside); + }, []); + + // Focus input when dropdown opens + useEffect(() => { + if (isOpen && inputRef.current) 
{ + inputRef.current.focus(); + } + }, [isOpen]); + + return ( +
+ + + {/* Hidden input for form validation */} + {required && ( + {}} + required + className="absolute opacity-0 w-0 h-0" + tabIndex={-1} + /> + )} + + {isOpen && ( +
+ {/* Search Input */} +
+
+ + setSearch(e.target.value)} + placeholder="Type to search dispensaries..." + className="w-full pl-9 pr-4 py-2 border border-gray-200 rounded-md text-sm focus:outline-none focus:ring-2 focus:ring-emerald-500 focus:border-transparent" + /> +
+
+ + {/* Options List */} +
+ {filteredOptions.length === 0 ? ( +
+ No dispensaries found +
+ ) : ( + filteredOptions.slice(0, 100).map((option) => ( + + )) + )} + {filteredOptions.length > 100 && ( +
+ Showing first 100 results. Type to narrow down. +
+ )} +
+
+ )} +
+ ); +} + export function ApiPermissions() { const [permissions, setPermissions] = useState([]); const [dispensaries, setDispensaries] = useState([]); const [loading, setLoading] = useState(true); const [showAddForm, setShowAddForm] = useState(false); + const [showKeys, setShowKeys] = useState>({}); + const [copiedId, setCopiedId] = useState(null); const [newPermission, setNewPermission] = useState({ user_name: '', store_id: '', @@ -42,7 +171,7 @@ export function ApiPermissions() { const loadDispensaries = async () => { try { const data = await api.getApiPermissionDispensaries(); - setDispensaries(data.dispensaries); + setDispensaries(data.dispensaries || []); } catch (error: any) { console.error('Failed to load dispensaries:', error); } @@ -52,7 +181,7 @@ export function ApiPermissions() { setLoading(true); try { const data = await api.getApiPermissions(); - setPermissions(data.permissions); + setPermissions(data.permissions || []); } catch (error: any) { setNotification({ message: 'Failed to load API permissions: ' + error.message, type: 'error' }); } finally { @@ -69,7 +198,7 @@ export function ApiPermissions() { } if (!newPermission.store_id) { - setNotification({ message: 'Store is required', type: 'error' }); + setNotification({ message: 'Please select a dispensary', type: 'error' }); return; } @@ -78,7 +207,7 @@ export function ApiPermissions() { ...newPermission, store_id: parseInt(newPermission.store_id), }); - setNotification({ message: result.message, type: 'success' }); + setNotification({ message: 'API key created successfully! 
Copy it now - it won\'t be shown again in full.', type: 'success' }); setNewPermission({ user_name: '', store_id: '', allowed_ips: '', allowed_domains: '' }); setShowAddForm(false); loadPermissions(); @@ -90,7 +219,7 @@ export function ApiPermissions() { const handleToggle = async (id: number) => { try { await api.toggleApiPermission(id); - setNotification({ message: 'Permission status updated', type: 'success' }); + setNotification({ message: 'API key status updated', type: 'success' }); loadPermissions(); } catch (error: any) { setNotification({ message: 'Failed to toggle permission: ' + error.message, type: 'error' }); @@ -98,35 +227,50 @@ export function ApiPermissions() { }; const handleDelete = async (id: number) => { - if (!confirm('Are you sure you want to delete this API permission?')) { + if (!confirm('Are you sure you want to delete this API key? This action cannot be undone.')) { return; } try { await api.deleteApiPermission(id); - setNotification({ message: 'Permission deleted successfully', type: 'success' }); + setNotification({ message: 'API key deleted successfully', type: 'success' }); loadPermissions(); } catch (error: any) { setNotification({ message: 'Failed to delete permission: ' + error.message, type: 'error' }); } }; - const copyToClipboard = (text: string) => { - navigator.clipboard.writeText(text); - setNotification({ message: 'API key copied to clipboard!', type: 'success' }); + const copyToClipboard = async (text: string, id: number) => { + await navigator.clipboard.writeText(text); + setCopiedId(id); + setTimeout(() => setCopiedId(null), 2000); }; const formatDate = (dateString: string | null) => { if (!dateString) return 'Never'; const date = new Date(dateString); - return date.toLocaleDateString() + ' ' + date.toLocaleTimeString(); + const now = new Date(); + const diffMs = now.getTime() - date.getTime(); + const diffMins = Math.floor(diffMs / 60000); + const diffHours = Math.floor(diffMs / 3600000); + const diffDays = 
Math.floor(diffMs / 86400000); + + if (diffMins < 1) return 'Just now'; + if (diffMins < 60) return `${diffMins}m ago`; + if (diffHours < 24) return `${diffHours}h ago`; + if (diffDays < 7) return `${diffDays}d ago`; + return date.toLocaleDateString(); + }; + + const toggleShowKey = (id: number) => { + setShowKeys(prev => ({ ...prev, [id]: !prev[id] })); }; if (loading) { return (
-
Loading API permissions...
+
Loading API keys...
); @@ -134,7 +278,7 @@ export function ApiPermissions() { return ( -
+
{notification && ( )} -
-

API Permissions

+ {/* Header */} +
+
+

+ + WordPress API Keys +

+

+ Generate and manage API keys for WordPress plugin integrations +

+
-
-

How it works:

-

- Users with valid permissions can access your API without entering tokens. - Access is automatically validated based on their IP address and/or domain name. -

+ {/* WordPress Plugin Instructions */} +
+

+ + WordPress Plugin Setup +

+
+

1. Install the CannaIQ Menus plugin on your WordPress site

+

2. Generate an API key below for your dispensary

+

3. In WordPress, go to Settings → CannaIQ Menus

+

4. Paste your API key and save

+
+
+

+ API Endpoint: https://api.cannaiq.co/api/v1/products +

+
+ {/* Add Form */} {showAddForm && ( -
-

Add New API User

-
-
- - setNewPermission({ ...newPermission, user_name: e.target.value })} - className="w-full px-3 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500" - placeholder="e.g., My Website" - required - /> -

A friendly name to identify this API user

+
+

+ + Generate New API Key +

+ +
+
+ + setNewPermission({ ...newPermission, user_name: e.target.value })} + className="w-full px-4 py-2.5 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-emerald-500 focus:border-transparent" + placeholder="e.g., Main Website, Dev Site" + required + /> +

A name to identify this API key

+
+ +
+ + setNewPermission({ ...newPermission, store_id: value })} + placeholder="Search for a dispensary..." + required + /> +

This key will only access this dispensary's data

+
-
- -