feat: SEO template library, discovery pipeline, and orchestrator enhancements

## SEO Template Library
- Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration)
- Add Template Library tab in SEO Orchestrator with accordion-based editors
- Add template preview, validation, and variable injection engine
- Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate

## Discovery Pipeline
- Add promotion.ts for discovery location validation and promotion
- Add discover-all-states.ts script for multi-state discovery
- Add promotion log migration (067)
- Enhance discovery routes and types

## Orchestrator & Admin
- Add crawl_enabled filter to stores page
- Add API permissions page
- Add job queue management
- Add price analytics routes
- Add markets and intelligence routes
- Enhance dashboard and worker monitoring

## Infrastructure
- Add migrations for worker definitions, SEO settings, field alignment
- Add canonical pipeline for scraper v2
- Update hydration and sync orchestrator
- Enhance multi-state query service

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-09 00:05:34 -07:00
parent 9711d594db
commit 2f483b3084
83 changed files with 16700 additions and 1277 deletions

View File

@@ -0,0 +1,119 @@
-- Migration 051: Worker Definitions
-- Introduces the workers table: one row per named worker, holding its role,
-- schedule, assignment scope, job configuration, live status and run metrics.
CREATE TABLE IF NOT EXISTS workers (
    id                      SERIAL PRIMARY KEY,
    name                    VARCHAR(100) NOT NULL UNIQUE,
    role                    VARCHAR(100) NOT NULL,
    description             TEXT,
    enabled                 BOOLEAN DEFAULT TRUE,

    -- Scheduling (used by dedicated crawl workers)
    schedule_type           VARCHAR(50) DEFAULT 'interval',   -- one of: 'interval', 'cron', 'manual'
    interval_minutes        INTEGER DEFAULT 240,
    cron_expression         VARCHAR(100),                     -- for example '0 */4 * * *'
    jitter_minutes          INTEGER DEFAULT 30,

    -- What the worker is assigned to
    assignment_type         VARCHAR(50) DEFAULT 'all',        -- one of: 'all', 'state', 'dispensary', 'chain'
    assigned_state_codes    TEXT[],                           -- for example ['AZ', 'CA']
    assigned_dispensary_ids INTEGER[],
    assigned_chain_ids      INTEGER[],

    -- Which job the worker runs and how
    job_type                VARCHAR(50) NOT NULL DEFAULT 'dutchie_product_crawl',
    job_config              JSONB DEFAULT '{}',
    priority                INTEGER DEFAULT 0,
    max_concurrent          INTEGER DEFAULT 1,

    -- Live status and outcome of the most recent run
    status                  VARCHAR(50) DEFAULT 'idle',       -- one of: 'idle', 'running', 'paused', 'error'
    last_run_at             TIMESTAMPTZ,
    last_status             VARCHAR(50),
    last_error              TEXT,
    last_duration_ms        INTEGER,
    next_run_at             TIMESTAMPTZ,
    current_job_id          INTEGER,

    -- Cumulative run counters
    total_runs              INTEGER DEFAULT 0,
    successful_runs         INTEGER DEFAULT 0,
    failed_runs             INTEGER DEFAULT 0,
    avg_duration_ms         INTEGER,

    created_at              TIMESTAMPTZ DEFAULT NOW(),
    updated_at              TIMESTAMPTZ DEFAULT NOW()
);
-- Per-run history for workers. Each row belongs to one worker and is removed
-- together with it (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS worker_runs (
    id                   SERIAL PRIMARY KEY,
    worker_id            INTEGER NOT NULL REFERENCES workers(id) ON DELETE CASCADE,
    started_at           TIMESTAMPTZ DEFAULT NOW(),
    completed_at         TIMESTAMPTZ,
    status               VARCHAR(50) DEFAULT 'running',   -- one of: 'running', 'success', 'error', 'cancelled'
    duration_ms          INTEGER,

    -- Counters describing what the run processed
    jobs_created         INTEGER DEFAULT 0,
    jobs_completed       INTEGER DEFAULT 0,
    jobs_failed          INTEGER DEFAULT 0,
    dispensaries_crawled INTEGER DEFAULT 0,
    products_found       INTEGER DEFAULT 0,

    error_message        TEXT,
    metadata             JSONB DEFAULT '{}',
    created_at           TIMESTAMPTZ DEFAULT NOW()
);
-- Lookup indexes for workers and worker_runs.
-- The first two workers indexes are partial: they only contain enabled rows.
CREATE INDEX IF NOT EXISTS idx_workers_enabled
    ON workers (enabled)
    WHERE enabled = TRUE;

CREATE INDEX IF NOT EXISTS idx_workers_next_run
    ON workers (next_run_at)
    WHERE enabled = TRUE;

CREATE INDEX IF NOT EXISTS idx_workers_status
    ON workers (status);

-- Run history: by owning worker, and newest-first by start time.
CREATE INDEX IF NOT EXISTS idx_worker_runs_worker_id
    ON worker_runs (worker_id);

CREATE INDEX IF NOT EXISTS idx_worker_runs_started_at
    ON worker_runs (started_at DESC);
-- Add dispensary_crawl_jobs.assigned_worker_id (FK -> workers.id) so a crawl
-- job can be tied to a worker.
-- ADD COLUMN IF NOT EXISTS makes the step re-runnable and checks the very table
-- being altered. A lookup in information_schema.columns by table_name alone is
-- not schema-qualified, so a same-named table in another schema could make the
-- column look present when it is not.
ALTER TABLE dispensary_crawl_jobs
    ADD COLUMN IF NOT EXISTS assigned_worker_id INTEGER REFERENCES workers(id);
-- Seed workers from the job_schedules rows that carry a worker_name.
--
-- DISTINCT ON keeps exactly one source row per worker_name (the most recently
-- run one). workers.name is UNIQUE, and a single INSERT ... ON CONFLICT DO
-- UPDATE aborts with "command cannot affect row a second time" when the same
-- conflict key is proposed twice in one statement.
--
-- On a re-run, names that already exist only get updated_at bumped; their other
-- columns are left as they are.
-- NOTE(review): workers.role is NOT NULL; this assumes job_schedules.worker_role
-- is populated wherever worker_name is — confirm against the job_schedules data.
INSERT INTO workers (
    name,
    role,
    description,
    enabled,
    interval_minutes,
    jitter_minutes,
    job_type,
    job_config,
    last_run_at,
    last_status,
    last_error,
    last_duration_ms,
    next_run_at
)
SELECT DISTINCT ON (js.worker_name)
    js.worker_name,
    js.worker_role,
    js.description,
    js.enabled,
    js.base_interval_minutes,
    js.jitter_minutes,
    js.job_name,
    js.job_config,
    js.last_run_at,
    js.last_status,
    js.last_error_message,
    js.last_duration_ms,
    js.next_run_at
FROM job_schedules AS js
WHERE js.worker_name IS NOT NULL
ORDER BY js.worker_name, js.last_run_at DESC NULLS LAST
ON CONFLICT (name) DO UPDATE
    SET updated_at = NOW();
-- Reference list of worker roles, stored as the table comment on workers.
-- Dollar-quoted so the multi-line text can be read without escaping.
COMMENT ON TABLE workers IS
$comment$Named workers with specific roles and assignments. Roles include:
- product_sync: Crawls products from dispensary menus
- store_discovery: Discovers new dispensary locations
- entry_point_finder: Detects menu providers and resolves platform IDs
- analytics_refresh: Refreshes materialized views and analytics
- price_monitor: Monitors price changes and triggers alerts
- inventory_sync: Syncs inventory levels
- image_processor: Downloads and processes product images
- data_validator: Validates data integrity$comment$;

View File

@@ -0,0 +1,49 @@
-- Migration 052: SEO Settings Table
-- Key/value configuration store for the SEO Orchestrator. Each setting is one
-- row: a unique text key and a JSONB value.
CREATE TABLE IF NOT EXISTS seo_settings (
    id         SERIAL PRIMARY KEY,
    key        TEXT UNIQUE NOT NULL,
    value      JSONB NOT NULL,
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

-- Lookup index on key.
-- NOTE(review): the UNIQUE constraint on key is already backed by an index, so
-- this one looks redundant — confirm before relying on it or removing it.
CREATE INDEX IF NOT EXISTS idx_seo_settings_key
    ON seo_settings (key);
-- Default settings. Every value is a JSON literal (string, boolean or number).
-- Keys that already exist are left untouched (ON CONFLICT DO NOTHING), so
-- re-running never overwrites a customised value.
INSERT INTO seo_settings (key, value)
VALUES
    -- 1. Global content generation
    (
        'primary_prompt_template',
        '"You are a cannabis industry content expert. Generate SEO-optimized content for {{page_type}} pages about {{subject}}. Focus on: {{focus_areas}}. Maintain a {{tone}} tone and keep content {{length}}."'
    ),
    (
        'regeneration_prompt_template',
        '"Regenerate the following SEO content with fresh perspectives. Original topic: {{subject}}. Improve upon: {{improvement_areas}}. Maintain compliance with cannabis industry standards."'
    ),
    ('default_content_length',       '"medium"'),
    ('tone_voice',                   '"informational"'),

    -- 2. Automatic refresh rules
    ('auto_refresh_interval',        '"weekly"'),
    ('trigger_pct_product_change',   'true'),
    ('trigger_pct_brand_change',     'true'),
    ('trigger_new_stores',           'true'),
    ('trigger_market_shift',         'false'),
    ('webhook_url',                  '""'),
    ('notify_on_trigger',            'false'),

    -- 3. Page-level defaults
    (
        'default_title_template',
        '"{{state_name}} Dispensaries | Find Cannabis Near You | CannaiQ"'
    ),
    (
        'default_meta_description_template',
        '"Discover the best dispensaries in {{state_name}}. Browse {{dispensary_count}}+ licensed retailers, compare prices, and find cannabis products near you."'
    ),
    ('default_slug_template',        '"dispensaries-{{state_code_lower}}"'),
    ('default_og_image_template',    '"/images/seo/og-{{state_code_lower}}.jpg"'),
    ('enable_ai_images',             'false'),

    -- 4. Crawl / dataset configuration
    ('primary_data_provider',        '"cannaiq"'),
    ('fallback_data_provider',       '"dutchie"'),
    ('min_data_freshness_hours',     '24'),
    ('stale_data_behavior',          '"allow_with_warning"')
ON CONFLICT (key) DO NOTHING;

-- Register this migration; a repeat run leaves the existing row as it is.
INSERT INTO schema_migrations (version, name, applied_at)
VALUES
    ('052', 'seo_settings', NOW())
ON CONFLICT (version) DO NOTHING;

View File

@@ -0,0 +1,140 @@
-- Migration 066: Align dispensaries and discovery_locations tables with Dutchie field names
-- Column names are the snake_case form of Dutchie's camelCase fields.
--
-- Changes:
-- 1. dispensaries: rename address -> address1, zip -> zipcode, drop company_name
-- 2. dispensaries: add missing Dutchie fields
-- 3. dutchie_discovery_locations: add missing Dutchie fields
-- ============================================================================
-- DISPENSARIES TABLE
-- ============================================================================
-- RENAME COLUMN has no IF EXISTS form, so a plain rename fails on a second run
-- while every other step of this migration is re-runnable. Each rename is
-- therefore skipped in exactly one situation: the old column is gone and the
-- new one is already there (i.e. the rename was applied earlier). In any other
-- state the rename is attempted and PostgreSQL reports problems as usual.
DO $$
DECLARE
    has_address  BOOLEAN;
    has_address1 BOOLEAN;
    has_zip      BOOLEAN;
    has_zipcode  BOOLEAN;
BEGIN
    -- to_regclass resolves the table through the search_path, the same way the
    -- ALTER TABLE statements below do; it yields NULL if the table is missing.
    SELECT
        COALESCE(bool_or(a.attname = 'address'),  FALSE),
        COALESCE(bool_or(a.attname = 'address1'), FALSE),
        COALESCE(bool_or(a.attname = 'zip'),      FALSE),
        COALESCE(bool_or(a.attname = 'zipcode'),  FALSE)
    INTO has_address, has_address1, has_zip, has_zipcode
    FROM pg_attribute AS a
    WHERE a.attrelid = to_regclass('dispensaries')
      AND a.attnum > 0
      AND NOT a.attisdropped;

    -- address -> address1 (matches Dutchie's address1)
    IF has_address OR NOT has_address1 THEN
        ALTER TABLE dispensaries RENAME COLUMN address TO address1;
    END IF;

    -- zip -> zipcode (Dutchie calls it zip; zipcode is used here for clarity)
    IF has_zip OR NOT has_zipcode THEN
        ALTER TABLE dispensaries RENAME COLUMN zip TO zipcode;
    END IF;
END $$;

-- company_name is redundant with name. This permanently discards the column's data.
ALTER TABLE dispensaries DROP COLUMN IF EXISTS company_name;
-- Add the Dutchie-aligned columns that dispensaries does not have yet.
-- One ALTER TABLE carries all additions; each is skipped if the column exists.
ALTER TABLE dispensaries
    -- Address and contact details
    ADD COLUMN IF NOT EXISTS address2 VARCHAR(255),
    ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States',
    ADD COLUMN IF NOT EXISTS timezone VARCHAR(50),
    ADD COLUMN IF NOT EXISTS email VARCHAR(255),
    ADD COLUMN IF NOT EXISTS description TEXT,
    -- Branding images (Dutchie: logoImage, bannerImage)
    ADD COLUMN IF NOT EXISTS logo_image TEXT,
    ADD COLUMN IF NOT EXISTS banner_image TEXT,
    -- Fulfilment options (Dutchie: offerPickup, offerDelivery, offerCurbsidePickup)
    ADD COLUMN IF NOT EXISTS offer_pickup BOOLEAN DEFAULT TRUE,
    ADD COLUMN IF NOT EXISTS offer_delivery BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS offer_curbside_pickup BOOLEAN DEFAULT FALSE,
    -- Licence flags (Dutchie: isMedical, isRecreational)
    ADD COLUMN IF NOT EXISTS is_medical BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS is_recreational BOOLEAN DEFAULT FALSE,
    -- Chain / enterprise identifiers (Dutchie: chain, retailer.enterpriseId)
    ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255),
    ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100),
    -- Store status (Dutchie: status - open/closed)
    ADD COLUMN IF NOT EXISTS status VARCHAR(50),
    -- Dutchie cName: the URL slug used in embedded menus
    ADD COLUMN IF NOT EXISTS c_name VARCHAR(255);
-- ============================================================================
-- DUTCHIE_DISCOVERY_LOCATIONS TABLE
-- ============================================================================
-- Add the Dutchie-aligned columns to the discovery table in a single ALTER;
-- each addition is skipped if the column already exists.
ALTER TABLE dutchie_discovery_locations
    -- Contact details (website comes from Dutchie's embedBackUrl)
    ADD COLUMN IF NOT EXISTS phone VARCHAR(50),
    ADD COLUMN IF NOT EXISTS website TEXT,
    ADD COLUMN IF NOT EXISTS email VARCHAR(255),
    ADD COLUMN IF NOT EXISTS description TEXT,
    -- Branding images
    ADD COLUMN IF NOT EXISTS logo_image TEXT,
    ADD COLUMN IF NOT EXISTS banner_image TEXT,
    -- Chain / enterprise identifiers and menu slug
    ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255),
    ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100),
    ADD COLUMN IF NOT EXISTS c_name VARCHAR(255),
    -- Location country and store status
    ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States',
    ADD COLUMN IF NOT EXISTS store_status VARCHAR(50);
-- ============================================================================
-- INDEXES
-- ============================================================================
-- All of these are partial indexes that leave out rows where the indexed
-- column is NULL.

-- Chain lookups
CREATE INDEX IF NOT EXISTS idx_dispensaries_chain_slug
    ON dispensaries (chain_slug)
    WHERE chain_slug IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_discovery_locations_chain_slug
    ON dutchie_discovery_locations (chain_slug)
    WHERE chain_slug IS NOT NULL;

-- Enterprise lookups (multi-location chains)
CREATE INDEX IF NOT EXISTS idx_dispensaries_enterprise_id
    ON dispensaries (enterprise_id)
    WHERE enterprise_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_discovery_locations_enterprise_id
    ON dutchie_discovery_locations (enterprise_id)
    WHERE enterprise_id IS NOT NULL;

-- cName lookups
CREATE INDEX IF NOT EXISTS idx_dispensaries_c_name
    ON dispensaries (c_name)
    WHERE c_name IS NOT NULL;
-- ============================================================================
-- COMMENTS
-- ============================================================================
-- Catalog comments recording which Dutchie field each dispensaries column maps to.
COMMENT ON COLUMN dispensaries.address1
    IS 'Street address line 1 (Dutchie: address1)';
COMMENT ON COLUMN dispensaries.address2
    IS 'Street address line 2 (Dutchie: address2)';
COMMENT ON COLUMN dispensaries.zipcode
    IS 'ZIP/postal code (Dutchie: zip)';
COMMENT ON COLUMN dispensaries.c_name
    IS 'Dutchie URL slug for embedded menus (Dutchie: cName)';
COMMENT ON COLUMN dispensaries.chain_slug
    IS 'Chain identifier slug (Dutchie: chain)';
COMMENT ON COLUMN dispensaries.enterprise_id
    IS 'Parent enterprise UUID (Dutchie: retailer.enterpriseId)';
COMMENT ON COLUMN dispensaries.logo_image
    IS 'Logo image URL (Dutchie: logoImage)';
COMMENT ON COLUMN dispensaries.banner_image
    IS 'Banner image URL (Dutchie: bannerImage)';
COMMENT ON COLUMN dispensaries.offer_pickup
    IS 'Offers in-store pickup (Dutchie: offerPickup)';
COMMENT ON COLUMN dispensaries.offer_delivery
    IS 'Offers delivery (Dutchie: offerDelivery)';
COMMENT ON COLUMN dispensaries.offer_curbside_pickup
    IS 'Offers curbside pickup (Dutchie: offerCurbsidePickup)';
COMMENT ON COLUMN dispensaries.is_medical
    IS 'Licensed for medical sales (Dutchie: isMedical)';
COMMENT ON COLUMN dispensaries.is_recreational
    IS 'Licensed for recreational sales (Dutchie: isRecreational)';

-- Completion marker returned to whoever runs the script.
SELECT 'Migration 066 completed: Dutchie field alignment' AS status;

View File

@@ -0,0 +1,24 @@
-- Promotion log: audit trail for validating discovery locations and promoting
-- them into dispensaries. Both foreign keys are nulled (not cascaded) when the
-- referenced row is deleted, so log rows outlive the rows they describe.
CREATE TABLE IF NOT EXISTS dutchie_promotion_log (
    id                SERIAL PRIMARY KEY,
    discovery_id      INTEGER REFERENCES dutchie_discovery_locations(id) ON DELETE SET NULL,
    dispensary_id     INTEGER REFERENCES dispensaries(id) ON DELETE SET NULL,
    action            VARCHAR(50) NOT NULL,          -- one of: 'validated', 'rejected', 'promoted_create', 'promoted_update', 'skipped'
    state_code        VARCHAR(10),
    store_name        VARCHAR(255),
    validation_errors TEXT[],                        -- error messages, filled when rejected
    field_changes     JSONB,                         -- before/after snapshot of the changed fields
    triggered_by      VARCHAR(100) DEFAULT 'auto',   -- one of: 'auto', 'manual', 'api'
    created_at        TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);

-- Query indexes: by either referenced row, by action, by state, and newest-first.
CREATE INDEX IF NOT EXISTS idx_promotion_log_discovery_id
    ON dutchie_promotion_log (discovery_id);
CREATE INDEX IF NOT EXISTS idx_promotion_log_dispensary_id
    ON dutchie_promotion_log (dispensary_id);
CREATE INDEX IF NOT EXISTS idx_promotion_log_action
    ON dutchie_promotion_log (action);
CREATE INDEX IF NOT EXISTS idx_promotion_log_state_code
    ON dutchie_promotion_log (state_code);
CREATE INDEX IF NOT EXISTS idx_promotion_log_created_at
    ON dutchie_promotion_log (created_at DESC);

COMMENT ON TABLE dutchie_promotion_log
    IS 'Audit log for discovery location validation and promotion to dispensaries';

View File

@@ -0,0 +1,95 @@
-- Migration 068: Crawler Status Alerts
-- Creates the crawler_status_alerts table used for dashboard notifications and
-- for logging crawler status changes.
-- ============================================================
-- STATUS ALERTS TABLE
-- ============================================================
-- NOTE(review): neither foreign key declares an ON DELETE action, so the
-- default applies and a referenced dispensary/profile cannot be deleted while
-- alerts point at it — confirm that is intended.
CREATE TABLE IF NOT EXISTS crawler_status_alerts (
    id              SERIAL PRIMARY KEY,

    -- What the alert is about
    dispensary_id   INTEGER REFERENCES dispensaries(id),
    profile_id      INTEGER REFERENCES dispensary_crawler_profiles(id),

    -- Classification
    alert_type      VARCHAR(50) NOT NULL,           -- one of: 'status_change', 'crawl_error', 'validation_failed', 'promoted', 'demoted'
    severity        VARCHAR(20) DEFAULT 'info',     -- one of: 'info', 'warning', 'error', 'critical'

    -- Status before and after the change
    previous_status VARCHAR(50),
    new_status      VARCHAR(50),

    -- Human-readable message plus structured context
    message         TEXT,
    error_details   JSONB,
    metadata        JSONB,                          -- extra context (product counts, error codes, etc.)

    -- Acknowledgement tracking
    acknowledged    BOOLEAN DEFAULT FALSE,
    acknowledged_at TIMESTAMP WITH TIME ZONE,
    acknowledged_by VARCHAR(100),

    created_at      TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Indexes for the common alert queries.
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_dispensary
    ON crawler_status_alerts (dispensary_id);
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_type
    ON crawler_status_alerts (alert_type);
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_severity
    ON crawler_status_alerts (severity);
-- Partial index: only alerts that have not been acknowledged.
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_unack
    ON crawler_status_alerts (acknowledged)
    WHERE acknowledged = FALSE;
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_created
    ON crawler_status_alerts (created_at DESC);

-- ============================================================
-- STATUS DEFINITIONS (for reference/validation)
-- ============================================================
-- Catalog comments documenting the table and the allowed values of its
-- classification columns.
COMMENT ON TABLE crawler_status_alerts
    IS 'Crawler status change notifications for dashboard alerting';
COMMENT ON COLUMN crawler_status_alerts.alert_type
    IS 'Type: status_change, crawl_error, validation_failed, promoted, demoted';
COMMENT ON COLUMN crawler_status_alerts.severity
    IS 'Severity: info, warning, error, critical';
COMMENT ON COLUMN crawler_status_alerts.previous_status
    IS 'Previous crawler status before change';
COMMENT ON COLUMN crawler_status_alerts.new_status
    IS 'New crawler status after change';
-- ============================================================
-- STATUS TRACKING ON PROFILES
-- ============================================================
-- Add the status-tracking columns to dispensary_crawler_profiles.
-- ADD COLUMN IF NOT EXISTS keeps this re-runnable and tests the exact table
-- being altered; an information_schema.columns lookup by table_name alone is
-- not schema-qualified and can be fooled by a same-named table elsewhere.
ALTER TABLE dispensary_crawler_profiles
    -- Consecutive success count, for auto-promotion
    ADD COLUMN IF NOT EXISTS consecutive_successes INTEGER DEFAULT 0,
    -- Consecutive failure count, for auto-demotion
    ADD COLUMN IF NOT EXISTS consecutive_failures INTEGER DEFAULT 0,
    -- When the status last changed
    ADD COLUMN IF NOT EXISTS status_changed_at TIMESTAMP WITH TIME ZONE,
    -- Why the status last changed
    ADD COLUMN IF NOT EXISTS status_reason TEXT;
-- ============================================================
-- VALID STATUS VALUES
-- ============================================================
-- Status values for dispensary_crawler_profiles.status:
-- 'sandbox' - Newly created, being validated
-- 'production' - Healthy, actively crawled
-- 'needs_manual' - Requires human intervention
-- 'failing' - Multiple consecutive failures
-- 'disabled' - Manually disabled
-- 'legacy' - No profile, uses default method (virtual status)

View File

@@ -0,0 +1,163 @@
-- Migration 069: Seven-Stage Status System
--
-- Defines an explicit seven-stage pipeline for the store lifecycle:
--   1. discovered - found via the Dutchie API, raw data
--   2. validated  - passed field checks, ready for promotion
--   3. promoted   - in the dispensaries table, has a crawler profile
--   4. sandbox    - first crawl attempted, testing
--   5. hydrating  - products are being loaded/updated
--   6. production - healthy, scheduled crawls via Horizon
--   7. failing    - crawl errors, needs attention
-- ============================================================
-- STAGE ENUM TYPE
-- ============================================================
-- CREATE TYPE has no IF NOT EXISTS form, so the catalog is checked by hand and
-- the block exits early when a type named store_stage is already present.
-- NOTE(review): the stage columns added in this migration are VARCHAR(20), not
-- store_stage — confirm whether the enum is meant to be used by them.
DO $$
BEGIN
    IF EXISTS (SELECT 1 FROM pg_type WHERE typname = 'store_stage') THEN
        RETURN;
    END IF;

    CREATE TYPE store_stage AS ENUM (
        'discovered',
        'validated',
        'promoted',
        'sandbox',
        'hydrating',
        'production',
        'failing'
    );
END $$;
-- ============================================================
-- UPDATE DISCOVERY LOCATIONS TABLE
-- ============================================================
-- Add the stage column (the successor to status) WITHOUT a default first.
-- Existing rows then start out NULL and are picked up by the backfill below.
-- Adding the column with DEFAULT 'discovered' would stamp every existing row
-- as 'discovered', and the backfill's WHERE clause would match nothing.
ALTER TABLE dutchie_discovery_locations
    ADD COLUMN IF NOT EXISTS stage VARCHAR(20);

-- Backfill stage from the legacy status for rows that have no stage yet.
-- Any status not listed (including NULL) maps to 'discovered'.
-- NOTE(review): on a database where stage was already added with a default,
-- no row is NULL/empty and this backfill is a no-op; such rows would need a
-- separate one-off correction.
UPDATE dutchie_discovery_locations
SET stage = CASE status
        WHEN 'verified' THEN 'validated'
        WHEN 'merged'   THEN 'validated'
        WHEN 'rejected' THEN 'failing'
        ELSE 'discovered'
    END
WHERE stage IS NULL
   OR stage = '';

-- Rows inserted from now on start at the first pipeline stage.
ALTER TABLE dutchie_discovery_locations
    ALTER COLUMN stage SET DEFAULT 'discovered';
-- ============================================================
-- UPDATE CRAWLER PROFILES TABLE
-- ============================================================
-- Remap legacy dispensary_crawler_profiles.status values onto the seven stages:
--   needs_manual, disabled      -> failing
--   NULL or any other non-stage -> promoted
-- Only rows whose status is not already a valid stage are touched. That keeps
-- the statement re-runnable: without the WHERE clause a later run would reset
-- valid stages such as 'hydrating' to 'promoted' and rewrite every row.
UPDATE dispensary_crawler_profiles
SET status = CASE
        WHEN status IN ('needs_manual', 'disabled') THEN 'failing'
        ELSE 'promoted'
    END
WHERE status IS NULL
   OR status NOT IN (
        'discovered',
        'validated',
        'promoted',
        'sandbox',
        'hydrating',
        'production',
        'failing'
   );
-- ============================================================
-- ADD STAGE TRACKING TO DISPENSARIES
-- ============================================================
-- stage is added WITHOUT a default so that existing rows start out NULL and
-- are filled by the backfill below. Adding it with DEFAULT 'promoted' would
-- pre-fill every row and the backfill's WHERE clause would match nothing.
ALTER TABLE dispensaries
    -- Current pipeline stage, for quick filtering
    ADD COLUMN IF NOT EXISTS stage VARCHAR(20),
    -- When the stage last changed
    ADD COLUMN IF NOT EXISTS stage_changed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    -- First crawl attempt (tracks the sandbox -> production transition)
    ADD COLUMN IF NOT EXISTS first_crawl_at TIMESTAMP WITH TIME ZONE,
    -- Most recent successful crawl
    ADD COLUMN IF NOT EXISTS last_successful_crawl_at TIMESTAMP WITH TIME ZONE;

-- Initial stage for dispensaries that have none yet: the status of their most
-- recently updated enabled crawler profile, or 'promoted' when there is none.
-- NOTE(review): on a database where stage was already added with a default,
-- no row is NULL/empty and this backfill is a no-op.
UPDATE dispensaries AS d
SET stage = COALESCE(
        (
            SELECT dcp.status
            FROM dispensary_crawler_profiles AS dcp
            WHERE dcp.dispensary_id = d.id
              AND dcp.enabled = TRUE
            ORDER BY dcp.updated_at DESC
            LIMIT 1
        ),
        'promoted'
    )
WHERE d.stage IS NULL
   OR d.stage = '';

-- Rows inserted from now on start as 'promoted'.
ALTER TABLE dispensaries
    ALTER COLUMN stage SET DEFAULT 'promoted';
-- ============================================================
-- INDEXES FOR STAGE-BASED QUERIES
-- ============================================================
-- dispensaries: by stage alone, and by stage together with state.
CREATE INDEX IF NOT EXISTS idx_dispensaries_stage
    ON dispensaries (stage);
CREATE INDEX IF NOT EXISTS idx_dispensaries_stage_state
    ON dispensaries (stage, state);

-- Discovery locations by stage; crawler profiles by status.
CREATE INDEX IF NOT EXISTS idx_discovery_locations_stage
    ON dutchie_discovery_locations (stage);
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_status
    ON dispensary_crawler_profiles (status);
-- ============================================================
-- STAGE TRANSITION LOG
-- ============================================================
-- One row per stage change. entity_type + entity_id identify the row that
-- moved; there is no foreign key because the id can refer to either table.
CREATE TABLE IF NOT EXISTS stage_transitions (
    id               SERIAL PRIMARY KEY,

    -- Which entity changed
    entity_type      VARCHAR(20) NOT NULL,   -- 'discovery_location' or 'dispensary'
    entity_id        INTEGER NOT NULL,

    -- The transition itself
    from_stage       VARCHAR(20),
    to_stage         VARCHAR(20) NOT NULL,

    -- What caused it
    trigger_type     VARCHAR(50) NOT NULL,   -- one of: 'api', 'scheduler', 'manual', 'auto'
    trigger_endpoint VARCHAR(200),

    -- How it went
    success          BOOLEAN DEFAULT TRUE,
    error_message    TEXT,
    metadata         JSONB,

    -- Timing
    duration_ms      INTEGER,
    created_at       TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_stage_transitions_entity
    ON stage_transitions (entity_type, entity_id);
CREATE INDEX IF NOT EXISTS idx_stage_transitions_to_stage
    ON stage_transitions (to_stage);
CREATE INDEX IF NOT EXISTS idx_stage_transitions_created
    ON stage_transitions (created_at DESC);
-- ============================================================
-- COMMENTS
-- ============================================================
-- Catalog comments for the transition log and the stage-tracking columns.
-- The stage comment lists all seven pipeline stages, including 'hydrating'.
COMMENT ON TABLE stage_transitions
    IS 'Audit log for all stage transitions in the pipeline';
COMMENT ON COLUMN dispensaries.stage
    IS 'Current pipeline stage: discovered, validated, promoted, sandbox, hydrating, production, failing';
COMMENT ON COLUMN dispensaries.stage_changed_at
    IS 'When the stage was last changed';
COMMENT ON COLUMN dispensaries.first_crawl_at
    IS 'When the first crawl was attempted (sandbox stage)';
COMMENT ON COLUMN dispensaries.last_successful_crawl_at
    IS 'When the last successful crawl completed';

View File

@@ -0,0 +1,239 @@
-- ============================================================================
-- Migration 070: Product Variants Tables
-- ============================================================================
--
-- Purpose: Store variant-level pricing and inventory as first-class entities
-- to enable time-series analytics, price comparisons, and sale tracking.
--
-- Enables queries like:
-- - Price history for a specific variant (1g Blue Dream over time)
-- - Sale frequency analysis (how often is this on special?)
-- - Cross-store price comparison (who has cheapest 1g flower?)
-- - Current specials across all stores
--
-- RULES:
-- - STRICTLY ADDITIVE (no DROP, DELETE, TRUNCATE)
-- - All new tables use IF NOT EXISTS
-- - All indexes use IF NOT EXISTS
--
-- ============================================================================
-- ============================================================================
-- SECTION 1: PRODUCT_VARIANTS TABLE (Current State)
-- ============================================================================
-- Current pricing and inventory for each product + option pair. The pair
-- (store_product_id, option) is unique, and a variant is deleted together with
-- its store product or its dispensary (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS product_variants (
    id                   SERIAL PRIMARY KEY,
    store_product_id     INTEGER NOT NULL REFERENCES store_products(id) ON DELETE CASCADE,
    dispensary_id        INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,

    -- Variant identity (from Dutchie POSMetaData.children)
    option               VARCHAR(100) NOT NULL,   -- e.g. "1g", "3.5g", "1/8oz", "100mg"
    canonical_sku        VARCHAR(100),            -- Dutchie canonicalSKU
    canonical_id         VARCHAR(100),            -- Dutchie canonicalID
    canonical_name       VARCHAR(500),            -- Dutchie canonicalName

    -- Current prices, in dollars (not cents)
    price_rec            NUMERIC(10,2),
    price_med            NUMERIC(10,2),
    price_rec_special    NUMERIC(10,2),
    price_med_special    NUMERIC(10,2),

    -- Current inventory
    quantity             INTEGER,
    quantity_available   INTEGER,
    in_stock             BOOLEAN DEFAULT TRUE,

    -- Whether the variant is currently on special/sale
    is_on_special        BOOLEAN DEFAULT FALSE,

    -- Parsed weight/size, for analytics
    weight_value         NUMERIC(10,2),           -- e.g. 1, 3.5, 28
    weight_unit          VARCHAR(20),             -- e.g. g, oz, mg, ml

    -- Lifecycle timestamps
    first_seen_at        TIMESTAMPTZ DEFAULT NOW(),
    last_seen_at         TIMESTAMPTZ DEFAULT NOW(),
    last_price_change_at TIMESTAMPTZ,
    last_stock_change_at TIMESTAMPTZ,
    created_at           TIMESTAMPTZ DEFAULT NOW(),
    updated_at           TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE (store_product_id, option)
);
-- Indexes for the common variant queries.
CREATE INDEX IF NOT EXISTS idx_variants_store_product
    ON product_variants (store_product_id);
CREATE INDEX IF NOT EXISTS idx_variants_dispensary
    ON product_variants (dispensary_id);
CREATE INDEX IF NOT EXISTS idx_variants_option
    ON product_variants (option);

-- Partial indexes: only in-stock rows / only rows on special, per dispensary.
CREATE INDEX IF NOT EXISTS idx_variants_in_stock
    ON product_variants (dispensary_id, in_stock)
    WHERE in_stock = TRUE;
CREATE INDEX IF NOT EXISTS idx_variants_on_special
    ON product_variants (dispensary_id, is_on_special)
    WHERE is_on_special = TRUE;

-- Partial indexes that leave out rows where the indexed column is NULL.
CREATE INDEX IF NOT EXISTS idx_variants_canonical_sku
    ON product_variants (canonical_sku)
    WHERE canonical_sku IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_variants_price_rec
    ON product_variants (price_rec)
    WHERE price_rec IS NOT NULL;

COMMENT ON TABLE product_variants
    IS 'Current state of each product variant (weight/size option). One row per product+option.';
COMMENT ON COLUMN product_variants.option
    IS 'Weight/size option string from Dutchie (e.g., "1g", "3.5g", "1/8oz")';
COMMENT ON COLUMN product_variants.canonical_sku
    IS 'Dutchie POS SKU for cross-store matching';
-- ============================================================================
-- SECTION 2: PRODUCT_VARIANT_SNAPSHOTS TABLE (Historical Data)
-- ============================================================================
-- Time series of variant pricing and inventory: one row per variant per crawl.
-- CRITICAL: rows in this table must never be deleted by application code.
-- NOTE(review): product_variant_id and dispensary_id are ON DELETE CASCADE, so
-- deleting a variant or a dispensary still removes its snapshots — confirm
-- that this is compatible with the never-delete rule.
CREATE TABLE IF NOT EXISTS product_variant_snapshots (
    id                 SERIAL PRIMARY KEY,
    product_variant_id INTEGER NOT NULL REFERENCES product_variants(id) ON DELETE CASCADE,
    store_product_id   INTEGER REFERENCES store_products(id) ON DELETE SET NULL,
    dispensary_id      INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
    crawl_run_id       INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,

    -- Variant identity, denormalized for query performance
    option             VARCHAR(100) NOT NULL,

    -- Prices when captured
    price_rec          NUMERIC(10,2),
    price_med          NUMERIC(10,2),
    price_rec_special  NUMERIC(10,2),
    price_med_special  NUMERIC(10,2),

    -- Inventory when captured
    quantity           INTEGER,
    in_stock           BOOLEAN DEFAULT TRUE,

    -- Special status when captured
    is_on_special      BOOLEAN DEFAULT FALSE,

    -- FALSE means the variant was missing from the crawl feed
    is_present_in_feed BOOLEAN DEFAULT TRUE,

    -- When the snapshot was taken
    captured_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at         TIMESTAMPTZ DEFAULT NOW()
);

-- Time-series indexes; each one orders by capture time, newest first.
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_variant
    ON product_variant_snapshots (product_variant_id, captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_dispensary
    ON product_variant_snapshots (dispensary_id, captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_crawl
    ON product_variant_snapshots (crawl_run_id)
    WHERE crawl_run_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_captured
    ON product_variant_snapshots (captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_special
    ON product_variant_snapshots (is_on_special, captured_at DESC)
    WHERE is_on_special = TRUE;
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_option
    ON product_variant_snapshots (option, captured_at DESC);

COMMENT ON TABLE product_variant_snapshots
    IS 'Historical variant pricing/inventory. One row per variant per crawl. NEVER DELETE.';
-- ============================================================================
-- SECTION 3: USEFUL VIEWS
-- ============================================================================
-- v_current_specials: in-stock variants flagged as on special whose special
-- recreational price is set and lower than the regular one, joined to their
-- product and dispensary. discount_percent is the saving as a percentage of
-- price_rec, rounded to one decimal; NULLIF guards against a zero price_rec.
CREATE OR REPLACE VIEW v_current_specials AS
SELECT
    pv.id             AS variant_id,
    sp.id             AS product_id,
    sp.name_raw       AS product_name,
    sp.brand_name_raw AS brand_name,
    sp.category_raw   AS category,
    d.id              AS dispensary_id,
    d.name            AS dispensary_name,
    d.city,
    d.state,
    pv.option,
    pv.price_rec,
    pv.price_rec_special,
    ROUND(
        ((pv.price_rec - pv.price_rec_special) / NULLIF(pv.price_rec, 0)) * 100,
        1
    )                 AS discount_percent,
    pv.quantity,
    pv.in_stock,
    pv.last_seen_at
FROM product_variants AS pv
INNER JOIN store_products AS sp
    ON sp.id = pv.store_product_id
INNER JOIN dispensaries AS d
    ON d.id = pv.dispensary_id
WHERE pv.is_on_special = TRUE
  AND pv.in_stock = TRUE
  AND pv.price_rec_special IS NOT NULL
  AND pv.price_rec_special < pv.price_rec;

COMMENT ON VIEW v_current_specials
    IS 'All products currently on special across all stores';
-- v_price_comparison: in-stock variants that have at least one recreational
-- price, ranked cheapest-first within each (product name, option) group.
-- The ranking price is the special price when one is set, else the regular
-- price. RANK() gives ties the same rank and leaves gaps after them.
-- NOTE(review): the partition is (name_raw, option) only; brand is not part of
-- it, so same-named products of different brands share a ranking — confirm
-- that is intended.
CREATE OR REPLACE VIEW v_price_comparison AS
SELECT
    sp.name_raw       AS product_name,
    sp.brand_name_raw AS brand_name,
    sp.category_raw   AS category,
    pv.option,
    d.id              AS dispensary_id,
    d.name            AS dispensary_name,
    d.city,
    pv.price_rec,
    pv.price_rec_special,
    pv.is_on_special,
    pv.in_stock,
    pv.quantity,
    RANK() OVER (
        PARTITION BY sp.name_raw, pv.option
        ORDER BY COALESCE(pv.price_rec_special, pv.price_rec) ASC
    )                 AS price_rank
FROM product_variants AS pv
INNER JOIN store_products AS sp
    ON sp.id = pv.store_product_id
INNER JOIN dispensaries AS d
    ON d.id = pv.dispensary_id
WHERE pv.in_stock = TRUE
  AND (pv.price_rec IS NOT NULL OR pv.price_rec_special IS NOT NULL);

COMMENT ON VIEW v_price_comparison
    IS 'Compare prices for same product across stores, ranked by price';
-- v_latest_variant_snapshots: the newest snapshot row for each variant.
-- DISTINCT ON keeps the first row per product_variant_id in ORDER BY order.
-- id DESC is a tie-breaker so that, when two snapshots of a variant share the
-- same captured_at, the same row is returned every time instead of an
-- arbitrary one.
CREATE OR REPLACE VIEW v_latest_variant_snapshots AS
SELECT DISTINCT ON (pvs.product_variant_id)
    pvs.*
FROM product_variant_snapshots AS pvs
ORDER BY
    pvs.product_variant_id,
    pvs.captured_at DESC,
    pvs.id DESC;
-- ============================================================================
-- SECTION 4: HELPER FUNCTION FOR SALE FREQUENCY
-- ============================================================================
-- get_variant_sale_stats(p_variant_id, p_days DEFAULT 30)
--
-- Aggregates the snapshots of one variant captured within the last p_days days
-- and always returns exactly one row:
--   total_snapshots       number of snapshots in the window
--   times_on_special      how many of them had is_on_special set
--   special_frequency_pct times_on_special as a percentage of total (1 decimal);
--                         NULL when the window holds no snapshots
--   avg_discount_pct      average discount of the on-special snapshots that
--                         have both prices (1 decimal)
--   min_price             lowest effective price (special price if set, else regular)
--   max_price             highest regular recreational price
--   avg_price             average effective price (2 decimals)
--
-- The function only reads data, so it is declared STABLE; this lets the planner
-- treat repeated calls within one statement as returning the same result.
-- make_interval builds the window from the integer directly instead of
-- assembling an interval from a concatenated string.
CREATE OR REPLACE FUNCTION get_variant_sale_stats(p_variant_id INTEGER, p_days INTEGER DEFAULT 30)
RETURNS TABLE (
    total_snapshots BIGINT,
    times_on_special BIGINT,
    special_frequency_pct NUMERIC,
    avg_discount_pct NUMERIC,
    min_price NUMERIC,
    max_price NUMERIC,
    avg_price NUMERIC
)
LANGUAGE plpgsql
STABLE
AS $$
BEGIN
    RETURN QUERY
    SELECT
        COUNT(*) AS total_snapshots,
        COUNT(*) FILTER (WHERE pvs.is_on_special) AS times_on_special,
        -- NULLIF turns an empty window into NULL instead of a division by zero.
        ROUND(
            (COUNT(*) FILTER (WHERE pvs.is_on_special)::NUMERIC / NULLIF(COUNT(*), 0)) * 100,
            1
        ) AS special_frequency_pct,
        -- The CASE has no ELSE, so snapshots that do not qualify yield NULL and
        -- are ignored by AVG.
        ROUND(
            AVG(
                CASE
                    WHEN pvs.is_on_special
                     AND pvs.price_rec_special IS NOT NULL
                     AND pvs.price_rec IS NOT NULL
                    THEN ((pvs.price_rec - pvs.price_rec_special) / NULLIF(pvs.price_rec, 0)) * 100
                END
            ),
            1
        ) AS avg_discount_pct,
        MIN(COALESCE(pvs.price_rec_special, pvs.price_rec)) AS min_price,
        MAX(pvs.price_rec) AS max_price,
        ROUND(AVG(COALESCE(pvs.price_rec_special, pvs.price_rec)), 2) AS avg_price
    FROM product_variant_snapshots AS pvs
    WHERE pvs.product_variant_id = p_variant_id
      AND pvs.captured_at >= NOW() - make_interval(days => p_days);
END;
$$;

COMMENT ON FUNCTION get_variant_sale_stats
    IS 'Get sale frequency and price stats for a variant over N days';

-- ============================================================================
-- DONE
-- ============================================================================
-- Completion marker returned to whoever runs the script.
SELECT 'Migration 070 completed. Product variants tables ready for time-series analytics.' AS status;

View File

@@ -0,0 +1,53 @@
-- Migration 071: Harmonize store_products with dutchie_products
-- Extends store_products with the columns it was missing so that a single
-- canonical table can be used. Every column is added with IF NOT EXISTS, so
-- columns that are already present are left untouched.

-- Product details
ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS description  TEXT,
    ADD COLUMN IF NOT EXISTS weight       VARCHAR(50),
    ADD COLUMN IF NOT EXISTS weights      JSONB,
    ADD COLUMN IF NOT EXISTS measurements JSONB;

-- Cannabinoid/terpene data
ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS effects         JSONB,
    ADD COLUMN IF NOT EXISTS terpenes        JSONB,
    ADD COLUMN IF NOT EXISTS cannabinoids_v2 JSONB,
    ADD COLUMN IF NOT EXISTS thc_content     NUMERIC(10,4),
    ADD COLUMN IF NOT EXISTS cbd_content     NUMERIC(10,4);

-- Images
ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS images            JSONB,
    ADD COLUMN IF NOT EXISTS primary_image_url TEXT;

-- Inventory
ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS total_quantity_available INTEGER DEFAULT 0;

-- Status/flags
ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS status                 VARCHAR(50),
    ADD COLUMN IF NOT EXISTS featured               BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS coming_soon            BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS visibility_lost        BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS visibility_lost_at     TIMESTAMP WITH TIME ZONE,
    ADD COLUMN IF NOT EXISTS visibility_restored_at TIMESTAMP WITH TIME ZONE;

-- Threshold flags (Dutchie-specific)
ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS is_below_threshold              BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS is_below_kiosk_threshold        BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS options_below_threshold         BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS options_below_kiosk_threshold   BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS certificate_of_analysis_enabled BOOLEAN DEFAULT FALSE;

-- Platform metadata
ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS external_product_id VARCHAR(100),
    ADD COLUMN IF NOT EXISTS c_name              VARCHAR(500),
    ADD COLUMN IF NOT EXISTS past_c_names        TEXT[],
    ADD COLUMN IF NOT EXISTS latest_raw_payload  JSONB,
    ADD COLUMN IF NOT EXISTS created_at_platform TIMESTAMP WITH TIME ZONE,
    ADD COLUMN IF NOT EXISTS updated_at_platform TIMESTAMP WITH TIME ZONE;

-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_store_products_external_id
    ON store_products (external_product_id);
-- Partial index: only rows currently flagged visibility_lost are indexed.
CREATE INDEX IF NOT EXISTS idx_store_products_visibility_lost
    ON store_products (visibility_lost)
    WHERE visibility_lost = TRUE;
CREATE INDEX IF NOT EXISTS idx_store_products_status
    ON store_products (status);

-- Table-level description
COMMENT ON TABLE store_products IS 'Canonical product table - consolidated from dutchie_products';

View File

@@ -0,0 +1,74 @@
-- Migration 072: Create compatibility views for store_products and store_product_snapshots
-- These views provide backward-compatible column names for API routes
-- v_products: presents store_products rows under the column names of the
-- legacy dutchie_products schema. Column order is part of the view's contract.
CREATE OR REPLACE VIEW v_products AS
    SELECT
        sp.id,
        sp.dispensary_id,
        -- provider_product_id is published under two legacy names
        sp.provider_product_id AS external_product_id,
        sp.provider_product_id AS dutchie_id,
        sp.name_raw            AS name,
        sp.brand_name_raw      AS brand_name,
        sp.category_raw        AS type,
        sp.subcategory_raw     AS subcategory,
        sp.strain_type,
        sp.thc_percent         AS thc,
        sp.cbd_percent         AS cbd,
        sp.stock_status,
        sp.is_in_stock,
        sp.stock_quantity,
        sp.image_url,
        sp.primary_image_url,
        sp.images,
        sp.effects,
        sp.description,
        sp.is_on_special,
        sp.featured,
        sp.medical_only,
        sp.rec_only,
        -- the table's own external_product_id column surfaces as external_id
        sp.external_product_id AS external_id,
        sp.provider,
        sp.created_at,
        sp.updated_at
    FROM store_products AS sp;
-- v_product_snapshots: presents store_product_snapshots rows under legacy
-- column names. Column order is part of the view's contract.
CREATE OR REPLACE VIEW v_product_snapshots AS
    SELECT
        sps.id,
        sps.store_product_id,
        sps.dispensary_id,
        sps.provider,
        sps.provider_product_id,
        sps.crawl_run_id,
        sps.captured_at AS crawled_at,
        sps.name_raw,
        sps.brand_name_raw,
        sps.category_raw,
        sps.subcategory_raw,
        -- Prices: each price column is multiplied by 100 and cast to integer
        -- for the *_cents columns. A NULL price stays NULL through the
        -- multiplication and the cast, so no explicit NULL guard is required.
        -- The regular price feeds both the min and the max column.
        CAST(sps.price_rec * 100 AS integer)         AS rec_min_price_cents,
        CAST(sps.price_rec * 100 AS integer)         AS rec_max_price_cents,
        CAST(sps.price_rec_special * 100 AS integer) AS rec_min_special_price_cents,
        CAST(sps.price_med * 100 AS integer)         AS med_min_price_cents,
        CAST(sps.price_med * 100 AS integer)         AS med_max_price_cents,
        CAST(sps.price_med_special * 100 AS integer) AS med_min_special_price_cents,
        sps.is_on_special AS special,
        sps.discount_percent,
        sps.is_in_stock,
        sps.stock_quantity,
        sps.stock_status,
        -- stock_quantity is exposed a second time under the legacy name
        sps.stock_quantity AS total_quantity_available,
        sps.thc_percent,
        sps.cbd_percent,
        sps.image_url,
        sps.raw_data AS options,
        sps.created_at
    FROM store_product_snapshots AS sps;
-- Indexes on the tables that back v_products and v_product_snapshots.
-- IF NOT EXISTS makes each statement a no-op when the index name is taken.
CREATE INDEX IF NOT EXISTS idx_store_products_dispensary
    ON store_products (dispensary_id);
CREATE INDEX IF NOT EXISTS idx_store_products_stock
    ON store_products (stock_status);
CREATE INDEX IF NOT EXISTS idx_store_snapshots_product
    ON store_product_snapshots (store_product_id);
-- Descending key on captured_at.
CREATE INDEX IF NOT EXISTS idx_store_snapshots_captured
    ON store_product_snapshots (captured_at DESC);