Files
cannaiq/backend/migrations/037_dispensary_crawler_profiles.sql
Kelly b4a2fb7d03 feat: Add v2 architecture with multi-state support and orchestrator services
Major additions:
- Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare
- Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator
- Discovery system: dutchie discovery service, geo validation, city seeding scripts
- Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages
- Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram)
- Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata

Frontend pages added:
- Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores
- StateHeatmap, CrossStateCompare, SyncInfoPanel

Components added:
- StateSelector, OrchestratorTraceModal, WorkflowStepper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-07 11:30:57 -07:00

91 lines
4.2 KiB
PL/PgSQL

-- Migration 037: Add per-store crawler profiles for Dutchie dispensaries
-- This enables per-store crawler configuration without changing shared logic
-- Phase 1: Schema only - no automatic behavior changes
-- dispensary_crawler_profiles: one row per crawler configuration attached to a
-- dispensary. A profile row is deleted together with its dispensary
-- (ON DELETE CASCADE on dispensary_id).
--
-- NOTE(review): the columns that only carry a DEFAULT (timeout_ms through
-- updated_at) are not declared NOT NULL, so an explicit NULL is still accepted
-- for them -- confirm readers tolerate NULL before relying on the defaults.
CREATE TABLE IF NOT EXISTS dispensary_crawler_profiles (
    id              SERIAL       PRIMARY KEY,
    dispensary_id   INTEGER      NOT NULL
                    REFERENCES dispensaries(id) ON DELETE CASCADE,

    -- Display name shown to humans.
    profile_name    VARCHAR(255) NOT NULL,

    -- Broad crawler family, e.g. 'dutchie', 'treez', 'jane'.
    crawler_type    VARCHAR(50)  NOT NULL,

    -- Optional lookup key intended for mapping to a per-store crawler module
    -- later, e.g. 'curaleaf-dispensary-gilbert'. May be NULL.
    profile_key     VARCHAR(255),

    -- Free-form settings (selectors, URLs, flags, ...); defaults to an empty
    -- JSON object rather than NULL.
    config          JSONB        NOT NULL DEFAULT '{}'::jsonb,

    -- Execution hints with safe defaults; config may override them if needed.
    timeout_ms      INTEGER      DEFAULT 30000,
    download_images BOOLEAN      DEFAULT TRUE,
    track_stock     BOOLEAN      DEFAULT TRUE,

    -- Bookkeeping.
    version         INTEGER      DEFAULT 1,
    enabled         BOOLEAN      DEFAULT TRUE,
    created_at      TIMESTAMPTZ  DEFAULT NOW(),
    updated_at      TIMESTAMPTZ  DEFAULT NOW()
);
-- Indexes on dispensary_crawler_profiles. All three are created only when
-- missing, so re-running this migration is harmless.

-- Within one dispensary, profile names must be unique.
CREATE UNIQUE INDEX IF NOT EXISTS dispensary_crawler_profiles_unique_name
    ON dispensary_crawler_profiles USING btree (dispensary_id, profile_name);

-- Supports filtering profiles by crawler type together with the enabled flag.
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_type_enabled
    ON dispensary_crawler_profiles USING btree (crawler_type, enabled);

-- Supports fetching every profile that belongs to a given dispensary.
-- NOTE(review): the unique index above already leads with dispensary_id, so
-- this index may be redundant -- confirm nothing references it by name before
-- dropping it in a later migration.
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_dispensary
    ON dispensary_crawler_profiles USING btree (dispensary_id);
-- Add dispensaries.active_crawler_profile_id: a nullable FK naming the profile
-- currently in use for the dispensary. Deleting the referenced profile resets
-- the column to NULL (ON DELETE SET NULL) instead of blocking the delete.
--
-- ADD COLUMN IF NOT EXISTS keeps the migration re-runnable and makes the
-- existence test apply to exactly the table this ALTER resolves to. The earlier
-- information_schema.columns lookup matched on table_name/column_name only, so
-- a same-named table and column in any other schema caused the column to be
-- skipped here and the index/comment statements that follow to fail.
--
-- NOTE(review): the FK only guarantees the profile exists; nothing here checks
-- that the profile's dispensary_id equals this dispensary's id.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS active_crawler_profile_id INTEGER NULL
        REFERENCES dispensary_crawler_profiles(id) ON DELETE SET NULL;
-- Partial index for joins from dispensaries to their active profile. Only rows
-- whose active_crawler_profile_id is set are indexed; rows with NULL are left
-- out by the WHERE clause.
CREATE INDEX IF NOT EXISTS idx_dispensaries_active_profile
    ON dispensaries USING btree (active_crawler_profile_id)
    WHERE active_crawler_profile_id IS NOT NULL;
-- Trigger function: overwrites updated_at on the incoming row (NEW) with NOW()
-- and returns that row. It takes no arguments and reads no other column, so it
-- works for any table that has an updated_at column.
-- CREATE OR REPLACE means an existing function with this name and signature is
-- overwritten by this definition.
CREATE OR REPLACE FUNCTION set_updated_at_timestamp()
    RETURNS TRIGGER
    LANGUAGE plpgsql
AS $set_updated_at$
BEGIN
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$set_updated_at$;
-- Row-level BEFORE UPDATE trigger that runs set_updated_at_timestamp() for
-- every updated profile row. The trigger is dropped first so that re-running
-- the migration does not fail on an already existing trigger of the same name.
-- NOTE(review): EXECUTE PROCEDURE is kept as written; confirm the minimum
-- supported server version before switching to the EXECUTE FUNCTION spelling.
DROP TRIGGER IF EXISTS dispensary_crawler_profiles_set_timestamp
    ON dispensary_crawler_profiles;

CREATE TRIGGER dispensary_crawler_profiles_set_timestamp
    BEFORE UPDATE
    ON dispensary_crawler_profiles
    FOR EACH ROW
    EXECUTE PROCEDURE set_updated_at_timestamp();
-- Catalog descriptions (visible via \d+ and pg_description) for the new table,
-- each of its configurable columns, and the new column on dispensaries.

-- Table
COMMENT ON TABLE dispensary_crawler_profiles
    IS 'Per-store crawler configuration profiles. Each dispensary can have multiple profiles but only one active at a time.';

-- Identification columns
COMMENT ON COLUMN dispensary_crawler_profiles.profile_name
    IS 'Human readable name for the profile, e.g. "Curaleaf Gilbert - Dutchie v1"';
COMMENT ON COLUMN dispensary_crawler_profiles.crawler_type
    IS 'The crawler implementation type: dutchie, treez, jane, sandbox, custom';
COMMENT ON COLUMN dispensary_crawler_profiles.profile_key
    IS 'Optional identifier for per-store crawler module mapping';

-- Configuration and execution hints
COMMENT ON COLUMN dispensary_crawler_profiles.config
    IS 'JSONB configuration for the crawler. Schema depends on crawler_type.';
COMMENT ON COLUMN dispensary_crawler_profiles.timeout_ms
    IS 'Request timeout in milliseconds (default 30000)';
COMMENT ON COLUMN dispensary_crawler_profiles.download_images
    IS 'Whether to download product images locally';
COMMENT ON COLUMN dispensary_crawler_profiles.track_stock
    IS 'Whether to track inventory/stock levels';

-- Bookkeeping columns
COMMENT ON COLUMN dispensary_crawler_profiles.version
    IS 'Profile version number for A/B testing or upgrades';
COMMENT ON COLUMN dispensary_crawler_profiles.enabled
    IS 'Whether this profile can be used (soft delete)';

-- Column added to dispensaries by this migration
COMMENT ON COLUMN dispensaries.active_crawler_profile_id
    IS 'FK to the currently active crawler profile for this dispensary';