Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
91 lines
4.2 KiB
PL/PgSQL
91 lines
4.2 KiB
PL/PgSQL
-- Migration 037: Add per-store crawler profiles for Dutchie dispensaries
|
|
-- This enables per-store crawler configuration without changing shared logic
|
|
-- Phase 1: Schema only - no automatic behavior changes
|
|
|
|
-- Create the crawler profiles table
|
|
CREATE TABLE IF NOT EXISTS dispensary_crawler_profiles (
|
|
id SERIAL PRIMARY KEY,
|
|
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
|
|
|
-- Human readable name for this profile
|
|
profile_name VARCHAR(255) NOT NULL,
|
|
|
|
-- High-level type, e.g. 'dutchie', 'treez', 'jane'
|
|
crawler_type VARCHAR(50) NOT NULL,
|
|
|
|
-- Optional key for mapping to a per-store crawler module later,
|
|
-- e.g. 'curaleaf-dispensary-gilbert'
|
|
profile_key VARCHAR(255),
|
|
|
|
-- Generic configuration bucket; will hold selectors, URLs, flags, etc.
|
|
config JSONB NOT NULL DEFAULT '{}'::jsonb,
|
|
|
|
-- Execution hints (safe defaults; can be overridden in config if needed)
|
|
timeout_ms INTEGER DEFAULT 30000,
|
|
download_images BOOLEAN DEFAULT TRUE,
|
|
track_stock BOOLEAN DEFAULT TRUE,
|
|
|
|
version INTEGER DEFAULT 1,
|
|
enabled BOOLEAN DEFAULT TRUE,
|
|
|
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
|
);
|
|
|
|
-- Unique index on dispensary_id + profile_name
|
|
CREATE UNIQUE INDEX IF NOT EXISTS dispensary_crawler_profiles_unique_name
|
|
ON dispensary_crawler_profiles (dispensary_id, profile_name);
|
|
|
|
-- Index for finding enabled profiles by type
|
|
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_type_enabled
|
|
ON dispensary_crawler_profiles (crawler_type, enabled);
|
|
|
|
-- Index for dispensary lookup
|
|
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_dispensary
|
|
ON dispensary_crawler_profiles (dispensary_id);
|
|
|
|
-- Add FK from dispensaries to active profile
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
|
|
WHERE table_name = 'dispensaries'
|
|
AND column_name = 'active_crawler_profile_id') THEN
|
|
ALTER TABLE dispensaries
|
|
ADD COLUMN active_crawler_profile_id INTEGER NULL
|
|
REFERENCES dispensary_crawler_profiles(id) ON DELETE SET NULL;
|
|
END IF;
|
|
END $$;
|
|
|
|
-- Create index on the FK for faster joins
|
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_active_profile
|
|
ON dispensaries (active_crawler_profile_id)
|
|
WHERE active_crawler_profile_id IS NOT NULL;
|
|
|
|
-- Create or replace trigger function for updated_at
|
|
CREATE OR REPLACE FUNCTION set_updated_at_timestamp()
|
|
RETURNS TRIGGER AS $$
|
|
BEGIN
|
|
NEW.updated_at = NOW();
|
|
RETURN NEW;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
-- Add trigger to keep updated_at fresh (drop first if exists to avoid duplicates)
|
|
DROP TRIGGER IF EXISTS dispensary_crawler_profiles_set_timestamp ON dispensary_crawler_profiles;
|
|
CREATE TRIGGER dispensary_crawler_profiles_set_timestamp
|
|
BEFORE UPDATE ON dispensary_crawler_profiles
|
|
FOR EACH ROW EXECUTE PROCEDURE set_updated_at_timestamp();
|
|
|
|
-- Add comments for documentation
|
|
COMMENT ON TABLE dispensary_crawler_profiles IS 'Per-store crawler configuration profiles. Each dispensary can have multiple profiles but only one active at a time.';
|
|
COMMENT ON COLUMN dispensary_crawler_profiles.profile_name IS 'Human readable name for the profile, e.g. "Curaleaf Gilbert - Dutchie v1"';
|
|
COMMENT ON COLUMN dispensary_crawler_profiles.crawler_type IS 'The crawler implementation type: dutchie, treez, jane, sandbox, custom';
|
|
COMMENT ON COLUMN dispensary_crawler_profiles.profile_key IS 'Optional identifier for per-store crawler module mapping';
|
|
COMMENT ON COLUMN dispensary_crawler_profiles.config IS 'JSONB configuration for the crawler. Schema depends on crawler_type.';
|
|
COMMENT ON COLUMN dispensary_crawler_profiles.timeout_ms IS 'Request timeout in milliseconds (default 30000)';
|
|
COMMENT ON COLUMN dispensary_crawler_profiles.download_images IS 'Whether to download product images locally';
|
|
COMMENT ON COLUMN dispensary_crawler_profiles.track_stock IS 'Whether to track inventory/stock levels';
|
|
COMMENT ON COLUMN dispensary_crawler_profiles.version IS 'Profile version number for A/B testing or upgrades';
|
|
COMMENT ON COLUMN dispensary_crawler_profiles.enabled IS 'Whether this profile can be used (soft delete)';
|
|
COMMENT ON COLUMN dispensaries.active_crawler_profile_id IS 'FK to the currently active crawler profile for this dispensary';
|