feat: AZ dispensary harmonization with Dutchie source of truth
Major changes:
- Add harmonize-az-dispensaries.ts script to sync dispensaries with Dutchie API
- Add migration 057 for crawl_enabled and dutchie_verified fields
- Remove legacy dutchie-az module (replaced by platforms/dutchie)
- Clean up deprecated crawlers, scrapers, and orchestrator code
- Update location-discovery to not fall back to slug when ID is missing
- Add crawl-rotator service for proxy rotation
- Add types/index.ts for shared type definitions
- Add woodpecker-agent k8s manifest

Harmonization script:
- Queries ConsumerDispensaries API for all 32 AZ cities
- Matches dispensaries by platform_dispensary_id (not slug)
- Updates existing records with full Dutchie data
- Creates new records for unmatched Dutchie dispensaries
- Disables dispensaries not found in Dutchie

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,42 @@
|
||||
-- Migration 057: Add crawl_enabled and dutchie_verified fields to dispensaries
--
-- Purpose:
--   1. Add crawl_enabled to control which dispensaries get crawled
--   2. Add dutchie_verified to track Dutchie source-of-truth verification
--   3. Default existing records to crawl_enabled = TRUE to preserve behavior
--
-- After this migration, run the harmonization script to:
--   - Match dispensaries to Dutchie discoveries
--   - Update platform_dispensary_id from Dutchie
--   - Set dutchie_verified = TRUE for matches
--   - Set crawl_enabled = FALSE for unverified records
|
||||
|
||||
-- Crawl-control and Dutchie-verification columns, added in a single ALTER TABLE.
-- Each clause uses IF NOT EXISTS, so a column that is already present is skipped.
ALTER TABLE dispensaries
    -- Crawl gate; defaults to TRUE so existing crawls are not broken.
    ADD COLUMN IF NOT EXISTS crawl_enabled BOOLEAN DEFAULT TRUE,
    -- Tracks whether the record is verified against Dutchie; defaults to FALSE.
    ADD COLUMN IF NOT EXISTS dutchie_verified BOOLEAN DEFAULT FALSE,
    -- Verification timestamp; no default is declared, so it is NULL until set.
    ADD COLUMN IF NOT EXISTS dutchie_verified_at TIMESTAMPTZ;
|
||||
|
||||
-- Nullable link from a dispensary back to its dutchie_discovery_locations row.
-- No ON DELETE / ON UPDATE action is declared, so the default (NO ACTION) applies.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS dutchie_discovery_id BIGINT
        REFERENCES dutchie_discovery_locations (id);
|
||||
|
||||
-- Partial index for crawl queries: only crawl-enabled dispensaries are indexed.
-- crawl_enabled is deliberately not a key column: the predicate already pins it
-- to TRUE for every entry, so as a key it would be a constant that only widens
-- the index. Queries still need to filter on crawl_enabled = TRUE to use it.
-- NOTE(review): IF NOT EXISTS matches on the index name only, so a database that
-- already has idx_dispensaries_crawl_enabled keeps its existing definition.
CREATE INDEX IF NOT EXISTS idx_dispensaries_crawl_enabled
    ON dispensaries (state)
    WHERE crawl_enabled = TRUE;
|
||||
|
||||
-- Composite index keyed on the Dutchie verification flag, then state.
CREATE INDEX IF NOT EXISTS idx_dispensaries_dutchie_verified
    ON dispensaries (dutchie_verified, state);
|
||||
|
||||
-- Catalog documentation for the columns added by migration 057.
COMMENT ON COLUMN dispensaries.crawl_enabled IS
    'Whether this dispensary should be included in crawl jobs. Set to FALSE for unverified or problematic records.';

-- Matching is done on platform_dispensary_id, not on slug, per the
-- harmonization script description that accompanies this migration.
COMMENT ON COLUMN dispensaries.dutchie_verified IS
    'Whether this dispensary has been verified against Dutchie source of truth (matched by platform_dispensary_id or manually linked).';

COMMENT ON COLUMN dispensaries.dutchie_verified_at IS
    'Timestamp when Dutchie verification was completed.';

COMMENT ON COLUMN dispensaries.dutchie_discovery_id IS
    'Link to the dutchie_discovery_locations record this was matched/verified against.';
|
||||
56
backend/migrations/065_slug_verification_tracking.sql
Normal file
56
backend/migrations/065_slug_verification_tracking.sql
Normal file
@@ -0,0 +1,56 @@
|
||||
-- Migration 065: Slug verification and data source tracking
-- Adds columns to track when slug/menu data was verified and from what source
|
||||
|
||||
-- Provenance tracking on dispensaries: for the slug, the menu URL and the
-- platform ID, store where the value came from and when it was verified.
-- Also adds a two-character country code that defaults to 'US'.
ALTER TABLE dispensaries
    -- slug
    ADD COLUMN IF NOT EXISTS slug_source             VARCHAR(50),
    ADD COLUMN IF NOT EXISTS slug_verified_at        TIMESTAMP WITH TIME ZONE,
    ADD COLUMN IF NOT EXISTS slug_status             VARCHAR(20) DEFAULT 'unverified',
    -- menu URL
    ADD COLUMN IF NOT EXISTS menu_url_source         VARCHAR(50),
    ADD COLUMN IF NOT EXISTS menu_url_verified_at    TIMESTAMP WITH TIME ZONE,
    -- platform ID
    ADD COLUMN IF NOT EXISTS platform_id_source      VARCHAR(50),
    ADD COLUMN IF NOT EXISTS platform_id_verified_at TIMESTAMP WITH TIME ZONE,
    -- country
    ADD COLUMN IF NOT EXISTS country                 VARCHAR(2) DEFAULT 'US';
|
||||
|
||||
-- Partial index for finding stores that are not yet verified: it covers only
-- rows whose slug_status differs from 'verified'. Rows with a NULL slug_status
-- do not satisfy the predicate and are therefore not indexed.
CREATE INDEX IF NOT EXISTS idx_dispensaries_slug_status
    ON dispensaries (slug_status)
    WHERE slug_status <> 'verified';
|
||||
|
||||
-- Plain single-column index on the country code.
CREATE INDEX IF NOT EXISTS idx_dispensaries_country
    ON dispensaries (country);
|
||||
|
||||
-- Catalog documentation for the columns added by migration 065.

-- Slug provenance
COMMENT ON COLUMN dispensaries.slug_source IS
    'Source of slug data: dutchie_api, manual, azdhs, discovery, etc.';
COMMENT ON COLUMN dispensaries.slug_verified_at IS
    'When the slug was last verified against the source';
COMMENT ON COLUMN dispensaries.slug_status IS
    'Status: unverified, verified, invalid, changed';

-- Menu URL provenance
COMMENT ON COLUMN dispensaries.menu_url_source IS
    'Source of menu_url: dutchie_api, website_scrape, manual, etc.';
COMMENT ON COLUMN dispensaries.menu_url_verified_at IS
    'When the menu_url was last verified';

-- Platform ID provenance
COMMENT ON COLUMN dispensaries.platform_id_source IS
    'Source of platform_dispensary_id: dutchie_api, graphql_resolution, etc.';
COMMENT ON COLUMN dispensaries.platform_id_verified_at IS
    'When the platform_dispensary_id was last verified';

-- Country
COMMENT ON COLUMN dispensaries.country IS
    'ISO 2-letter country code: US, CA, etc.';
|
||||
|
||||
-- One-off data fix: write the verified Dutchie slug, menu URL and platform ID
-- for Green Pharms Mesa and mark all three as sourced from dutchie_api.
-- NOTE(review): the row is addressed by a hard-coded id (232); confirm that this
-- id identifies the same dispensary in every environment the migration runs in.
UPDATE dispensaries
SET
    -- Dutchie values
    slug                    = 'green-pharms-mesa',
    menu_url                = 'https://dutchie.com/embedded-menu/green-pharms-mesa',
    menu_type               = 'dutchie',
    platform_dispensary_id  = '68dc47a2af90f2e653f8df30',
    -- where each value came from
    slug_source             = 'dutchie_api',
    menu_url_source         = 'dutchie_api',
    platform_id_source      = 'dutchie_api',
    -- verification state and timestamps
    slug_status             = 'verified',
    slug_verified_at        = CURRENT_TIMESTAMP,
    menu_url_verified_at    = CURRENT_TIMESTAMP,
    platform_id_verified_at = CURRENT_TIMESTAMP,
    updated_at              = CURRENT_TIMESTAMP
WHERE id = 232;
|
||||
|
||||
-- Mark all other AZ dispensaries (everything except id 232) as needing
-- verification.
-- Only rows whose slug_status is NULL are written. Rows that are already
-- 'unverified' hold the target value, and rewriting them would create new row
-- versions and fire any UPDATE triggers without changing the data.
-- Rows in any other status are left untouched.
UPDATE dispensaries
SET slug_status = 'unverified'
WHERE state = 'AZ'
  AND id <> 232
  AND slug_status IS NULL;
|
||||
Reference in New Issue
Block a user