feat: AZ dispensary harmonization with Dutchie source of truth

Major changes:
- Add harmonize-az-dispensaries.ts script to sync dispensaries with Dutchie API
- Add migration 057 for crawl_enabled and dutchie_verified fields
- Remove legacy dutchie-az module (replaced by platforms/dutchie)
- Clean up deprecated crawlers, scrapers, and orchestrator code
- Update location-discovery to not fall back to slug when ID is missing
- Add crawl-rotator service for proxy rotation
- Add types/index.ts for shared type definitions
- Add woodpecker-agent k8s manifest

Harmonization script:
- Queries ConsumerDispensaries API for all 32 AZ cities
- Matches dispensaries by platform_dispensary_id (not slug)
- Updates existing records with full Dutchie data
- Creates new records for unmatched Dutchie dispensaries
- Disables dispensaries not found in Dutchie

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-08 10:19:49 -07:00
parent 948a732dd5
commit b7cfec0770
112 changed files with 3163 additions and 34694 deletions

View File

@@ -0,0 +1,56 @@
-- Migration 065: Slug verification and data source tracking
-- Adds columns to track when slug/menu data was verified and from what source
-- Extend dispensaries with provenance/verification tracking for the slug,
-- menu_url and platform ID, plus a country code.
-- Each ADD COLUMN is guarded by IF NOT EXISTS, so columns that are already
-- present are left as they are.
ALTER TABLE dispensaries
    -- Slug: where it came from, when it was checked, and its current status.
    ADD COLUMN IF NOT EXISTS slug_source             CHARACTER VARYING(50),
    ADD COLUMN IF NOT EXISTS slug_verified_at        TIMESTAMP WITH TIME ZONE,
    ADD COLUMN IF NOT EXISTS slug_status             CHARACTER VARYING(20) DEFAULT 'unverified',
    -- Menu URL: source and last verification time.
    ADD COLUMN IF NOT EXISTS menu_url_source         CHARACTER VARYING(50),
    ADD COLUMN IF NOT EXISTS menu_url_verified_at    TIMESTAMP WITH TIME ZONE,
    -- Platform ID: source and last verification time.
    ADD COLUMN IF NOT EXISTS platform_id_source      CHARACTER VARYING(50),
    ADD COLUMN IF NOT EXISTS platform_id_verified_at TIMESTAMP WITH TIME ZONE,
    -- Two-character country code; defaults to 'US'.
    ADD COLUMN IF NOT EXISTS country                 CHARACTER VARYING(2) DEFAULT 'US';
-- Partial index over slug_status that only contains rows whose status is
-- something other than 'verified' (used to find stores still needing checks).
-- Rows with a NULL slug_status are not part of this index, because the
-- predicate evaluates to NULL for them.
CREATE INDEX IF NOT EXISTS idx_dispensaries_slug_status
    ON dispensaries (slug_status)
    WHERE slug_status <> 'verified';

-- Plain index on the country code.
CREATE INDEX IF NOT EXISTS idx_dispensaries_country
    ON dispensaries (country);
-- Catalog-level documentation for each column added by this migration.

-- Slug tracking
COMMENT ON COLUMN dispensaries.slug_source
    IS 'Source of slug data: dutchie_api, manual, azdhs, discovery, etc.';
COMMENT ON COLUMN dispensaries.slug_verified_at
    IS 'When the slug was last verified against the source';
COMMENT ON COLUMN dispensaries.slug_status
    IS 'Status: unverified, verified, invalid, changed';

-- Menu URL tracking
COMMENT ON COLUMN dispensaries.menu_url_source
    IS 'Source of menu_url: dutchie_api, website_scrape, manual, etc.';
COMMENT ON COLUMN dispensaries.menu_url_verified_at
    IS 'When the menu_url was last verified';

-- Platform ID tracking
COMMENT ON COLUMN dispensaries.platform_id_source
    IS 'Source of platform_dispensary_id: dutchie_api, graphql_resolution, etc.';
COMMENT ON COLUMN dispensaries.platform_id_verified_at
    IS 'When the platform_dispensary_id was last verified';

-- Country
COMMENT ON COLUMN dispensaries.country
    IS 'ISO 2-letter country code: US, CA, etc.';
-- Seed the Green Pharms Mesa row with its Dutchie identifiers and stamp the
-- slug, menu_url and platform ID as sourced from dutchie_api and verified now.
-- NOTE(review): the row is targeted by the hard-coded id 232; this assumes that
-- id refers to Green Pharms Mesa in every environment — confirm before running
-- outside the database it was authored against.
UPDATE dispensaries
SET
    -- Identity / menu data
    slug                    = 'green-pharms-mesa',
    menu_url                = 'https://dutchie.com/embedded-menu/green-pharms-mesa',
    menu_type               = 'dutchie',
    platform_dispensary_id  = '68dc47a2af90f2e653f8df30',
    -- Provenance of each field
    slug_source             = 'dutchie_api',
    menu_url_source         = 'dutchie_api',
    platform_id_source      = 'dutchie_api',
    -- Verification state and timestamps
    slug_status             = 'verified',
    slug_verified_at        = CURRENT_TIMESTAMP,
    menu_url_verified_at    = CURRENT_TIMESTAMP,
    platform_id_verified_at = CURRENT_TIMESTAMP,
    updated_at              = CURRENT_TIMESTAMP
WHERE id = 232;
-- Mark all other AZ dispensaries as needing verification.
-- Only rows whose slug_status is still NULL are touched: rows that already
-- hold 'unverified' are in the desired state, and re-assigning the same value
-- would still write a new row version (and fire any row-level triggers) for
-- no change in data. Rows with any other status (verified, invalid, changed)
-- are left alone.
-- The id <> 232 guard keeps the statement from touching the Green Pharms Mesa
-- row even when it is run on its own.
UPDATE dispensaries
SET slug_status = 'unverified'
WHERE state = 'AZ'
  AND id <> 232
  AND slug_status IS NULL;