Add crawl fields directly to dispensaries table

Migration 025: Dispensaries ARE stores - add crawl metadata fields
(menu_url, provider_type, scrape_enabled, crawl_status, etc.)
directly to the dispensaries table instead of maintaining a separate stores table.

- Copies menu_url from 22 existing stores to their dispensaries
- Migrates products from store_id to dispensary_id
- Detects provider_type from menu_url domain
- Adds indexes for crawl scheduling

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-11-30 23:52:48 -07:00
parent 5306c3f4ca
commit 1fb8f84929

View File

@@ -0,0 +1,99 @@
-- Migration 025: Add Crawl Fields to Dispensaries
-- Dispensaries ARE stores. Add crawl metadata directly to dispensaries table.
-- =====================================================
-- STEP 1: Add crawl fields to dispensaries
-- =====================================================
-- All crawl metadata columns are added by a single ALTER TABLE.
-- Every clause keeps IF NOT EXISTS, so a column that is already
-- present is skipped rather than raising an error.
ALTER TABLE dispensaries
    -- Where to crawl and whether crawling is switched on
    ADD COLUMN IF NOT EXISTS menu_url             TEXT,
    ADD COLUMN IF NOT EXISTS provider_type        VARCHAR(50) DEFAULT 'unknown',
    ADD COLUMN IF NOT EXISTS scrape_enabled       BOOLEAN     DEFAULT FALSE,
    -- Scheduling timestamps
    ADD COLUMN IF NOT EXISTS last_crawl_at        TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS next_crawl_at        TIMESTAMPTZ,
    -- Outcome of the crawl and its error text, if any
    ADD COLUMN IF NOT EXISTS crawl_status         VARCHAR(50) DEFAULT 'pending',
    ADD COLUMN IF NOT EXISTS crawl_error          TEXT,
    -- Counters, all starting at zero
    ADD COLUMN IF NOT EXISTS consecutive_failures INTEGER     DEFAULT 0,
    ADD COLUMN IF NOT EXISTS total_crawls         INTEGER     DEFAULT 0,
    ADD COLUMN IF NOT EXISTS successful_crawls    INTEGER     DEFAULT 0;
-- =====================================================
-- STEP 2: Copy menu_url from old stores table where available
-- =====================================================
-- One set-based UPDATE driven by an explicit (store_id, dispensary_id) map.
-- Joining to stores, rather than assigning a scalar subquery, means a
-- dispensary is only touched when its source store row exists and carries a
-- non-empty dutchie_url. A scalar subquery would evaluate to NULL for a
-- missing store and overwrite whatever menu_url the dispensary already had
-- (the column is added with IF NOT EXISTS, so it may pre-date this migration).
-- Each dispensary id appears once in the map, so every target row has at
-- most one source row.
UPDATE dispensaries AS d
SET menu_url = s.dutchie_url
FROM (
    VALUES
        -- Deeply Rooted
        (1, 112),
        -- Curaleaf stores
        (18, 81),
        (19, 195),
        (20, 115),
        (21, 140),
        (22, 177),
        (23, 77),
        (24, 248),
        (26, 200),
        (27, 106),
        (28, 196),
        (29, 252),
        (30, 235),
        (32, 180),
        (33, 87),
        (34, 94),
        (35, 152),
        -- Sol Flower stores
        (36, 119),
        (37, 207),
        (38, 206),
        (39, 153),
        (40, 205)
) AS store_map (store_id, dispensary_id)
INNER JOIN stores AS s
    ON s.id = store_map.store_id
WHERE d.id = store_map.dispensary_id
  AND s.dutchie_url IS NOT NULL
  AND s.dutchie_url <> '';

-- Enable scraping for dispensaries that have menu_url
UPDATE dispensaries
SET scrape_enabled = TRUE
WHERE menu_url IS NOT NULL
  AND menu_url <> '';
-- =====================================================
-- STEP 3: Detect provider_type from menu_url
-- =====================================================
-- Single pass over dispensaries. Branches are listed from highest to lowest
-- precedence so that a URL matching several patterns ends up with the same
-- provider it would get from applying the patterns one statement at a time
-- in the order dutchie, curaleaf, leafly, weedmaps, iheartjane (last wins).
-- 'jane' is a fallback only: '%jane.com%' also matches iheartjane.com, so it
-- applies solely to rows still marked 'unknown' that matched nothing else.
UPDATE dispensaries
SET provider_type = CASE
        WHEN menu_url LIKE '%iheartjane.com%' THEN 'iheartjane'
        WHEN menu_url LIKE '%weedmaps.com%'   THEN 'weedmaps'
        WHEN menu_url LIKE '%leafly.com%'     THEN 'leafly'
        WHEN menu_url LIKE '%curaleaf.com%'   THEN 'curaleaf'
        WHEN menu_url LIKE '%dutchie.com%'    THEN 'dutchie'
        WHEN menu_url LIKE '%jane.com%'
             AND provider_type = 'unknown'    THEN 'jane'
        ELSE provider_type
    END
-- Restrict the update to rows where one of the branches above fires.
WHERE menu_url LIKE '%dutchie.com%'
   OR menu_url LIKE '%curaleaf.com%'
   OR menu_url LIKE '%leafly.com%'
   OR menu_url LIKE '%weedmaps.com%'
   OR menu_url LIKE '%iheartjane.com%'
   OR (menu_url LIKE '%jane.com%' AND provider_type = 'unknown');
-- =====================================================
-- STEP 4: Migrate products from store_id to dispensary_id
-- =====================================================
-- Backfill products.dispensary_id from an explicit store -> dispensary map.
-- Only products whose dispensary_id is still NULL are updated, so rows that
-- already point at a dispensary are left alone.
-- NOTE(review): stores 24, 25 and 31 all map to dispensary 248 - presumably
-- duplicate store rows for the same location; confirm.
UPDATE products AS p
SET dispensary_id = store_map.dispensary_id
FROM (
    VALUES
        (1, 112),
        (18, 81),
        (19, 195),
        (20, 115),
        (21, 140),
        (22, 177),
        (23, 77),
        (24, 248),
        (25, 248),
        (26, 200),
        (27, 106),
        (28, 196),
        (29, 252),
        (30, 235),
        (31, 248),
        (32, 180),
        (33, 87),
        (34, 94),
        (35, 152),
        (36, 119),
        (37, 207),
        (38, 206),
        (39, 153),
        (40, 205)
) AS store_map (store_id, dispensary_id)
WHERE p.store_id = store_map.store_id
  AND p.dispensary_id IS NULL;
-- =====================================================
-- STEP 5: Create indexes
-- =====================================================
-- Partial index: contains only rows with scrape_enabled = TRUE.
CREATE INDEX IF NOT EXISTS idx_dispensaries_scrape_enabled
    ON dispensaries (scrape_enabled)
    WHERE scrape_enabled = TRUE;

-- Partial index on the next scheduled crawl time, again limited to
-- scrape-enabled rows.
CREATE INDEX IF NOT EXISTS idx_dispensaries_next_crawl
    ON dispensaries (next_crawl_at)
    WHERE scrape_enabled = TRUE;

-- Full index on crawl_status.
CREATE INDEX IF NOT EXISTS idx_dispensaries_crawl_status
    ON dispensaries (crawl_status);

-- Index on the products -> dispensaries reference column.
CREATE INDEX IF NOT EXISTS idx_products_dispensary_id
    ON products (dispensary_id);
-- =====================================================
-- DONE. dispensaries table is now the single source of truth.
-- stores table is deprecated - will be removed in future migration.
-- =====================================================