From 1fb8f84929174dfe61ce8781e61b13df3eecc262 Mon Sep 17 00:00:00 2001 From: Kelly Date: Sun, 30 Nov 2025 23:52:48 -0700 Subject: [PATCH] Add crawl fields directly to dispensaries table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migration 025: Dispensaries ARE stores - add crawl metadata fields (menu_url, provider_type, scrape_enabled, crawl_status, etc.) directly to dispensaries table instead of maintaining separate stores table. - Copies menu_url from 22 existing stores to their dispensaries - Migrates products from store_id to dispensary_id - Detects provider_type from menu_url domain - Adds indexes for crawl scheduling 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../025_dispensary_crawl_fields.sql | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 backend/migrations/025_dispensary_crawl_fields.sql diff --git a/backend/migrations/025_dispensary_crawl_fields.sql b/backend/migrations/025_dispensary_crawl_fields.sql new file mode 100644 index 00000000..c9523dff --- /dev/null +++ b/backend/migrations/025_dispensary_crawl_fields.sql @@ -0,0 +1,99 @@ +-- Migration 025: Add Crawl Fields to Dispensaries +-- Dispensaries ARE stores. Add crawl metadata directly to dispensaries table. 
+
+-- =====================================================
+-- STEP 1: Add crawl fields to dispensaries
+-- =====================================================
+-- All crawl columns are added in one ALTER TABLE. IF NOT EXISTS makes each
+-- ADD COLUMN a no-op when that column is already present.
+ALTER TABLE dispensaries
+    ADD COLUMN IF NOT EXISTS menu_url TEXT,
+    ADD COLUMN IF NOT EXISTS provider_type VARCHAR(50) DEFAULT 'unknown',
+    ADD COLUMN IF NOT EXISTS scrape_enabled BOOLEAN DEFAULT FALSE,
+    ADD COLUMN IF NOT EXISTS last_crawl_at TIMESTAMPTZ,
+    ADD COLUMN IF NOT EXISTS next_crawl_at TIMESTAMPTZ,
+    ADD COLUMN IF NOT EXISTS crawl_status VARCHAR(50) DEFAULT 'pending',
+    ADD COLUMN IF NOT EXISTS crawl_error TEXT,
+    ADD COLUMN IF NOT EXISTS consecutive_failures INTEGER DEFAULT 0,
+    ADD COLUMN IF NOT EXISTS total_crawls INTEGER DEFAULT 0,
+    ADD COLUMN IF NOT EXISTS successful_crawls INTEGER DEFAULT 0;
+
+-- =====================================================
+-- STEP 2: Copy menu_url from old stores table where available
+-- =====================================================
+-- Copies stores.dutchie_url into dispensaries.menu_url for every
+-- (store_id, dispensary_id) pair listed below.
+--
+-- The pairs are joined to stores instead of being assigned through a scalar
+-- subquery: a pair whose stores row is missing, or whose dutchie_url is NULL,
+-- leaves the dispensary's menu_url unchanged rather than setting it to NULL.
+--
+-- Each dispensary id appears exactly once in the list, so a dispensary matches
+-- at most one source row (assumes stores.id is unique -- TODO confirm).
+-- Stores 25 and 31 are intentionally not listed: STEP 4 maps them to
+-- dispensary 248, which takes its menu_url from store 24.
+UPDATE dispensaries AS d
+SET menu_url = s.dutchie_url
+FROM (
+    VALUES
+        -- Deeply Rooted
+        (1, 112),
+        -- Curaleaf stores
+        (18, 81),
+        (19, 195),
+        (20, 115),
+        (21, 140),
+        (22, 177),
+        (23, 77),
+        (24, 248),
+        (26, 200),
+        (27, 106),
+        (28, 196),
+        (29, 252),
+        (30, 235),
+        (32, 180),
+        (33, 87),
+        (34, 94),
+        (35, 152),
+        -- Sol Flower stores
+        (36, 119),
+        (37, 207),
+        (38, 206),
+        (39, 153),
+        (40, 205)
+) AS m (store_id, dispensary_id)
+INNER JOIN stores AS s
+    ON s.id = m.store_id
+WHERE d.id = m.dispensary_id
+    AND s.dutchie_url IS NOT NULL;
+
+-- Enable scraping for dispensaries that have menu_url
+-- (applies to every dispensary with a non-empty menu_url, not only the rows
+-- copied above).
+UPDATE dispensaries
+SET scrape_enabled = TRUE
+WHERE menu_url IS NOT NULL
+    AND menu_url != '';
+
+-- =====================================================
+-- STEP 3: Detect provider_type from menu_url
+-- =====================================================
+-- The statements run in order, so when a menu_url matches more than one
+-- pattern the later statement's value is the one that remains.
+UPDATE dispensaries SET provider_type = 'dutchie' WHERE menu_url LIKE '%dutchie.com%';
+UPDATE dispensaries SET provider_type = 'curaleaf' WHERE menu_url LIKE '%curaleaf.com%';
+UPDATE dispensaries SET provider_type = 'leafly' WHERE menu_url LIKE '%leafly.com%';
+UPDATE dispensaries SET provider_type = 'weedmaps' WHERE menu_url LIKE '%weedmaps.com%';
+UPDATE dispensaries SET provider_type = 'iheartjane' WHERE menu_url LIKE '%iheartjane.com%';
+-- '%jane.com%' also matches iheartjane.com URLs as a substring; the
+-- provider_type = 'unknown' guard keeps rows classified above from being
+-- relabelled as 'jane'.
+UPDATE dispensaries SET provider_type = 'jane' WHERE menu_url LIKE '%jane.com%' AND provider_type = 'unknown';
+
+-- =====================================================
+-- STEP 4: Migrate products from store_id to dispensary_id
+-- =====================================================
+-- Sets products.dispensary_id from the store -> dispensary pairs below in a
+-- single pass. Only rows whose dispensary_id is still NULL are touched, so
+-- products that already point at a dispensary keep their value.
+-- Stores 24, 25 and 31 all map to dispensary 248. Each store id appears once,
+-- so every product row matches at most one pair.
+UPDATE products AS p
+SET dispensary_id = m.dispensary_id
+FROM (
+    VALUES
+        (1, 112),
+        (18, 81),
+        (19, 195),
+        (20, 115),
+        (21, 140),
+        (22, 177),
+        (23, 77),
+        (24, 248),
+        (25, 248),
+        (26, 200),
+        (27, 106),
+        (28, 196),
+        (29, 252),
+        (30, 235),
+        (31, 248),
+        (32, 180),
+        (33, 87),
+        (34, 94),
+        (35, 152),
+        (36, 119),
+        (37, 207),
+        (38, 206),
+        (39, 153),
+        (40, 205)
+) AS m (store_id, dispensary_id)
+WHERE p.store_id = m.store_id
+    AND p.dispensary_id IS NULL;
+
+-- =====================================================
+-- STEP 5: Create indexes
+-- =====================================================
+-- The first two indexes are partial: they only contain rows with
+-- scrape_enabled = TRUE.
+CREATE INDEX IF NOT EXISTS idx_dispensaries_scrape_enabled
+    ON dispensaries(scrape_enabled)
+    WHERE scrape_enabled = TRUE;
+CREATE INDEX IF NOT EXISTS idx_dispensaries_next_crawl
+    ON dispensaries(next_crawl_at)
+    WHERE scrape_enabled = TRUE;
+CREATE INDEX IF NOT EXISTS idx_dispensaries_crawl_status
+    ON dispensaries(crawl_status);
+CREATE INDEX IF NOT EXISTS idx_products_dispensary_id
+    ON products(dispensary_id);
+
+-- =====================================================
+-- DONE. dispensaries table is now the single source of truth.
+-- stores table is deprecated - will be removed in future migration.
+-- =====================================================