Add CLAUDE guidelines for consolidated pipeline
This commit is contained in:
@@ -0,0 +1,13 @@
|
||||
-- Migration: Add dispensary_id to wp_dutchie_api_permissions
|
||||
-- This allows API tokens to be associated with a specific dispensary
|
||||
|
||||
-- Add dispensary_id column to wp_dutchie_api_permissions
|
||||
ALTER TABLE wp_dutchie_api_permissions
|
||||
ADD COLUMN IF NOT EXISTS dispensary_id INTEGER REFERENCES dispensaries(id);
|
||||
|
||||
-- Add index for faster lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_wp_api_permissions_dispensary_id ON wp_dutchie_api_permissions(dispensary_id);
|
||||
|
||||
-- Add dispensary_name column to return dispensary info without join
|
||||
ALTER TABLE wp_dutchie_api_permissions
|
||||
ADD COLUMN IF NOT EXISTS dispensary_name VARCHAR(255);
|
||||
@@ -896,6 +896,7 @@ SELECT
|
||||
subcategory,
|
||||
COUNT(*) as product_count,
|
||||
COUNT(DISTINCT dispensary_id) as dispensary_count,
|
||||
COUNT(DISTINCT brand_name) as brand_count,
|
||||
AVG(thc) as avg_thc,
|
||||
MIN(thc) as min_thc,
|
||||
MAX(thc) as max_thc
|
||||
|
||||
36
backend/migrations/031_product_normalized_fields.sql
Normal file
36
backend/migrations/031_product_normalized_fields.sql
Normal file
@@ -0,0 +1,36 @@
|
||||
-- Migration 031: Add Normalized Fields to Products
|
||||
-- For improved product matching and deduplication
|
||||
|
||||
-- Add normalized columns to products table
|
||||
ALTER TABLE products ADD COLUMN IF NOT EXISTS name_normalized VARCHAR(500);
|
||||
ALTER TABLE products ADD COLUMN IF NOT EXISTS brand_normalized VARCHAR(255);
|
||||
ALTER TABLE products ADD COLUMN IF NOT EXISTS external_id VARCHAR(255); -- Platform-specific ID (Dutchie, Treez, etc)
|
||||
ALTER TABLE products ADD COLUMN IF NOT EXISTS source_platform VARCHAR(50); -- 'dutchie', 'treez', 'jane', 'wp'
|
||||
|
||||
-- Create indexes for efficient matching
|
||||
CREATE INDEX IF NOT EXISTS idx_products_external_id ON products(external_id) WHERE external_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_products_name_normalized ON products(store_id, name_normalized);
|
||||
CREATE INDEX IF NOT EXISTS idx_products_matching ON products(store_id, name_normalized, brand_normalized, category_id);
|
||||
|
||||
-- Backfill normalized names for existing products
|
||||
UPDATE products SET
|
||||
name_normalized = LOWER(TRIM(REGEXP_REPLACE(name, '[^a-zA-Z0-9 ]', ' ', 'g'))),
|
||||
brand_normalized = LOWER(TRIM(COALESCE(brand, ''))),
|
||||
external_id = COALESCE(external_id, dutchie_product_id),
|
||||
source_platform = COALESCE(source_platform, 'dutchie')
|
||||
WHERE name_normalized IS NULL;
|
||||
|
||||
-- Add constraint to prevent true duplicates going forward
|
||||
-- Note: We use a partial unique index to allow multiple NULLs
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_products_no_duplicate_external_id
|
||||
ON products(store_id, external_id)
|
||||
WHERE external_id IS NOT NULL;
|
||||
|
||||
-- Comments
|
||||
COMMENT ON COLUMN products.name_normalized IS 'Lowercase, trimmed product name with punctuation removed for matching';
|
||||
COMMENT ON COLUMN products.brand_normalized IS 'Lowercase, trimmed brand name for matching';
|
||||
COMMENT ON COLUMN products.external_id IS 'Platform-specific product ID (Dutchie ID, Treez SKU, etc)';
|
||||
COMMENT ON COLUMN products.source_platform IS 'Source platform: dutchie, treez, jane, wp';
|
||||
|
||||
-- Grant permissions
|
||||
GRANT SELECT, INSERT, UPDATE ON products TO scraper;
|
||||
61
backend/migrations/032_menu_type_and_local_images.sql
Normal file
61
backend/migrations/032_menu_type_and_local_images.sql
Normal file
@@ -0,0 +1,61 @@
|
||||
-- Migration 032: Add menu_type column and local image storage columns
|
||||
-- Run with: psql $DATABASE_URL -f migrations/032_menu_type_and_local_images.sql
|
||||
|
||||
-- ============================================
|
||||
-- 1. Add menu_type column to dispensaries
|
||||
-- ============================================
|
||||
|
||||
-- menu_type: canonical, admin-editable field for menu provider type
|
||||
-- Separate from menu_provider (auto-detected) to allow manual override
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS menu_type VARCHAR(50);
|
||||
|
||||
-- Index for filtering by menu_type
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_menu_type ON dispensaries(menu_type) WHERE menu_type IS NOT NULL;
|
||||
|
||||
-- Backfill menu_type from existing detection data:
|
||||
-- Priority: menu_provider (if set) > menu_url pattern matching
|
||||
UPDATE dispensaries
|
||||
SET menu_type = CASE
|
||||
-- Use existing menu_provider if set
|
||||
WHEN menu_provider IS NOT NULL AND menu_provider != '' THEN menu_provider
|
||||
-- Detect from menu_url patterns
|
||||
WHEN menu_url ILIKE '%dutchie%' THEN 'dutchie'
|
||||
WHEN menu_url ILIKE '%treez%' THEN 'treez'
|
||||
WHEN menu_url ILIKE '%jane%' OR menu_url ILIKE '%iheartjane%' THEN 'jane'
|
||||
WHEN menu_url ILIKE '%weedmaps%' THEN 'weedmaps'
|
||||
WHEN menu_url ILIKE '%leafly%' THEN 'leafly'
|
||||
WHEN menu_url ILIKE '%meadow%' OR menu_url ILIKE '%getmeadow%' THEN 'meadow'
|
||||
WHEN menu_url ILIKE '%blaze%' THEN 'blaze'
|
||||
WHEN menu_url ILIKE '%flowhub%' THEN 'flowhub'
|
||||
WHEN menu_url ILIKE '%dispenseapp%' THEN 'dispense'
|
||||
WHEN menu_url ILIKE '%cova%' THEN 'cova'
|
||||
ELSE NULL
|
||||
END
|
||||
WHERE menu_type IS NULL;
|
||||
|
||||
-- ============================================
|
||||
-- 2. Add local image columns to dutchie_products
|
||||
-- ============================================
|
||||
|
||||
-- local_image_url: the URL path for serving the downloaded image (e.g., /images/products/123/456.webp)
|
||||
ALTER TABLE dutchie_products ADD COLUMN IF NOT EXISTS local_image_url TEXT;
|
||||
|
||||
-- local_image_thumb_url: thumbnail version
|
||||
ALTER TABLE dutchie_products ADD COLUMN IF NOT EXISTS local_image_thumb_url TEXT;
|
||||
|
||||
-- local_image_medium_url: medium version
|
||||
ALTER TABLE dutchie_products ADD COLUMN IF NOT EXISTS local_image_medium_url TEXT;
|
||||
|
||||
-- original_image_url: preserved third-party URL for fallback/reference
|
||||
-- (primary_image_url will be updated to local path when downloaded)
|
||||
ALTER TABLE dutchie_products ADD COLUMN IF NOT EXISTS original_image_url TEXT;
|
||||
|
||||
-- Backfill original_image_url from primary_image_url (preserve third-party URLs)
|
||||
UPDATE dutchie_products
|
||||
SET original_image_url = primary_image_url
|
||||
WHERE original_image_url IS NULL AND primary_image_url IS NOT NULL;
|
||||
|
||||
-- ============================================
|
||||
-- Done
|
||||
-- ============================================
|
||||
SELECT 'Migration 032 completed: menu_type column added, local image columns added' as status;
|
||||
63
backend/migrations/033_add_platform_id_to_crawl_status.sql
Normal file
63
backend/migrations/033_add_platform_id_to_crawl_status.sql
Normal file
@@ -0,0 +1,63 @@
|
||||
-- Migration 033: Add platform_dispensary_id to dispensary_crawl_status view
|
||||
-- This exposes platform ID status for scheduling transparency
|
||||
-- Works with both local (interval_minutes) and K8s (cron_expression) schema variants
|
||||
|
||||
-- Recreate the dispensary_crawl_status view with platform_dispensary_id
|
||||
DROP VIEW IF EXISTS public.dispensary_crawl_status CASCADE;
|
||||
CREATE OR REPLACE VIEW public.dispensary_crawl_status AS
|
||||
SELECT
|
||||
d.id AS dispensary_id,
|
||||
COALESCE(d.dba_name, d.name) AS dispensary_name,
|
||||
d.slug AS dispensary_slug,
|
||||
d.city,
|
||||
d.state,
|
||||
d.menu_url,
|
||||
d.menu_type,
|
||||
d.platform_dispensary_id,
|
||||
d.scrape_enabled,
|
||||
d.last_crawl_at,
|
||||
d.crawl_status,
|
||||
d.product_crawler_mode,
|
||||
d.product_provider,
|
||||
cs.interval_minutes,
|
||||
cs.is_active,
|
||||
cs.priority,
|
||||
cs.last_run_at,
|
||||
cs.next_run_at,
|
||||
cs.last_status AS schedule_last_status,
|
||||
cs.last_error AS schedule_last_error,
|
||||
cs.consecutive_failures,
|
||||
j.id AS latest_job_id,
|
||||
j.status AS latest_job_status,
|
||||
j.job_type AS latest_job_type,
|
||||
j.started_at AS latest_job_started,
|
||||
j.completed_at AS latest_job_completed,
|
||||
j.products_found AS latest_products_found,
|
||||
j.products_new AS latest_products_created,
|
||||
j.products_updated AS latest_products_updated,
|
||||
j.error_message AS latest_job_error,
|
||||
-- Computed scheduling eligibility
|
||||
CASE
|
||||
WHEN d.menu_type = 'dutchie' AND d.platform_dispensary_id IS NOT NULL THEN true
|
||||
ELSE false
|
||||
END AS can_crawl,
|
||||
CASE
|
||||
WHEN d.menu_type IS NULL OR d.menu_type = 'unknown' THEN 'menu_type not detected'
|
||||
WHEN d.menu_type != 'dutchie' THEN 'not dutchie platform'
|
||||
WHEN d.platform_dispensary_id IS NULL THEN 'platform ID not resolved'
|
||||
WHEN d.scrape_enabled = false THEN 'scraping disabled'
|
||||
ELSE 'ready'
|
||||
END AS schedule_status_reason
|
||||
FROM public.dispensaries d
|
||||
LEFT JOIN public.dispensary_crawl_schedule cs ON cs.dispensary_id = d.id
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT *
|
||||
FROM public.dispensary_crawl_jobs dj
|
||||
WHERE dj.dispensary_id = d.id
|
||||
ORDER BY dj.created_at DESC
|
||||
LIMIT 1
|
||||
) j ON true
|
||||
WHERE d.state = 'AZ';
|
||||
|
||||
-- Done!
|
||||
SELECT 'Migration 033 completed successfully' as status;
|
||||
Reference in New Issue
Block a user