From 33e12de3f219e8a8f3eb7fa36234b960d22783c2 Mon Sep 17 00:00:00 2001 From: Kelly Date: Wed, 3 Dec 2025 21:55:31 -0700 Subject: [PATCH] Remove domain-based shortcuts for Curaleaf/Sol detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Menu detection now always crawls websites to find actual embedded menu providers instead of marking stores as proprietary based on domain alone. This fixes detection for stores like Curaleaf that may use Dutchie embeds. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../src/dutchie-az/services/menu-detection.ts | 53 ++++--------------- 1 file changed, 11 insertions(+), 42 deletions(-) diff --git a/backend/src/dutchie-az/services/menu-detection.ts b/backend/src/dutchie-az/services/menu-detection.ts index 249fdf5f..d7bf01dd 100644 --- a/backend/src/dutchie-az/services/menu-detection.ts +++ b/backend/src/dutchie-az/services/menu-detection.ts @@ -29,8 +29,6 @@ const DISPENSARY_COLUMNS = ` export type MenuProvider = | 'dutchie' - | 'curaleaf' // Curaleaf proprietary platform (not crawlable via Dutchie) - | 'sol' // Sol Flower proprietary platform (not crawlable via Dutchie) | 'treez' | 'jane' | 'iheartjane' @@ -68,22 +66,7 @@ export interface BulkDetectionResult { // ============================================================ const PROVIDER_URL_PATTERNS: Array<{ provider: MenuProvider; patterns: RegExp[] }> = [ - // IMPORTANT: Curaleaf and Sol must come BEFORE dutchie to take precedence - // These stores have their own proprietary menu systems (not crawlable via Dutchie) - { - provider: 'curaleaf', - patterns: [ - /curaleaf\.com\/stores\//i, // e.g., https://curaleaf.com/stores/curaleaf-az-glendale-east - /curaleaf\.com\/dispensary\//i, // e.g., https://curaleaf.com/dispensary/arizona - ], - }, - { - provider: 'sol', - patterns: [ - /livewithsol\.com/i, // e.g., https://www.livewithsol.com/locations/sun-city/ - /solflower\.com/i, // alternate domain if any - ], - }, + // We detect provider based on the actual menu link we find, not just the site domain. { provider: 'dutchie', patterns: [ @@ -199,19 +182,6 @@ function isCuraleafUrl(url: string | null | undefined): boolean { return /curaleaf\.com\/(stores|dispensary)\//i.test(url); } -/** - * Extract the Curaleaf store URL from a website URL - * Handles both /stores/ and /dispensary/ formats - */ -function extractCuraleafStoreUrl(url: string): string | null { - if (!url) return null; - // If it's already a Curaleaf stores/dispensary URL, use it - if (isCuraleafUrl(url)) { - return url; - } - return null; -} - /** * Fetch a page and extract all links */ @@ -653,30 +623,29 @@ export async function detectAndResolveDispensary(dispensaryId: number): Promise< return result; } } else { - // Website crawl didn't find any menu provider - mark as proprietary - const notCrawlableReason = `${detectedProvider} proprietary menu - no embedded menu provider found`; - console.log(`[MenuDetection] ${dispensary.name}: Website crawl found no menu provider - marking as ${detectedProvider}`); + // Website crawl didn't find any menu provider - mark unknown with reason + const notCrawlableReason = `No embedded menu provider found`; + console.log(`[MenuDetection] ${dispensary.name}: Website crawl found no menu provider - marking as unknown`); await query( ` UPDATE dispensaries SET - menu_type = $1, + menu_type = 'unknown', platform_dispensary_id = NULL, provider_detection_data = COALESCE(provider_detection_data, '{}'::jsonb) || jsonb_build_object( - 'detected_provider', $1::text, + 'detected_provider', 'unknown'::text, 'detection_method', 'url_pattern_with_crawl'::text, 'detected_at', NOW(), - 'website_crawled', $2::text, - 'website_crawl_pages', $3::jsonb, + 'website_crawled', $1::text, + 'website_crawl_pages', $2::jsonb, 'not_crawlable', true, - 'not_crawlable_reason', $4::text + 'not_crawlable_reason', $3::text ), updated_at = NOW() - WHERE id = $5 + WHERE id = $4 `, [ - detectedProvider, website, JSON.stringify(crawlResult.crawledPages), notCrawlableReason, @@ -688,7 +657,7 @@ export async function detectAndResolveDispensary(dispensaryId: number): Promise< } } - // If not dutchie and not a proprietary domain we need to crawl, just update menu_type + // If not dutchie, just update menu_type (non-dutchie providers) if (detectedProvider !== 'dutchie') { await query( `