diff --git a/backend/src/dutchie-az/services/menu-detection.ts b/backend/src/dutchie-az/services/menu-detection.ts index 249fdf5f..d7bf01dd 100644 --- a/backend/src/dutchie-az/services/menu-detection.ts +++ b/backend/src/dutchie-az/services/menu-detection.ts @@ -29,8 +29,6 @@ const DISPENSARY_COLUMNS = ` export type MenuProvider = | 'dutchie' - | 'curaleaf' // Curaleaf proprietary platform (not crawlable via Dutchie) - | 'sol' // Sol Flower proprietary platform (not crawlable via Dutchie) | 'treez' | 'jane' | 'iheartjane' @@ -68,22 +66,7 @@ export interface BulkDetectionResult { // ============================================================ const PROVIDER_URL_PATTERNS: Array<{ provider: MenuProvider; patterns: RegExp[] }> = [ - // IMPORTANT: Curaleaf and Sol must come BEFORE dutchie to take precedence - // These stores have their own proprietary menu systems (not crawlable via Dutchie) - { - provider: 'curaleaf', - patterns: [ - /curaleaf\.com\/stores\//i, // e.g., https://curaleaf.com/stores/curaleaf-az-glendale-east - /curaleaf\.com\/dispensary\//i, // e.g., https://curaleaf.com/dispensary/arizona - ], - }, - { - provider: 'sol', - patterns: [ - /livewithsol\.com/i, // e.g., https://www.livewithsol.com/locations/sun-city/ - /solflower\.com/i, // alternate domain if any - ], - }, + // We detect provider based on the actual menu link we find, not just the site domain. { provider: 'dutchie', patterns: [ @@ -199,19 +182,6 @@ function isCuraleafUrl(url: string | null | undefined): boolean { return /curaleaf\.com\/(stores|dispensary)\//i.test(url); } -/** - * Extract the Curaleaf store URL from a website URL - * Handles both /stores/ and /dispensary/ formats - */ -function extractCuraleafStoreUrl(url: string): string | null { - if (!url) return null; - // If it's already a Curaleaf stores/dispensary URL, use it - if (isCuraleafUrl(url)) { - return url; - } - return null; -} - /** * Fetch a page and extract all links */ @@ -653,30 +623,29 @@ export async function detectAndResolveDispensary(dispensaryId: number): Promise< return result; } } else { - // Website crawl didn't find any menu provider - mark as proprietary - const notCrawlableReason = `${detectedProvider} proprietary menu - no embedded menu provider found`; - console.log(`[MenuDetection] ${dispensary.name}: Website crawl found no menu provider - marking as ${detectedProvider}`); + // Website crawl didn't find any menu provider - mark unknown with reason + const notCrawlableReason = `No embedded menu provider found`; + console.log(`[MenuDetection] ${dispensary.name}: Website crawl found no menu provider - marking as unknown`); await query( ` UPDATE dispensaries SET - menu_type = $1, + menu_type = 'unknown', platform_dispensary_id = NULL, provider_detection_data = COALESCE(provider_detection_data, '{}'::jsonb) || jsonb_build_object( - 'detected_provider', $1::text, + 'detected_provider', 'unknown'::text, 'detection_method', 'url_pattern_with_crawl'::text, 'detected_at', NOW(), - 'website_crawled', $2::text, - 'website_crawl_pages', $3::jsonb, + 'website_crawled', $1::text, + 'website_crawl_pages', $2::jsonb, 'not_crawlable', true, - 'not_crawlable_reason', $4::text + 'not_crawlable_reason', $3::text ), updated_at = NOW() - WHERE id = $5 + WHERE id = $4 `, [ - detectedProvider, website, JSON.stringify(crawlResult.crawledPages), notCrawlableReason, @@ -688,7 +657,7 @@ export async function detectAndResolveDispensary(dispensaryId: number): Promise< } } - // If not dutchie and not a proprietary domain we need to crawl, just update menu_type + // If not dutchie, just update menu_type (non-dutchie providers) if (detectedProvider !== 'dutchie') { await query( `