Remove domain-based shortcuts for Curaleaf/Sol detection
Menu detection now always crawls websites to find the actual embedded menu provider instead of marking stores as proprietary based on domain alone. This fixes detection for stores such as Curaleaf that may use Dutchie embeds.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -29,8 +29,6 @@ const DISPENSARY_COLUMNS = `
|
||||
|
||||
export type MenuProvider =
|
||||
| 'dutchie'
|
||||
| 'curaleaf' // Curaleaf proprietary platform (not crawlable via Dutchie)
|
||||
| 'sol' // Sol Flower proprietary platform (not crawlable via Dutchie)
|
||||
| 'treez'
|
||||
| 'jane'
|
||||
| 'iheartjane'
|
||||
@@ -68,22 +66,7 @@ export interface BulkDetectionResult {
|
||||
// ============================================================
|
||||
|
||||
const PROVIDER_URL_PATTERNS: Array<{ provider: MenuProvider; patterns: RegExp[] }> = [
|
||||
// IMPORTANT: Curaleaf and Sol must come BEFORE dutchie to take precedence
|
||||
// These stores have their own proprietary menu systems (not crawlable via Dutchie)
|
||||
{
|
||||
provider: 'curaleaf',
|
||||
patterns: [
|
||||
/curaleaf\.com\/stores\//i, // e.g., https://curaleaf.com/stores/curaleaf-az-glendale-east
|
||||
/curaleaf\.com\/dispensary\//i, // e.g., https://curaleaf.com/dispensary/arizona
|
||||
],
|
||||
},
|
||||
{
|
||||
provider: 'sol',
|
||||
patterns: [
|
||||
/livewithsol\.com/i, // e.g., https://www.livewithsol.com/locations/sun-city/
|
||||
/solflower\.com/i, // alternate domain if any
|
||||
],
|
||||
},
|
||||
// We detect provider based on the actual menu link we find, not just the site domain.
|
||||
{
|
||||
provider: 'dutchie',
|
||||
patterns: [
|
||||
@@ -199,19 +182,6 @@ function isCuraleafUrl(url: string | null | undefined): boolean {
|
||||
return /curaleaf\.com\/(stores|dispensary)\//i.test(url);
|
||||
}
|
||||
|
||||
/**
 * Return the given URL when it is already a Curaleaf store page.
 *
 * A URL qualifies when `isCuraleafUrl` accepts it (the /stores/ and
 * /dispensary/ path formats). Anything else, including an empty string,
 * yields null.
 *
 * @param url - Website URL to inspect.
 * @returns The unchanged URL if it is a Curaleaf stores/dispensary link, otherwise null.
 */
function extractCuraleafStoreUrl(url: string): string | null {
  // An empty URL, or one that is not a Curaleaf store link, has nothing to extract.
  const isStorePage = Boolean(url) && isCuraleafUrl(url);
  return isStorePage ? url : null;
}
|
||||
|
||||
/**
|
||||
* Fetch a page and extract all links
|
||||
*/
|
||||
@@ -653,30 +623,29 @@ export async function detectAndResolveDispensary(dispensaryId: number): Promise<
|
||||
return result;
|
||||
}
|
||||
} else {
|
||||
// Website crawl didn't find any menu provider - mark as proprietary
|
||||
const notCrawlableReason = `${detectedProvider} proprietary menu - no embedded menu provider found`;
|
||||
console.log(`[MenuDetection] ${dispensary.name}: Website crawl found no menu provider - marking as ${detectedProvider}`);
|
||||
// Website crawl didn't find any menu provider - mark unknown with reason
|
||||
const notCrawlableReason = `No embedded menu provider found`;
|
||||
console.log(`[MenuDetection] ${dispensary.name}: Website crawl found no menu provider - marking as unknown`);
|
||||
|
||||
await query(
|
||||
`
|
||||
UPDATE dispensaries SET
|
||||
menu_type = $1,
|
||||
menu_type = 'unknown',
|
||||
platform_dispensary_id = NULL,
|
||||
provider_detection_data = COALESCE(provider_detection_data, '{}'::jsonb) ||
|
||||
jsonb_build_object(
|
||||
'detected_provider', $1::text,
|
||||
'detected_provider', 'unknown'::text,
|
||||
'detection_method', 'url_pattern_with_crawl'::text,
|
||||
'detected_at', NOW(),
|
||||
'website_crawled', $2::text,
|
||||
'website_crawl_pages', $3::jsonb,
|
||||
'website_crawled', $1::text,
|
||||
'website_crawl_pages', $2::jsonb,
|
||||
'not_crawlable', true,
|
||||
'not_crawlable_reason', $4::text
|
||||
'not_crawlable_reason', $3::text
|
||||
),
|
||||
updated_at = NOW()
|
||||
WHERE id = $5
|
||||
WHERE id = $4
|
||||
`,
|
||||
[
|
||||
detectedProvider,
|
||||
website,
|
||||
JSON.stringify(crawlResult.crawledPages),
|
||||
notCrawlableReason,
|
||||
@@ -688,7 +657,7 @@ export async function detectAndResolveDispensary(dispensaryId: number): Promise<
|
||||
}
|
||||
}
|
||||
|
||||
// If not dutchie and not a proprietary domain we need to crawl, just update menu_type
|
||||
// If not dutchie, just update menu_type (non-dutchie providers)
|
||||
if (detectedProvider !== 'dutchie') {
|
||||
await query(
|
||||
`
|
||||
|
||||
Reference in New Issue
Block a user