"use strict"; /** * Product Normalizer Utility * * Functions for normalizing product data to enable consistent matching * and prevent duplicate product entries. */ Object.defineProperty(exports, "__esModule", { value: true }); exports.normalizeProductName = normalizeProductName; exports.normalizeBrandName = normalizeBrandName; exports.normalizeWeight = normalizeWeight; exports.generateProductFingerprint = generateProductFingerprint; exports.stringSimilarity = stringSimilarity; exports.areProductsSimilar = areProductsSimilar; /** * Normalize product name for matching * - Lowercase * - Remove punctuation * - Remove THC/CBD percentages often appended to names * - Remove weight suffixes * - Remove emoji * - Normalize whitespace */ function normalizeProductName(name) { if (!name) return ''; return name .toLowerCase() .trim() // Remove special characters except alphanumeric and spaces .replace(/[^\w\s]/g, ' ') // Remove common suffixes like THC/CBD percentages appended to names .replace(/\s*(thc|cbd|cbg|cbn|tac)\s*[:=]?\s*[\d.]+\s*%?/gi, '') // Remove weight/size suffixes often appended .replace(/\s*\d+(\.\d+)?\s*(mg|g|oz|ml|gram|grams|ounce|ounces)\b/gi, '') // Remove emoji .replace(/[\u{1F300}-\u{1F9FF}]/gu, '') // Remove "special offer" type suffixes .replace(/\s*special\s*offer\s*/gi, '') // Normalize multiple spaces to single space .replace(/\s+/g, ' ') .trim(); } /** * Normalize brand name for matching */ function normalizeBrandName(brand) { if (!brand) return ''; return brand .toLowerCase() .trim() // Remove special characters .replace(/[^\w\s]/g, ' ') // Normalize whitespace .replace(/\s+/g, ' ') .trim(); } /** * Normalize weight string to standard format * e.g., "3.5 grams" -> "3.5g", "1/8 oz" -> "3.5g" */ function normalizeWeight(weight) { if (!weight) return ''; const w = weight.toLowerCase().trim(); // Handle fractional ounces if (w.includes('1/8') || w.includes('eighth')) { return '3.5g'; } if (w.includes('1/4') || w.includes('quarter')) { return '7g'; } if (w.includes('1/2') || w.includes('half')) { return '14g'; } if (w.includes('1 oz') || w === 'oz' || w === '1oz') { return '28g'; } // Extract numeric value and unit const match = w.match(/([\d.]+)\s*(mg|g|oz|ml|gram|grams?|ounce|ounces?)?/i); if (!match) return w; const value = parseFloat(match[1]); let unit = (match[2] || 'g').toLowerCase(); // Normalize unit names unit = unit.replace(/gram(s)?/, 'g').replace(/ounce(s)?/, 'oz'); // Convert oz to grams for consistency if (unit === 'oz') { return `${(value * 28).toFixed(1)}g`; } return `${value}${unit}`; } /** * Generate a matching fingerprint for a product * Used for deduplication */ function generateProductFingerprint(name, brand, weight, categoryId) { const parts = [ normalizeProductName(name), normalizeBrandName(brand), normalizeWeight(weight), categoryId?.toString() || '' ]; return parts.filter(Boolean).join('|'); } /** * Calculate similarity between two strings (0-100) * Uses Levenshtein distance */ function stringSimilarity(str1, str2) { if (str1 === str2) return 100; if (!str1 || !str2) return 0; const s1 = str1.toLowerCase(); const s2 = str2.toLowerCase(); if (s1 === s2) return 100; const longer = s1.length > s2.length ? s1 : s2; const shorter = s1.length > s2.length ? s2 : s1; const longerLength = longer.length; if (longerLength === 0) return 100; const distance = levenshteinDistance(longer, shorter); return Math.round(((longerLength - distance) / longerLength) * 100); } /** * Levenshtein distance between two strings */ function levenshteinDistance(str1, str2) { const m = str1.length; const n = str2.length; // Create distance matrix const dp = Array(m + 1).fill(null).map(() => Array(n + 1).fill(0)); // Initialize first row and column for (let i = 0; i <= m; i++) dp[i][0] = i; for (let j = 0; j <= n; j++) dp[0][j] = j; // Fill in the rest for (let i = 1; i <= m; i++) { for (let j = 1; j <= n; j++) { const cost = str1[i - 1] === str2[j - 1] ? 0 : 1; dp[i][j] = Math.min(dp[i - 1][j] + 1, // deletion dp[i][j - 1] + 1, // insertion dp[i - 1][j - 1] + cost // substitution ); } } return dp[m][n]; } /** * Check if two products are likely the same * Returns confidence score (0-100) */ function areProductsSimilar(product1, product2, threshold = 92) { const name1 = normalizeProductName(product1.name); const name2 = normalizeProductName(product2.name); const nameSimilarity = stringSimilarity(name1, name2); // If names are very similar, likely same product if (nameSimilarity >= threshold) { return { isSimilar: true, confidence: nameSimilarity }; } // Check brand match for additional confidence const brand1 = normalizeBrandName(product1.brand); const brand2 = normalizeBrandName(product2.brand); if (brand1 && brand2 && brand1 === brand2) { // Same brand, lower threshold for name match if (nameSimilarity >= threshold - 10) { return { isSimilar: true, confidence: nameSimilarity + 5 }; } } // Check weight match const weight1 = normalizeWeight(product1.weight); const weight2 = normalizeWeight(product2.weight); if (weight1 && weight2 && weight1 === weight2 && nameSimilarity >= threshold - 15) { return { isSimilar: true, confidence: nameSimilarity + 3 }; } return { isSimilar: false, confidence: nameSimilarity }; }