#!/usr/bin/env npx tsx
"use strict";
/**
 * Backfill Store-Dispensary Mapping
 *
 * Links existing stores (scheduler) to dispensaries (master AZDHS directory)
 * by matching on name, company name, or slug: exact comparisons first, then a
 * fuzzy (Levenshtein-based) fallback.
 *
 * Usage:
 *   npx tsx src/scripts/backfill-store-dispensary.ts            # Preview matches
 *   npx tsx src/scripts/backfill-store-dispensary.ts --apply    # Apply matches
 *   npx tsx src/scripts/backfill-store-dispensary.ts --verbose  # Show all match details
 */
Object.defineProperty(exports, "__esModule", { value: true });
const migrate_1 = require("../db/migrate");
const logger_1 = require("../services/logger");

const args = process.argv.slice(2);
const flags = {
    apply: args.includes('--apply'),
    verbose: args.includes('--verbose'),
    help: args.includes('--help') || args.includes('-h'),
};

/** Generic words that carry no identifying information in a store name. */
const NOISE_WORDS = /\b(?:dispensary|cannabis|marijuana|weed|shop|store|llc|inc)\b/g;

/** Minimum similarity (0-100, inclusive) for a fuzzy match to be accepted. */
const FUZZY_THRESHOLD = 70;

/**
 * Precedence of match types. A deterministic match always outranks a fuzzy
 * one, whatever the fuzzy score is.
 */
const MATCH_RANK = {
    none: 0,
    fuzzy: 1,
    slug: 2,
    company_name: 3,
    normalized_name: 4,
    exact_name: 5,
};

/**
 * Normalize a store/dispensary name for comparison.
 * Lower-cases, removes generic suffix words, punctuation and extra whitespace.
 *
 * @param {string|null|undefined} name - Raw name; null/undefined is treated as ''.
 * @returns {string} The normalized name (possibly empty).
 */
function normalizeName(name) {
    const raw = name == null ? '' : String(name);
    return raw
        .toLowerCase()
        .replace(/\s*[-\u2013\u2014]\s*/g, ' ') // Hyphen, en dash, em dash -> space
        // Whole words only: without \b, "inc" would be cut out of "lincoln"
        // and "shop" out of "bishop".
        .replace(NOISE_WORDS, ' ')
        .replace(/[\u2018\u2019]/g, "'") // Curly apostrophes -> straight
        .replace(/[^\w\s']/g, '') // Remove other punctuation
        .replace(/\s+/g, ' ') // Collapse whitespace
        .trim();
}

/**
 * Levenshtein edit distance between two strings.
 * Only two rows of the DP table are kept at any time.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Number of single-character insertions, deletions or
 *   substitutions needed to turn `a` into `b`.
 */
function levenshteinDistance(a, b) {
    if (a === b) {
        return 0;
    }
    if (a.length === 0) {
        return b.length;
    }
    if (b.length === 0) {
        return a.length;
    }
    let previousRow = Array.from({ length: a.length + 1 }, (_, j) => j);
    for (let i = 1; i <= b.length; i++) {
        const currentRow = [i];
        for (let j = 1; j <= a.length; j++) {
            const substitutionCost = b.charAt(i - 1) === a.charAt(j - 1) ? 0 : 1;
            currentRow[j] = Math.min(
                previousRow[j - 1] + substitutionCost, // substitution (or match)
                currentRow[j - 1] + 1, // insertion
                previousRow[j] + 1 // deletion
            );
        }
        previousRow = currentRow;
    }
    return previousRow[a.length];
}

/**
 * Calculate similarity score (0-100) from the edit distance relative to the
 * longer string. Two empty strings score 100.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Integer similarity percentage.
 */
function similarityScore(a, b) {
    const maxLen = Math.max(a.length, b.length);
    if (maxLen === 0) {
        return 100;
    }
    const distance = levenshteinDistance(a, b);
    return Math.round((1 - distance / maxLen) * 100);
}

/**
 * Classify how one dispensary matches a store.
 *
 * @param {{lowerName: string, normalizedName: string, slug: string}} storeKeys
 *   Pre-computed comparison keys for the store.
 * @param {object} disp - Dispensary row (reads `name`, `company_name`, `slug`).
 * @returns {{matchType: string, score: number}|null} The strongest match type
 *   that applies, or null when nothing reaches the fuzzy threshold.
 */
function classifyCandidate(storeKeys, disp) {
    const dispName = disp.name == null ? '' : String(disp.name);
    const normalizedDispName = normalizeName(dispName);
    const normalizedCompanyName = disp.company_name ? normalizeName(disp.company_name) : '';
    const dispSlug = disp.slug ? String(disp.slug).toLowerCase() : '';

    // Every comparison requires a non-empty key: names made only of noise
    // words normalize to '' and would otherwise all "match" each other.

    // 1. Exact name match (case-insensitive)
    if (storeKeys.lowerName && storeKeys.lowerName === dispName.toLowerCase()) {
        return { matchType: 'exact_name', score: 100 };
    }
    if (storeKeys.normalizedName) {
        // 2. Normalized name match
        if (storeKeys.normalizedName === normalizedDispName) {
            return { matchType: 'normalized_name', score: 95 };
        }
        // 3. Store name matches company name
        if (storeKeys.normalizedName === normalizedCompanyName) {
            return { matchType: 'company_name', score: 90 };
        }
    }
    // 4. Slug match
    if (storeKeys.slug && storeKeys.slug === dispSlug) {
        return { matchType: 'slug', score: 85 };
    }
    // 5. Fuzzy matching (only if score >= FUZZY_THRESHOLD)
    if (!storeKeys.normalizedName) {
        return null;
    }
    const nameScore = normalizedDispName
        ? similarityScore(storeKeys.normalizedName, normalizedDispName)
        : 0;
    const companyScore = normalizedCompanyName
        ? similarityScore(storeKeys.normalizedName, normalizedCompanyName)
        : 0;
    const fuzzyScore = Math.max(nameScore, companyScore);
    return fuzzyScore >= FUZZY_THRESHOLD ? { matchType: 'fuzzy', score: fuzzyScore } : null;
}

/**
 * Find the best dispensary match for a store.
 *
 * All dispensaries are considered before choosing, so a weak match on an
 * early row (e.g. slug) cannot shadow an exact name match on a later row.
 * Ties keep the first candidate encountered.
 *
 * @param {object} store - Store row (reads `name`, `slug`).
 * @param {object[]} dispensaries - Candidate dispensary rows.
 * @returns {{store: object, dispensary: object|null, matchType: string, score: number}}
 *   `dispensary` is null and `matchType` is 'none' when nothing matched.
 */
function findBestMatch(store, dispensaries) {
    const storeName = store.name == null ? '' : String(store.name);
    const storeKeys = {
        lowerName: storeName.toLowerCase(),
        normalizedName: normalizeName(storeName),
        slug: store.slug ? String(store.slug).toLowerCase() : '',
    };
    let bestMatch = {
        store,
        dispensary: null,
        matchType: 'none',
        score: 0,
    };
    for (const disp of dispensaries) {
        const candidate = classifyCandidate(storeKeys, disp);
        if (!candidate) {
            continue;
        }
        const candidateRank = MATCH_RANK[candidate.matchType];
        const bestRank = MATCH_RANK[bestMatch.matchType];
        const isBetter = candidateRank > bestRank ||
            (candidateRank === bestRank && candidate.score > bestMatch.score);
        if (!isBetter) {
            continue;
        }
        bestMatch = {
            store,
            dispensary: disp,
            matchType: candidate.matchType,
            score: candidate.score,
        };
        if (candidate.matchType === 'exact_name') {
            break; // Nothing can outrank an exact name match.
        }
    }
    return bestMatch;
}

/**
 * Write the store -> dispensary links inside a single transaction so a
 * failure part-way through does not leave a partial backfill.
 *
 * NOTE(review): assumes `migrate_1.pool` is a node-postgres Pool (the script
 * already relies on its `query(text, params)` / `end()` API) - confirm.
 *
 * @param {Array<{store: object, dispensary: object|null}>} matches
 * @returns {Promise<number>} Number of stores updated.
 */
async function applyMatches(matches) {
    const client = await migrate_1.pool.connect();
    let updated = 0;
    try {
        await client.query('BEGIN');
        for (const match of matches) {
            if (!match.dispensary) {
                continue;
            }
            await client.query('UPDATE stores SET dispensary_id = $1 WHERE id = $2', [match.dispensary.id, match.store.id]);
            updated++;
            if (flags.verbose) {
                console.log(` ✓ Linked store ${match.store.id} to dispensary ${match.dispensary.id}`);
            }
        }
        await client.query('COMMIT');
    }
    catch (error) {
        try {
            await client.query('ROLLBACK');
        }
        catch (rollbackError) {
            // Report but do not mask the original failure.
            console.error('Rollback failed:', rollbackError);
        }
        throw error;
    }
    finally {
        client.release();
    }
    return updated;
}

/**
 * Script entry point: loads unmapped stores and all dispensaries, prints the
 * proposed matches, and writes them when `--apply` is given.
 *
 * On failure the error is logged and the process exit code is set to 1; the
 * pool is closed on every path that opened the try block.
 *
 * @returns {Promise<void>}
 */
async function main() {
    if (flags.help) {
        console.log(`
Backfill Store-Dispensary Mapping

Links existing stores (scheduler) to dispensaries (master AZDHS directory)
by matching on name, company name, or slug similarity.

USAGE:
  npx tsx src/scripts/backfill-store-dispensary.ts [OPTIONS]

OPTIONS:
  --apply      Apply the mappings to the database (default: preview only)
  --verbose    Show detailed match information for all stores
  --help, -h   Show this help message

EXAMPLES:
  # Preview what would be matched
  npx tsx src/scripts/backfill-store-dispensary.ts

  # Apply the mappings
  npx tsx src/scripts/backfill-store-dispensary.ts --apply

  # Show verbose output
  npx tsx src/scripts/backfill-store-dispensary.ts --verbose
`);
        process.exit(0);
    }
    console.log('\n📦 Backfill Store-Dispensary Mapping');
    console.log('=====================================\n');
    try {
        // Fetch all stores without a dispensary_id
        const storesResult = await migrate_1.pool.query(`
            SELECT id, name, slug, dispensary_id
            FROM stores
            WHERE dispensary_id IS NULL
            ORDER BY name
        `);
        const unmappedStores = storesResult.rows;
        // Fetch all already-mapped stores for context
        const mappedResult = await migrate_1.pool.query(`
            SELECT id, name, slug, dispensary_id
            FROM stores
            WHERE dispensary_id IS NOT NULL
            ORDER BY name
        `);
        const mappedStores = mappedResult.rows;
        // Fetch all dispensaries
        const dispResult = await migrate_1.pool.query(`
            SELECT id, name, company_name, city, address, slug
            FROM dispensaries
            ORDER BY name
        `);
        const dispensaries = dispResult.rows;

        console.log(`📊 Current Status:`);
        console.log(` Stores without dispensary_id: ${unmappedStores.length}`);
        console.log(` Stores already mapped: ${mappedStores.length}`);
        console.log(` Total dispensaries: ${dispensaries.length}\n`);

        if (unmappedStores.length === 0) {
            console.log('✅ All stores are already mapped to dispensaries!\n');
            return; // The finally block closes the pool.
        }

        // Find matches for each unmapped store
        const matches = [];
        const noMatches = [];
        for (const store of unmappedStores) {
            const match = findBestMatch(store, dispensaries);
            if (match.dispensary) {
                matches.push(match);
            }
            else {
                noMatches.push(store);
            }
        }
        // Sort matches by score (highest first)
        matches.sort((a, b) => b.score - a.score);

        // Display results
        console.log(`\n🔗 Matches Found: ${matches.length}`);
        console.log('----------------------------------\n');
        if (matches.length > 0) {
            // Group by match type
            const byType = {};
            for (const m of matches) {
                if (!byType[m.matchType]) {
                    byType[m.matchType] = [];
                }
                byType[m.matchType].push(m);
            }
            const typeLabels = {
                exact_name: '✅ Exact Name Match',
                normalized_name: '✅ Normalized Name Match',
                company_name: '🏢 Company Name Match',
                slug: '🔗 Slug Match',
                fuzzy: '🔍 Fuzzy Match',
            };
            for (const [type, results] of Object.entries(byType)) {
                console.log(`${typeLabels[type]} (${results.length}):`);
                for (const r of results) {
                    const dispInfo = r.dispensary;
                    console.log(` • "${r.store.name}" → "${dispInfo.name}" (${dispInfo.city}) [${r.score}%]`);
                }
                console.log('');
            }
        }
        if (noMatches.length > 0) {
            console.log(`\n❌ No Match Found: ${noMatches.length}`);
            console.log('----------------------------------\n');
            for (const store of noMatches) {
                console.log(` • "${store.name}" (slug: ${store.slug})`);
            }
            console.log('');
        }

        // Apply if requested
        if (flags.apply && matches.length > 0) {
            console.log('\n🔧 Applying mappings...\n');
            const updated = await applyMatches(matches);
            console.log(`\n✅ Updated ${updated} stores with dispensary mappings\n`);
            logger_1.logger.info('system', `Backfill complete: linked ${updated} stores to dispensaries`);
        }
        else if (matches.length > 0) {
            console.log('\n💡 Run with --apply to update the database\n');
        }

        // Summary
        console.log('📈 Summary:');
        console.log(` ${flags.apply ? 'Matched' : 'Would match'}: ${matches.length} stores`);
        console.log(` No match: ${noMatches.length} stores`);
        console.log(` Match rate: ${Math.round((matches.length / unmappedStores.length) * 100)}%\n`);
    }
    catch (error) {
        console.error('Error:', error);
        // Set the code instead of calling process.exit() so the finally block
        // below still runs and the pool is closed cleanly.
        process.exitCode = 1;
    }
    finally {
        await migrate_1.pool.end();
    }
}

main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});