const fs = require('fs');
const path = require('path');

// Input and output live side by side under the current working directory.
const INPUT = path.join(process.cwd(), 'scrape-output', 'deeply-rooted', 'inventory-by-brand.json');
const OUTPUT = path.join(process.cwd(), 'scrape-output', 'deeply-rooted', 'cleaned-inventory.json');

// Known mangled brand spellings mapped to their canonical form. A Map is used
// instead of an object literal so that scraped text such as "constructor" or
// "__proto__" cannot resolve to a member inherited from Object.prototype.
// NOTE(review): 'Gr n' is presumably what the brand-cleaning regex in
// cleanBrandAndName leaves behind for a name with one non-ASCII letter — confirm.
const BRAND_REPLACEMENTS = new Map([['Gr n', 'Gron']]);

/**
 * Picks the lowest dollar amount mentioned in a piece of text.
 *
 * @param {string} text - Text that may contain amounts such as "$12" or "$12.50".
 * @param {*} fallback - Value returned unchanged when no amount is found.
 * @returns {*} The smallest amount found as a number, otherwise `fallback`.
 */
function extractPrice(text, fallback) {
  const prices = Array.from(text.matchAll(/\$([0-9]+(?:\.[0-9]{2})?)/g), (m) => parseFloat(m[1]));
  return prices.length > 0 ? Math.min(...prices) : fallback;
}

/**
 * Derives a display name and a brand from a raw product name.
 *
 * The raw name is split on the ellipsis character: the first non-empty segment
 * becomes the name and the second, stripped of characters outside
 * letters/digits/whitespace/`-`/`&`/`'`, becomes the inferred brand.
 *
 * @param {string} rawName - Product name as found in the input.
 * @param {string} [rawBrand] - Brand as found in the input, if any.
 * @returns {{name: string, brand: string}} Cleaned name and normalized brand;
 *   the brand is 'Unknown' when neither an explicit nor an inferred one exists.
 */
function cleanBrandAndName(rawName, rawBrand) {
  const parts = rawName
    .split('…')
    .map((p) => p.trim())
    .filter(Boolean);
  const name = parts[0] || rawName.trim();
  const inferredBrand = parts[1]
    ?.replace(/[^a-z0-9\s\-\&']/gi, ' ')
    .replace(/\s+/g, ' ')
    .trim();
  // Trim BEFORE choosing: a whitespace-only brand is truthy and would
  // otherwise win over a perfectly good inferred brand.
  const explicitBrand = (rawBrand || '').trim();
  const brand = normalizeBrand(explicitBrand || inferredBrand || 'Unknown');
  return { name, brand };
}

/**
 * Converts one raw product record into the cleaned output shape.
 *
 * @param {object} p - Raw product; reads `name`, `brand`, `price`, `size`,
 *   `category`, `url`, `imageUrl` and `inStock`.
 * @returns {object} Cleaned product. `price` is the lowest amount found in the
 *   raw name, else the raw `price`; `inStock` is false only when the raw value
 *   is exactly `false`.
 */
function cleanProduct(p) {
  const { name, brand } = cleanBrandAndName(p.name, p.brand);
  const price = extractPrice(p.name, p.price);
  return {
    name,
    brand: brand || 'Unknown',
    price,
    size: p.size,
    category: p.category,
    url: p.url,
    imageUrl: p.imageUrl,
    inStock: p.inStock !== false,
  };
}

/**
 * Removes duplicate products, keeping the first occurrence of each.
 *
 * Products are keyed by `url` when it is truthy, otherwise by the lowercased
 * `name|brand` pair.
 *
 * @param {object[]} products - Cleaned products.
 * @returns {object[]} Unique products in first-seen order.
 */
function dedupe(products) {
  const seen = new Map();
  for (const p of products) {
    const key = (p.url || `${p.name.toLowerCase()}|${p.brand.toLowerCase()}`).trim();
    if (!seen.has(key)) {
      seen.set(key, p);
    }
  }
  return Array.from(seen.values());
}

/**
 * Groups products by brand, preserving the order in which brands first appear.
 *
 * @param {object[]} products - Cleaned products.
 * @returns {{brand: string, products: object[]}[]} One entry per brand;
 *   products with a falsy brand are collected under 'Unknown'.
 */
function groupByBrand(products) {
  const map = new Map();
  for (const p of products) {
    const key = p.brand || 'Unknown';
    if (!map.has(key)) {
      map.set(key, []);
    }
    map.get(key).push(p);
  }
  return Array.from(map.entries(), ([brand, prods]) => ({ brand, products: prods }));
}

/**
 * Replaces a known mangled brand spelling with its canonical form.
 *
 * @param {string} brand - Brand to normalize.
 * @returns {string} The canonical spelling if one is registered (and
 *   non-empty), otherwise `brand` unchanged.
 */
function normalizeBrand(brand) {
  return BRAND_REPLACEMENTS.get(brand) || brand;
}

/**
 * Reads the scraped inventory from INPUT, cleans and de-duplicates every
 * product, regroups the result by brand, writes it to OUTPUT as pretty-printed
 * JSON and logs summary counts.
 *
 * @throws If INPUT cannot be read or parsed, or OUTPUT cannot be written.
 */
function main() {
  const raw = JSON.parse(fs.readFileSync(INPUT, 'utf8'));

  const flattened = [];
  for (const group of raw) {
    // A group without a products list contributes nothing instead of
    // aborting the whole run.
    for (const p of group.products ?? []) {
      flattened.push(cleanProduct(p));
    }
  }

  const unique = dedupe(flattened);
  const grouped = groupByBrand(unique);
  fs.writeFileSync(OUTPUT, JSON.stringify(grouped, null, 2));

  const total = unique.length;
  const outOfStock = unique.filter((p) => !p.inStock).length;
  console.log(`Cleaned products: ${total}`);
  console.log(`Out of stock: ${outOfStock}`);
  console.log(`Brands: ${grouped.length}`);
  console.log(`Saved to ${OUTPUT}`);
}

main();