import fs from 'fs/promises';
import path from 'path';

/** A product record as read from the input JSON; only `name` is required. */
type RawProduct = {
  name: string;
  brand?: string;
  price?: number;
  size?: string;
  category?: string;
  url?: string;
  imageUrl?: string;
  inStock?: boolean;
};

/** A product after cleaning: `brand` and `inStock` are always populated. */
type CleanProduct = {
  name: string;
  brand: string;
  price?: number;
  size?: string;
  category?: string;
  url?: string;
  imageUrl?: string;
  inStock: boolean;
};

/** All cleaned products that share one brand label. */
type BrandGroup = {
  brand: string;
  products: CleanProduct[];
};

const INPUT = path.join(process.cwd(), 'scrape-output', 'deeply-rooted', 'inventory-by-brand.json');
const OUTPUT = path.join(process.cwd(), 'scrape-output', 'deeply-rooted', 'cleaned-inventory.json');

/**
 * Matches dollar amounts such as `$5`, `$4.5`, `$12.99` and `$1,299.00`.
 * Group 1 is the numeric part, possibly containing thousands separators.
 * The comma-grouped alternative is tried first and must not be followed by
 * another digit, so malformed groupings like `$1,2345` fall back to `$1`.
 */
const PRICE_PATTERN = /\$((?:\d{1,3}(?:,\d{3})+(?!\d)|\d+)(?:\.\d{1,2})?)/g;

/** Narrows an unknown value to a non-null, non-array object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Checks the one field the cleaning pipeline cannot work without: a string
 * `name`. The optional fields are NOT validated here and are passed through
 * as found in the input.
 */
function isRawProduct(value: unknown): value is RawProduct {
  return isRecord(value) && typeof value.name === 'string';
}

/**
 * Extracts a price from free text.
 *
 * @param text - Text that may contain one or more `$`-prefixed amounts.
 * @param fallback - Value returned when `text` contains no amount.
 * @returns The lowest amount found in `text`, otherwise `fallback`.
 */
function extractPrice(text: string, fallback?: number): number | undefined {
  const prices = Array.from(text.matchAll(PRICE_PATTERN), (m) =>
    // Strip thousands separators before parsing ("1,299.00" -> 1299).
    parseFloat(m[1].replace(/,/g, '')),
  );
  if (prices.length > 0) {
    // Use the lowest price (usually the sale price)
    return Math.min(...prices);
  }
  return fallback;
}

/**
 * Splits a raw product name on the ellipsis character (`…`) into a display
 * name and, when present, a brand inferred from the second segment.
 *
 * @param rawName - Name text as read from the input.
 * @param rawBrand - Explicit brand, preferred over the inferred one when it
 *   is non-blank.
 * @returns The cleaned name and a non-empty brand (`'Unknown'` as last resort).
 */
function cleanBrandAndName(rawName: string, rawBrand?: string): { name: string; brand: string } {
  const parts = rawName
    .split('…')
    .map((p) => p.trim())
    .filter(Boolean);
  const name = parts[0] || rawName.trim();
  // Keep only letters, digits, whitespace, '-', '&' and "'" in the inferred brand.
  const inferredBrand = parts[1]
    ?.replace(/[^a-z0-9\s\-&']/gi, ' ')
    .replace(/\s+/g, ' ')
    .trim();
  // Trim BEFORE falling back so a whitespace-only brand does not mask the
  // inferred brand or collapse to an empty string.
  const brand = rawBrand?.trim() || inferredBrand || 'Unknown';
  return { name, brand };
}

/**
 * Converts one raw product into its cleaned form.
 *
 * A price found in the name text wins over `p.price`; `inStock` defaults to
 * `true` unless the input says `false` explicitly.
 */
function cleanProduct(p: RawProduct): CleanProduct {
  const { name, brand } = cleanBrandAndName(p.name, p.brand);
  const price = extractPrice(p.name, p.price);
  return {
    name,
    brand,
    price,
    size: p.size,
    category: p.category,
    url: p.url,
    imageUrl: p.imageUrl,
    inStock: p.inStock !== false,
  };
}

/**
 * Removes duplicate products, keeping the first occurrence of each key.
 *
 * The key is the trimmed URL when it is non-blank, otherwise the lower-cased
 * `name|brand` pair.
 */
function dedupe(products: CleanProduct[]): CleanProduct[] {
  const seen = new Map<string, CleanProduct>();
  for (const p of products) {
    // Trim the URL before the fallback so a blank URL cannot become the
    // shared empty key for unrelated products.
    const key = p.url?.trim() || `${p.name.toLowerCase()}|${p.brand.toLowerCase()}`.trim();
    if (!seen.has(key)) {
      seen.set(key, p);
    }
  }
  return Array.from(seen.values());
}

/**
 * Groups products by brand, preserving first-seen order of both brands and
 * products. Products with an empty brand are grouped under `'Unknown'`.
 */
function groupByBrand(products: CleanProduct[]): BrandGroup[] {
  const byBrand = new Map<string, CleanProduct[]>();
  for (const p of products) {
    const key = p.brand || 'Unknown';
    const bucket = byBrand.get(key);
    if (bucket) {
      bucket.push(p);
    } else {
      byBrand.set(key, [p]);
    }
  }
  return Array.from(byBrand, ([brand, prods]) => ({ brand, products: prods }));
}

/**
 * Reads the input JSON at `INPUT`, cleans and de-duplicates every product,
 * regroups the result by brand, writes it to `OUTPUT`, and logs a summary.
 *
 * @throws Error when the input file is not a JSON array. Groups without a
 *   `products` array and products without a string `name` are skipped and
 *   counted instead of aborting the run.
 */
async function main(): Promise<void> {
  const parsed: unknown = JSON.parse(await fs.readFile(INPUT, 'utf8'));
  if (!Array.isArray(parsed)) {
    throw new Error(`Expected ${INPUT} to contain a JSON array of brand groups`);
  }
  const groups: unknown[] = parsed;

  const flattened: CleanProduct[] = [];
  let skipped = 0;
  for (const group of groups) {
    const products: unknown = isRecord(group) ? group.products : undefined;
    if (!Array.isArray(products)) {
      skipped += 1;
      continue;
    }
    const entries: unknown[] = products;
    for (const entry of entries) {
      if (isRawProduct(entry)) {
        flattened.push(cleanProduct(entry));
      } else {
        skipped += 1;
      }
    }
  }

  const unique = dedupe(flattened);
  const grouped = groupByBrand(unique);
  await fs.writeFile(OUTPUT, JSON.stringify(grouped, null, 2));

  const total = unique.length;
  const outOfStock = unique.filter((p) => !p.inStock).length;
  console.log(`Cleaned products: ${total}`);
  console.log(`Out of stock: ${outOfStock}`);
  console.log(`Brands: ${grouped.length}`);
  if (skipped > 0) {
    console.warn(`Skipped malformed entries: ${skipped}`);
  }
  console.log(`Saved to ${OUTPUT}`);
}

main().catch((err) => {
  console.error('Post-process failed:', err);
  process.exitCode = 1;
});