// The job_run_logs table tracks scheduled job orchestration, not individual worker jobs.
// Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs.
// 🤖 Generated with [Claude Code](https://claude.com/claude-code)
// Co-Authored-By: Claude <noreply@anthropic.com>
// File info: 91 lines, 2.5 KiB, JavaScript
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
// Directory containing the 'deeply-rooted' scrape output, resolved against the
// current working directory (so the script must be launched from the expected root).
const DATA_DIR = path.join(process.cwd(), 'scrape-output', 'deeply-rooted');

// JSON file the script reads.
const INPUT = path.join(DATA_DIR, 'inventory-by-brand.json');

// JSON file the script writes.
const OUTPUT = path.join(DATA_DIR, 'cleaned-inventory.json');
|
|
|
|
/**
 * Extract the lowest dollar amount mentioned in a piece of text.
 *
 * Scans `text` for `$`-prefixed amounts (whole dollars, optionally followed by
 * exactly two decimal digits) and returns the smallest one found.
 *
 * @param {string} text - Free text that may contain prices such as "$12.00".
 * @param {*} fallback - Value returned unchanged when no price can be extracted.
 * @returns {number|*} The minimum price found in `text`, otherwise `fallback`.
 */
function extractPrice(text, fallback) {
  // A record without a usable text field must not crash the run:
  // `matchAll` only exists on strings.
  if (typeof text !== 'string') {
    return fallback;
  }

  const prices = Array.from(
    text.matchAll(/\$([0-9]+(?:\.[0-9]{2})?)/g),
    (match) => parseFloat(match[1]),
  );

  // When several amounts appear, the lowest one wins.
  return prices.length > 0 ? Math.min(...prices) : fallback;
}
|
|
|
|
/**
 * Split a raw product name into a display name and a brand.
 *
 * The raw name is split on the ellipsis character ('…'): the first non-empty
 * segment becomes the name and the second, if present, is used as an inferred
 * brand. An explicit `rawBrand` takes precedence over the inferred one, and
 * 'Unknown' is used when neither yields a non-empty value. The chosen brand is
 * passed through `normalizeBrand`.
 *
 * @param {string} rawName - Product name as scraped; non-strings are treated as empty.
 * @param {string} [rawBrand] - Brand as scraped; blank or non-string values are ignored.
 * @returns {{name: string, brand: string}} Cleaned name and normalized brand.
 */
function cleanBrandAndName(rawName, rawBrand) {
  // Tolerate records with a missing or non-string name instead of throwing on `.split`.
  const sourceName = typeof rawName === 'string' ? rawName : '';
  const parts = sourceName.split('…').map((p) => p.trim()).filter(Boolean);
  const name = parts[0] || sourceName.trim();

  // Anything other than ASCII letters, digits, whitespace, '-', '&' and "'" is
  // replaced by a space, then runs of whitespace are collapsed.
  const inferredBrand = parts[1]?.replace(/[^a-z0-9\s\-\&']/gi, ' ').replace(/\s+/g, ' ').trim();

  // Trim BEFORE choosing: a whitespace-only brand is truthy and would otherwise
  // shadow the inferred brand and produce an empty result.
  const explicitBrand = typeof rawBrand === 'string' ? rawBrand.trim() : '';
  const brand = normalizeBrand(explicitBrand || inferredBrand || 'Unknown');

  return { name, brand };
}
|
|
|
|
/**
 * Build the cleaned record for one scraped product.
 *
 * Name and brand come from `cleanBrandAndName`; the price is extracted from the
 * product name with the scraped `price` as fallback. The remaining fields are
 * copied through unchanged.
 *
 * @param {object} p - Scraped product record.
 * @returns {object} Record with name, brand, price, size, category, url,
 *   imageUrl and inStock (in that key order).
 */
function cleanProduct(p) {
  const cleaned = cleanBrandAndName(p.name, p.brand);
  const extractedPrice = extractPrice(p.name, p.price);

  const result = { name: cleaned.name };
  result.brand = cleaned.brand ? cleaned.brand : 'Unknown';
  result.price = extractedPrice;
  result.size = p.size;
  result.category = p.category;
  result.url = p.url;
  result.imageUrl = p.imageUrl;
  // Only an explicit `false` marks a product as out of stock.
  result.inStock = !(p.inStock === false);
  return result;
}
|
|
|
|
/**
 * Drop duplicate products, keeping the first occurrence of each.
 *
 * Two products are duplicates when they share the same key: the trimmed `url`
 * when it is truthy, otherwise the lower-cased "name|brand" pair.
 *
 * @param {Iterable<object>} products - Cleaned product records.
 * @returns {object[]} Unique products in first-seen order.
 */
function dedupe(products) {
  const firstByKey = new Map();

  for (const product of products) {
    let identity;
    if (product.url) {
      identity = product.url;
    } else {
      identity = `${product.name.toLowerCase()}|${product.brand.toLowerCase()}`;
    }
    identity = identity.trim();

    if (firstByKey.has(identity)) {
      continue;
    }
    firstByKey.set(identity, product);
  }

  return [...firstByKey.values()];
}
|
|
|
|
/**
 * Group products by brand.
 *
 * Products with a falsy `brand` are grouped under 'Unknown'. Groups appear in
 * the order their brand was first encountered, and products keep their input
 * order within each group.
 *
 * @param {Iterable<object>} products - Product records with an optional `brand`.
 * @returns {{brand: string, products: object[]}[]} One entry per brand.
 */
function groupByBrand(products) {
  const buckets = new Map();

  for (const product of products) {
    const label = product.brand ? product.brand : 'Unknown';
    const bucket = buckets.get(label);
    if (bucket === undefined) {
      buckets.set(label, [product]);
    } else {
      bucket.push(product);
    }
  }

  const grouped = [];
  buckets.forEach((prods, brand) => {
    grouped.push({ brand, products: prods });
  });
  return grouped;
}
|
|
|
|
/**
 * Map known mangled brand spellings to their canonical form.
 *
 * @param {string} brand - Brand name to normalize.
 * @returns {string} The canonical spelling when `brand` is a known variant,
 *   otherwise `brand` unchanged.
 */
function normalizeBrand(brand) {
  // A Map is used instead of a plain object so that brand names which happen to
  // match inherited Object.prototype members ('constructor', 'toString', ...)
  // are not "replaced" by a function.
  const replacements = new Map([
    // NOTE(review): presumably the result of a non-ASCII letter having been
    // replaced by a space during brand cleaning — confirm.
    ['Gr n', 'Gron'],
  ]);

  return replacements.get(brand) ?? brand;
}
|
|
|
|
/**
 * Clean the scraped inventory file and write the result.
 *
 * Reads the JSON at INPUT (an array of groups, each with a `products` array),
 * cleans every product, removes duplicates, regroups by brand, writes the
 * grouped result to OUTPUT as pretty-printed JSON and logs summary counts.
 *
 * @throws {TypeError} When the input JSON root is not an array.
 * @throws {Error} When the input cannot be read or parsed, or the output
 *   cannot be written (errors from `fs` / `JSON.parse` propagate).
 */
function main() {
  const raw = JSON.parse(fs.readFileSync(INPUT, 'utf8'));
  if (!Array.isArray(raw)) {
    // Fail with an actionable message instead of an opaque "not iterable" error.
    throw new TypeError(`Expected ${INPUT} to contain a JSON array of brand groups`);
  }

  const flattened = [];
  let skippedGroups = 0;

  for (const group of raw) {
    // One malformed group must not abort the whole run; skip it and report below.
    if (!Array.isArray(group?.products)) {
      skippedGroups += 1;
      continue;
    }
    for (const p of group.products) {
      flattened.push(cleanProduct(p));
    }
  }

  if (skippedGroups > 0) {
    console.warn(`Skipped ${skippedGroups} group(s) without a products array`);
  }

  const unique = dedupe(flattened);
  const grouped = groupByBrand(unique);

  fs.writeFileSync(OUTPUT, JSON.stringify(grouped, null, 2));

  const total = unique.length;
  const outOfStock = unique.filter((p) => !p.inStock).length;

  console.log(`Cleaned products: ${total}`);
  console.log(`Out of stock: ${outOfStock}`);
  console.log(`Brands: ${grouped.length}`);
  console.log(`Saved to ${OUTPUT}`);
}
|
|
|
|
// Entry point: run the cleanup immediately when this script is loaded.
main();
|