fix(monitor): remove non-existent worker columns from job_run_logs query
The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
90
backend/new-scrapers/postprocess-deeply-rooted-clean.js
Normal file
90
backend/new-scrapers/postprocess-deeply-rooted-clean.js
Normal file
@@ -0,0 +1,90 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Both files live in the same directory, resolved against the directory the
// script is launched from (process.cwd()), not against this file's location.
const DEEPLY_ROOTED_DIR = path.join(process.cwd(), 'scrape-output', 'deeply-rooted');

// JSON file that main() reads and parses.
const INPUT = path.join(DEEPLY_ROOTED_DIR, 'inventory-by-brand.json');

// JSON file that main() writes the cleaned, grouped result to.
const OUTPUT = path.join(DEEPLY_ROOTED_DIR, 'cleaned-inventory.json');
|
||||
|
||||
/**
 * Pulls the lowest dollar amount out of a free-text string.
 *
 * Every `$<digits>` or `$<digits>.<two digits>` occurrence in `text` is
 * parsed as a number and the smallest one is returned.
 *
 * @param {string} text - Text to scan for `$`-prefixed prices.
 * @param {*} fallback - Value returned unchanged when no price is found.
 * @returns {number|*} The lowest price found in `text`, otherwise `fallback`.
 */
function extractPrice(text, fallback) {
  // A missing or non-string value has no price to extract; calling matchAll
  // on it would throw instead of letting the caller's fallback apply.
  if (typeof text !== 'string') {
    return fallback;
  }

  const prices = Array.from(
    text.matchAll(/\$([0-9]+(?:\.[0-9]{2})?)/g),
    (match) => parseFloat(match[1]),
  );

  return prices.length > 0 ? Math.min(...prices) : fallback;
}
|
||||
|
||||
/**
 * Splits a raw product name into a display name and a brand.
 *
 * The raw name is split on the ellipsis character ('…'): the first non-empty
 * segment is the product name and the second, if any, is used to infer a
 * brand when no explicit brand is supplied. The chosen brand is passed
 * through normalizeBrand().
 *
 * @param {string} rawName - Raw product name; a non-string is treated as ''.
 * @param {string} [rawBrand] - Explicit brand; ignored when not a string or
 *   when it is empty/whitespace-only.
 * @returns {{name: string, brand: string}} Cleaned name and normalized brand.
 */
function cleanBrandAndName(rawName, rawBrand) {
  // Tolerate a missing name instead of throwing on `.split`.
  const source = typeof rawName === 'string' ? rawName : '';

  const [firstSegment, secondSegment] = source
    .split('…')
    .map((segment) => segment.trim())
    .filter(Boolean);

  // Falls back to the trimmed input when splitting left no non-empty segment.
  const name = firstSegment || source.trim();

  // Anything other than ASCII letters, digits, whitespace, '-', '&' and "'"
  // becomes a space, then runs of whitespace are collapsed to one space.
  const inferredBrand = secondSegment
    ?.replace(/[^a-z0-9\s\-\&']/gi, ' ')
    .replace(/\s+/g, ' ')
    .trim();

  // Trim BEFORE choosing a fallback: a whitespace-only brand is truthy and
  // would otherwise win over the inferred brand and then collapse to ''.
  const explicitBrand = typeof rawBrand === 'string' ? rawBrand.trim() : '';

  const brand = normalizeBrand(explicitBrand || inferredBrand || 'Unknown');

  return { name, brand };
}
|
||||
|
||||
/**
 * Builds the cleaned record for a single scraped product.
 *
 * Name and brand come from cleanBrandAndName(); the price is taken from any
 * `$` amount embedded in the raw name via extractPrice(), falling back to
 * `p.price`. `size`, `category`, `url` and `imageUrl` are copied through
 * unchanged, and any other property of `p` is dropped.
 *
 * @param {object} p - Raw product entry.
 * @returns {{name: string, brand: string, price: *, size: *, category: *,
 *   url: *, imageUrl: *, inStock: boolean}} Cleaned product.
 */
function cleanProduct(p) {
  const { name, brand } = cleanBrandAndName(p.name, p.brand);
  const { size, category, url, imageUrl } = p;

  // Property order is kept stable because it is the key order of the
  // serialized JSON output.
  return {
    name,
    brand: brand || 'Unknown',
    price: extractPrice(p.name, p.price),
    size,
    category,
    url,
    imageUrl,
    // Only an explicit `false` marks a product as out of stock; a missing
    // flag counts as in stock.
    inStock: p.inStock !== false,
  };
}
|
||||
|
||||
/**
 * Removes duplicate products, keeping the first occurrence of each.
 *
 * Two products are duplicates when they share the same trimmed URL, or, for
 * products without a usable URL, the same lower-cased `name|brand` pair.
 *
 * @param {Array<object>} products - Products to de-duplicate.
 * @returns {Array<object>} First occurrence of each product, in input order.
 */
function dedupe(products) {
  const firstByKey = new Map();

  for (const product of products) {
    // Trim before testing: a whitespace-only URL is truthy but trims to '',
    // which would give every such product the same key.
    const url = typeof product.url === 'string' ? product.url.trim() : '';

    // String(... ?? '') keeps a missing name/brand from throwing.
    const key =
      url ||
      `${String(product.name ?? '').toLowerCase()}|${String(product.brand ?? '').toLowerCase()}`.trim();

    if (!firstByKey.has(key)) {
      firstByKey.set(key, product);
    }
  }

  return [...firstByKey.values()];
}
|
||||
|
||||
/**
 * Groups products by their `brand` property.
 *
 * Products with a falsy brand are placed under 'Unknown'. Groups appear in
 * the order their brand is first seen, and products keep their input order
 * within a group.
 *
 * @param {Array<object>} products - Products to group.
 * @returns {Array<{brand: string, products: Array<object>}>} One entry per brand.
 */
function groupByBrand(products) {
  const byBrand = new Map();

  for (const product of products) {
    const brand = product.brand || 'Unknown';
    const bucket = byBrand.get(brand);

    if (bucket) {
      bucket.push(product);
    } else {
      byBrand.set(brand, [product]);
    }
  }

  return Array.from(byBrand, ([brand, prods]) => ({ brand, products: prods }));
}
|
||||
|
||||
/**
 * Maps known mangled brand spellings to their canonical form.
 *
 * @param {string} brand - Brand name to normalize.
 * @returns {string} The canonical spelling when `brand` is a known variant,
 *   otherwise `brand` unchanged.
 */
function normalizeBrand(brand) {
  // A Map is used rather than an object literal so that brands which happen
  // to match an inherited Object.prototype key (e.g. 'constructor') are not
  // "replaced" by that inherited value.
  const replacements = new Map([
    // 'Gr n' is what a name such as 'Grön' becomes once its non-ASCII letter
    // has been replaced by a space — presumably the case this entry targets.
    ['Gr n', 'Gron'],
  ]);

  return replacements.get(brand) || brand;
}
|
||||
|
||||
/**
 * Script entry point: reads the JSON at INPUT, cleans and de-duplicates every
 * product, regroups the result by brand, writes it to OUTPUT as
 * pretty-printed JSON and logs a short summary.
 *
 * @throws {Error} When INPUT does not contain a JSON array. File-system and
 *   JSON parse errors from reading/writing propagate unchanged.
 */
function main() {
  const raw = JSON.parse(fs.readFileSync(INPUT, 'utf8'));

  // Fail with a clear message instead of an opaque "raw is not iterable".
  if (!Array.isArray(raw)) {
    throw new Error(`Expected ${INPUT} to contain a JSON array of brand groups`);
  }

  // Groups without a `products` array contribute nothing rather than
  // aborting the whole run.
  const flattened = raw.flatMap((group) =>
    (Array.isArray(group?.products) ? group.products : []).map((p) => cleanProduct(p)),
  );

  const unique = dedupe(flattened);
  const grouped = groupByBrand(unique);

  fs.writeFileSync(OUTPUT, JSON.stringify(grouped, null, 2));

  const total = unique.length;
  const outOfStock = unique.filter((p) => !p.inStock).length;

  console.log(`Cleaned products: ${total}`);
  console.log(`Out of stock: ${outOfStock}`);
  console.log(`Brands: ${grouped.length}`);
  console.log(`Saved to ${OUTPUT}`);
}
|
||||
|
||||
main();
|
||||
Reference in New Issue
Block a user