feat: Add stale process monitor, users route, landing page, archive old scripts
- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
218
backend/archive/enrich-prices.ts
Normal file
218
backend/archive/enrich-prices.ts
Normal file
@@ -0,0 +1,218 @@
|
||||
import { firefox } from 'playwright';
|
||||
import { pool } from './src/db/migrate.js';
|
||||
import { getRandomProxy } from './src/utils/proxyManager.js';
|
||||
|
||||
/**
 * Parses the dispensary ID passed as the second CLI argument.
 *
 * A missing or empty argument falls back to the default ID `112`. Anything
 * that is not a plain non-negative integer is rejected, so a typo cannot
 * reach the SQL parameters as `NaN` or be silently truncated
 * (`parseInt('12abc')` would otherwise yield `12`).
 *
 * @param raw - Raw argument text, or `undefined` when the argument was omitted.
 * @returns The parsed dispensary ID.
 * @throws Error when the argument is not a non-negative integer.
 */
function parseDispensaryId(raw: string | undefined): number {
  const text = (raw || '112').trim();
  if (!/^\d+$/.test(text)) {
    throw new Error(`Invalid dispensary ID "${text}": expected a non-negative integer`);
  }
  return parseInt(text, 10);
}

// Label that prefixes this worker's log lines; defaults to "P" plus the last
// four digits of the current epoch milliseconds.
const workerNum = process.argv[2] || `P${Date.now().toString().slice(-4)}`;

// Dispensary whose products are enriched (second CLI argument, default 112).
const dispensaryId = parseDispensaryId(process.argv[3]);

const batchSize = 10; // Process 10 products per batch
|
||||
|
||||
/**
 * Columns of a `products` row that this script selects when looking for
 * products that still need a price.
 */
interface Product {
  /** Row id; used to target the price update. */
  id: number;
  /** Product slug (selected, but not read elsewhere in this script). */
  slug: string;
  /** Product name, shown in progress logs. */
  name: string;
  /** Brand name, shown in progress logs. */
  brand: string;
  /** URL of the product page that is loaded to scrape the price. */
  dutchie_url: string;
}
|
||||
|
||||
/**
 * Fetches products of the configured dispensary that have a product URL but
 * no regular price yet, lowest id first.
 *
 * @param limit - Maximum number of rows to return.
 * @returns The matching rows as returned by the database driver.
 */
async function getProductsNeedingPrices(limit: number): Promise<Product[]> {
  const sql = `
    SELECT id, slug, name, brand, dutchie_url
    FROM products
    WHERE dispensary_id = $1
      AND regular_price IS NULL
      AND dutchie_url IS NOT NULL
    ORDER BY id
    LIMIT $2
  `;
  const params = [dispensaryId, limit];

  const { rows } = await pool.query(sql, params);
  return rows;
}
|
||||
|
||||
/**
 * Loads a product page and scrapes its price.
 *
 * Extraction strategy, in order:
 *  1. JSON-LD: the first `Product` entry with an offer carrying a positive
 *     numeric `price` (or `lowPrice`). `offers` may be a single object or an
 *     array, and the JSON-LD document itself may be a top-level array.
 *  2. Text fallback: the first two `$` amounts found in the body text. The
 *     lower one is reported as the sale price and the higher one as the
 *     regular price; if they are equal, no sale price is reported.
 *
 * A `Product` entry without a usable price no longer ends the search: the
 * remaining entries and the text fallback are still tried.
 *
 * Navigation or evaluation errors are logged and reported as "no price"
 * rather than thrown.
 *
 * @param page - Page object used for `goto`, `waitForTimeout` and `evaluate`.
 * @param productUrl - URL of the product page to load.
 * @returns The prices found; both fields are `undefined` when nothing was found.
 */
async function extractPriceFromPage(page: any, productUrl: string): Promise<{
  regularPrice?: number;
  salePrice?: number;
}> {
  try {
    console.log(`[${workerNum}] Loading: ${productUrl}`);

    await page.goto(productUrl, {
      waitUntil: 'domcontentloaded',
      timeout: 30000
    });

    // Fixed pause after DOMContentLoaded before the DOM is read
    // (presumably to let client-side rendering settle).
    await page.waitForTimeout(2000);

    // The callback is evaluated in the page, not in this process, so it must
    // be fully self-contained: no module-scope values or helpers in here.
    const priceData = await page.evaluate(() => {
      // Try JSON-LD structured data first
      const scripts = Array.from(document.querySelectorAll('script[type="application/ld+json"]'));

      for (const script of scripts) {
        try {
          const parsed = JSON.parse(script.textContent || '');
          const nodes: any[] = Array.isArray(parsed) ? parsed : [parsed];

          for (const node of nodes) {
            if (!node || node['@type'] !== 'Product' || !node.offers) {
              continue;
            }

            const offers: any[] = Array.isArray(node.offers) ? node.offers : [node.offers];
            for (const offer of offers) {
              const price = parseFloat(offer?.price ?? offer?.lowPrice);
              // Zero / NaN are treated as "no price", so keep looking.
              if (Number.isFinite(price) && price > 0) {
                return { regularPrice: price, salePrice: undefined };
              }
            }
          }
        } catch {
          // Malformed JSON-LD: continue to the next script
        }
      }

      // Fallback: extract from page text
      const pageText = document.body?.textContent || '';

      // Look for price patterns like $30.00, $40.00
      const priceMatches = pageText.match(/\$(\d+\.?\d*)/g);

      if (priceMatches && priceMatches.length > 0) {
        const prices = priceMatches.map(p => parseFloat(p.replace('$', '')));

        if (prices.length >= 2) {
          // Order on the page is not trusted: the lower of the first two
          // amounts is the sale price, the higher one the regular price.
          const low = Math.min(prices[0], prices[1]);
          const high = Math.max(prices[0], prices[1]);

          if (low < high) {
            return { salePrice: low, regularPrice: high };
          }

          // Same amount twice is not a discount.
          return { regularPrice: high, salePrice: undefined };
        }

        return { regularPrice: prices[0], salePrice: undefined };
      }

      return { regularPrice: undefined, salePrice: undefined };
    });

    return priceData;
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.log(`[${workerNum}] ⚠️ Error loading page: ${message}`);
    return { regularPrice: undefined, salePrice: undefined };
  }
}
|
||||
|
||||
/**
 * Writes the scraped prices for one product and refreshes its `updated_at`.
 *
 * Any falsy price (`undefined`, `0`, `NaN`) is stored as SQL `NULL`.
 *
 * @param productId - Id of the `products` row to update.
 * @param regularPrice - Regular price to store, if one was found.
 * @param salePrice - Sale price to store, if one was found.
 */
async function updateProductPrice(
  productId: number,
  regularPrice?: number,
  salePrice?: number
): Promise<void> {
  const regularOrNull = regularPrice ? regularPrice : null;
  const saleOrNull = salePrice ? salePrice : null;

  const sql = `
    UPDATE products
    SET regular_price = $1,
        sale_price = $2,
        updated_at = CURRENT_TIMESTAMP
    WHERE id = $3
  `;

  await pool.query(sql, [regularOrNull, saleOrNull, productId]);
}
|
||||
|
||||
/**
 * Runs the price-enrichment worker for the configured dispensary.
 *
 * Steps: verify the dispensary exists, pick a proxy, launch a headless
 * Firefox context through that proxy, then repeatedly fetch products without
 * a regular price, scrape each product page and store whatever was found.
 * A summary is printed at the end.
 *
 * Products for which no regular price could be scraped stay `NULL` in the
 * database and would be returned by every subsequent fetch. They are
 * remembered for the lifetime of the run and skipped, so the loop terminates
 * once every candidate has been attempted once.
 *
 * The browser and the database pool are released even when a step throws.
 * The process exits with code 1 when the dispensary does not exist or no
 * proxy is available.
 */
async function main(): Promise<void> {
  const rule = '='.repeat(70);

  console.log(`\n${rule}`);
  console.log(`💰 PRICE ENRICHMENT WORKER - ${workerNum}`);
  console.log(` Dispensary ID: ${dispensaryId}`);
  console.log(` Batch Size: ${batchSize} products`);
  console.log(`${rule}\n`);

  try {
    // Get dispensary info
    const dispensaryResult = await pool.query(
      "SELECT id, name, menu_url FROM dispensaries WHERE id = $1",
      [dispensaryId]
    );

    if (dispensaryResult.rows.length === 0) {
      console.error(`[${workerNum}] ❌ Dispensary ID ${dispensaryId} not found`);
      process.exit(1);
    }

    console.log(`[${workerNum}] ✅ Dispensary: ${dispensaryResult.rows[0].name}\n`);

    // Get proxy
    const proxy = await getRandomProxy();
    if (!proxy) {
      console.log(`[${workerNum}] ❌ No proxy available`);
      process.exit(1);
    }

    console.log(`[${workerNum}] 🔐 Using proxy: ${proxy.server}\n`);

    // Launch browser
    const browser = await firefox.launch({ headless: true });

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        proxy: {
          server: proxy.server,
          username: proxy.username,
          password: proxy.password
        }
      });

      const page = await context.newPage();

      let totalProcessed = 0;
      let totalWithPrices = 0;
      let totalNoPrices = 0;
      let batchNum = 0;

      // Ids attempted in this run whose regular price is still NULL.
      const unresolvedIds = new Set<number>();

      // Keep processing batches
      while (true) {
        // Unresolved products still match the query and sort first, so ask
        // for enough extra rows to get past them, then drop them.
        const candidates = await getProductsNeedingPrices(batchSize + unresolvedIds.size);
        const products = candidates
          .filter((candidate) => !unresolvedIds.has(candidate.id))
          .slice(0, batchSize);

        if (products.length === 0) {
          console.log(`[${workerNum}] ℹ️ No more products need price enrichment`);
          break;
        }

        batchNum++;
        console.log(`[${workerNum}] ${'─'.repeat(70)}`);
        console.log(`[${workerNum}] 📦 BATCH #${batchNum}: Processing ${products.length} products`);
        console.log(`[${workerNum}] ${'─'.repeat(70)}\n`);

        for (const [i, product] of products.entries()) {
          console.log(`[${workerNum}] [${i + 1}/${products.length}] ${product.brand} - ${product.name.substring(0, 40)}`);

          const { regularPrice, salePrice } = await extractPriceFromPage(page, product.dutchie_url);

          await updateProductPrice(product.id, regularPrice, salePrice);

          if (!regularPrice) {
            // regular_price stays NULL, so the row would be fetched again.
            unresolvedIds.add(product.id);
          }

          totalProcessed++;

          if (regularPrice || salePrice) {
            totalWithPrices++;
            const priceStr = salePrice
              ? `Sale: $${salePrice.toFixed(2)} (Reg: $${regularPrice?.toFixed(2) || 'N/A'})`
              : `Price: $${regularPrice?.toFixed(2)}`;
            console.log(`[${workerNum}] ✅ ${priceStr}`);
          } else {
            totalNoPrices++;
            console.log(`[${workerNum}] ⚠️ No price found`);
          }

          // Small delay between products
          await page.waitForTimeout(500);
        }

        console.log(`\n[${workerNum}] ✅ Batch #${batchNum} complete\n`);

        // Delay between batches
        await page.waitForTimeout(2000);
      }

      console.log(`\n[${workerNum}] ${rule}`);
      console.log(`[${workerNum}] ✅ PRICE ENRICHMENT COMPLETE`);
      console.log(`[${workerNum}] Products processed: ${totalProcessed}`);
      console.log(`[${workerNum}] Products with prices: ${totalWithPrices}`);
      console.log(`[${workerNum}] Products without prices: ${totalNoPrices}`);
      console.log(`[${workerNum}] ${rule}\n`);
    } finally {
      await browser.close();
    }
  } finally {
    await pool.end();
  }
}
|
||||
|
||||
// Script entry point. An unhandled failure is logged and reported through a
// non-zero exit status, matching the other fatal paths in this script, so
// that callers do not mistake a crashed run for a successful one.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user