feat: Add stale process monitor, users route, landing page, archive old scripts

- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-05 04:07:31 -07:00
parent d2d44d2aeb
commit d91c55a344
3115 changed files with 5755 additions and 719 deletions

View File

@@ -0,0 +1,218 @@
import { firefox } from 'playwright';
import { pool } from './src/db/migrate.js';
import { getRandomProxy } from './src/utils/proxyManager.js';
/**
 * Label prefixed to every log line of this worker. Taken from the first CLI
 * argument; when that is missing or empty it falls back to "P" followed by
 * the last four digits of the current epoch-millisecond timestamp.
 */
const workerNum = process.argv[2]
  ? process.argv[2]
  : `P${String(Date.now()).slice(-4)}`;

/**
 * Id of the dispensary whose products are processed. Taken from the second
 * CLI argument (parsed as a base-10 integer); defaults to 112.
 */
const dispensaryId = Number.parseInt(process.argv[3] ? process.argv[3] : '112', 10);

/** Number of products requested from the database per batch. */
const batchSize = 10;
/**
 * Row shape returned by the product lookup query in this script
 * (columns `id, slug, name, brand, dutchie_url` of the `products` table).
 */
interface Product {
/** Row id; used in `WHERE id = $3` when the scraped price is written back. */
id: number;
/** Selected by the lookup query; not read anywhere else in the visible code. */
slug: string;
/** Product name; its first 40 characters are shown in the progress log. */
name: string;
/** Brand name; shown in the progress log. */
brand: string;
/** Product page URL that gets loaded in the browser; the lookup query only returns rows where it is non-NULL. */
dutchie_url: string;
}
/**
 * Looks up products of the configured dispensary (`dispensaryId`) that have
 * no regular price yet but do have a Dutchie URL.
 *
 * @param limit Maximum number of rows to fetch.
 * @returns The matching rows, ordered by ascending id.
 */
async function getProductsNeedingPrices(limit: number): Promise<Product[]> {
  // Assembled line by line; the joined text starts and ends with a newline.
  const sql = [
    '',
    'SELECT id, slug, name, brand, dutchie_url',
    'FROM products',
    'WHERE dispensary_id = $1',
    'AND regular_price IS NULL',
    'AND dutchie_url IS NOT NULL',
    'ORDER BY id',
    'LIMIT $2',
    '',
  ].join('\n');
  const params = [dispensaryId, limit];

  const { rows } = await pool.query(sql, params);
  return rows;
}
/**
 * Loads a product page and scrapes its price(s).
 *
 * Strategy, in order:
 *  1. schema.org JSON-LD: the first `Product` node with a usable offer price.
 *     The node may be the document root, an element of a top-level array, or
 *     an element of `@graph`; `offers` may be a single object or an array;
 *     `lowPrice` is used when an offer has no `price` (AggregateOffer).
 *  2. Fallback: the first two positive `$` amounts in the page text. The
 *     lower one is reported as the sale price and the higher as the regular
 *     price; a single amount (or two equal amounts) is a regular price only.
 *
 * This function never throws: navigation or evaluation errors are logged and
 * reported as "no price found".
 *
 * @param page Page object providing `goto`, `waitForTimeout` and `evaluate`.
 * @param productUrl URL of the product page to load.
 * @returns The scraped prices; both fields are `undefined` when nothing was found.
 */
async function extractPriceFromPage(page: any, productUrl: string): Promise<{
  regularPrice?: number;
  salePrice?: number;
}> {
  try {
    console.log(`[${workerNum}] Loading: ${productUrl}`);
    await page.goto(productUrl, {
      waitUntil: 'domcontentloaded',
      timeout: 30000
    });
    // Fixed pause after DOMContentLoaded; presumably gives client-side
    // rendering time to populate the page before it is inspected.
    await page.waitForTimeout(2000);

    // The callback runs inside the browser page, so it must be
    // self-contained: every helper it needs is declared within it.
    const priceData = await page.evaluate(() => {
      // Converts a raw JSON/text value to a positive finite price, or undefined.
      const toPrice = (raw: unknown): number | undefined => {
        const value = typeof raw === 'number' ? raw : parseFloat(String(raw));
        return Number.isFinite(value) && value > 0 ? value : undefined;
      };

      // True when a JSON-LD node declares itself a schema.org Product.
      const isProduct = (node: any): boolean => {
        const type = node?.['@type'];
        return type === 'Product' || (Array.isArray(type) && type.includes('Product'));
      };

      // 1. Structured data.
      const scripts = Array.from(document.querySelectorAll('script[type="application/ld+json"]'));
      for (const script of scripts) {
        try {
          const data = JSON.parse(script.textContent || '');
          const nodes: any[] = Array.isArray(data)
            ? data
            : Array.isArray(data?.['@graph'])
              ? data['@graph']
              : [data];

          for (const node of nodes) {
            if (!isProduct(node) || !node.offers) continue;
            const offers: any[] = Array.isArray(node.offers) ? node.offers : [node.offers];
            for (const offer of offers) {
              const price = toPrice(offer?.price) ?? toPrice(offer?.lowPrice);
              if (price !== undefined) {
                return { regularPrice: price, salePrice: undefined };
              }
            }
            // A Product without a usable price does not end the search:
            // later nodes/scripts and the text fallback are still tried.
          }
        } catch {
          // Malformed JSON-LD: continue with the next script tag.
        }
      }

      // 2. Fallback: price patterns such as $30.00 in the page text.
      const pageText = document.body?.textContent ?? '';
      const matches = pageText.match(/\$(\d+\.?\d*)/g) ?? [];
      const prices: number[] = [];
      for (const match of matches) {
        const price = toPrice(match.slice(1)); // drop the leading "$"
        if (price !== undefined) prices.push(price);
        if (prices.length === 2) break; // only the first two amounts are considered
      }

      if (prices.length === 2) {
        const [first, second] = prices;
        if (first === second) {
          // The same amount shown twice is not a discount.
          return { regularPrice: first, salePrice: undefined };
        }
        return {
          salePrice: Math.min(first, second),
          regularPrice: Math.max(first, second)
        };
      }
      if (prices.length === 1) {
        return { regularPrice: prices[0], salePrice: undefined };
      }
      return { regularPrice: undefined, salePrice: undefined };
    });

    return priceData;
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.log(`[${workerNum}] ⚠️ Error loading page: ${message}`);
    return { regularPrice: undefined, salePrice: undefined };
  }
}
/**
 * Writes scraped prices back to one row of the `products` table and stamps
 * `updated_at` with the current time.
 *
 * Falsy prices (`undefined`, `0`, `NaN`) are stored as NULL.
 *
 * @param productId Id of the product row to update.
 * @param regularPrice Regular price, if one was found.
 * @param salePrice Sale price, if one was found.
 */
async function updateProductPrice(
  productId: number,
  regularPrice?: number,
  salePrice?: number
): Promise<void> {
  // Assembled line by line; the joined text starts and ends with a newline.
  const sql = [
    '',
    'UPDATE products',
    'SET regular_price = $1,',
    'sale_price = $2,',
    'updated_at = CURRENT_TIMESTAMP',
    'WHERE id = $3',
    '',
  ].join('\n');

  const regularParam = regularPrice ? regularPrice : null;
  const saleParam = salePrice ? salePrice : null;

  await pool.query(sql, [regularParam, saleParam, productId]);
}
/**
 * Entry point of the price-enrichment worker.
 *
 * Verifies that the configured dispensary exists, obtains a proxy, launches a
 * headless Firefox instance and then, batch by batch, loads the page of every
 * product that still lacks a regular price, stores whatever prices were
 * scraped and logs progress. Finishes by printing a summary.
 *
 * Each product is attempted at most once per run: a product whose page yields
 * no regular price keeps `regular_price` NULL in the database and would
 * otherwise be returned by the lookup query again on every iteration, so the
 * loop would never terminate.
 *
 * The browser and the database pool are released even when the run fails.
 * Exits the process with status 1 when the dispensary or a proxy is missing.
 */
async function main() {
  console.log(`\n${'='.repeat(70)}`);
  console.log(`💰 PRICE ENRICHMENT WORKER - ${workerNum}`);
  console.log(` Dispensary ID: ${dispensaryId}`);
  console.log(` Batch Size: ${batchSize} products`);
  console.log(`${'='.repeat(70)}\n`);

  try {
    // Get dispensary info
    const dispensaryResult = await pool.query(
      "SELECT id, name, menu_url FROM dispensaries WHERE id = $1",
      [dispensaryId]
    );
    if (dispensaryResult.rows.length === 0) {
      console.error(`[${workerNum}] ❌ Dispensary ID ${dispensaryId} not found`);
      process.exit(1);
    }
    console.log(`[${workerNum}] ✅ Dispensary: ${dispensaryResult.rows[0].name}\n`);

    // Get proxy
    const proxy = await getRandomProxy();
    if (!proxy) {
      console.log(`[${workerNum}] ❌ No proxy available`);
      process.exit(1);
    }
    console.log(`[${workerNum}] 🔐 Using proxy: ${proxy.server}\n`);

    // Launch browser
    const browser = await firefox.launch({ headless: true });
    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        proxy: {
          server: proxy.server,
          username: proxy.username,
          password: proxy.password
        }
      });
      const page = await context.newPage();

      let totalProcessed = 0;
      let totalWithPrices = 0;
      let totalNoPrices = 0;
      let batchNum = 0;

      // Ids already attempted in this run whose regular_price is still NULL.
      const stillUnpriced = new Set<number>();

      // Keep processing batches
      while (true) {
        // The lookup query returns the lowest ids first, so the known failures
        // come back on every call. Widen the request by their count and drop
        // them, which leaves up to `batchSize` not-yet-attempted products.
        const candidates = await getProductsNeedingPrices(batchSize + stillUnpriced.size);
        const products = candidates
          .filter((candidate) => !stillUnpriced.has(candidate.id))
          .slice(0, batchSize);

        if (products.length === 0) {
          console.log(`[${workerNum}] No more products need price enrichment`);
          break;
        }

        batchNum++;
        console.log(`[${workerNum}] ${'─'.repeat(70)}`);
        console.log(`[${workerNum}] 📦 BATCH #${batchNum}: Processing ${products.length} products`);
        console.log(`[${workerNum}] ${'─'.repeat(70)}\n`);

        for (const [i, product] of products.entries()) {
          console.log(`[${workerNum}] [${i + 1}/${products.length}] ${product.brand} - ${product.name.substring(0, 40)}`);

          const { regularPrice, salePrice } = await extractPriceFromPage(page, product.dutchie_url);
          await updateProductPrice(product.id, regularPrice, salePrice);
          totalProcessed++;

          if (!regularPrice) {
            // Stored as NULL, so the lookup query will keep returning this row.
            stillUnpriced.add(product.id);
          }

          if (regularPrice || salePrice) {
            totalWithPrices++;
            const priceStr = salePrice
              ? `Sale: $${salePrice.toFixed(2)} (Reg: $${regularPrice?.toFixed(2) || 'N/A'})`
              : `Price: $${regularPrice?.toFixed(2)}`;
            console.log(`[${workerNum}] ✅ ${priceStr}`);
          } else {
            totalNoPrices++;
            console.log(`[${workerNum}] ⚠️ No price found`);
          }

          // Small delay between products
          await page.waitForTimeout(500);
        }

        console.log(`\n[${workerNum}] ✅ Batch #${batchNum} complete\n`);
        // Delay between batches
        await page.waitForTimeout(2000);
      }

      console.log(`\n[${workerNum}] ${'='.repeat(70)}`);
      console.log(`[${workerNum}] ✅ PRICE ENRICHMENT COMPLETE`);
      console.log(`[${workerNum}] Products processed: ${totalProcessed}`);
      console.log(`[${workerNum}] Products with prices: ${totalWithPrices}`);
      console.log(`[${workerNum}] Products without prices: ${totalNoPrices}`);
      console.log(`[${workerNum}] ${'='.repeat(70)}\n`);
    } finally {
      // Always shut the browser down, even when a batch failed.
      await browser.close();
    }
  } finally {
    // Always release the database pool, even when the run failed.
    await pool.end();
  }
}
// Run the worker. If it fails, log the error and exit with a non-zero status
// so a supervising process can tell the run did not succeed; exiting
// explicitly also prevents still-open handles from keeping the process alive.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});