Files
cannaiq/backend/enrich-prices.ts
2025-11-28 19:45:44 -07:00

219 lines
6.4 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { firefox } from 'playwright';
import { pool } from './src/db/migrate.js';
import { getRandomProxy } from './src/utils/proxyManager.js';
// CLI usage: enrich-prices.ts [workerNum] [dispensaryId]

/** Label prefixed to every log line; defaults to "P" + the last four digits of the current epoch millis. */
const workerNum = process.argv[2] ? process.argv[2] : `P${String(Date.now()).slice(-4)}`;

/** Dispensary whose products are enriched; falls back to 112 when no argument is given. */
const dispensaryId = Number.parseInt(process.argv[3] ? process.argv[3] : '112', 10);

/** Number of products requested from the database per batch. */
const batchSize = 10;
/** Columns read from the `products` table for each product that still needs a price. */
type Product = {
  /** Primary key used when writing the price back. */
  id: number;
  /** Product display name (truncated to 40 characters in progress logs). */
  name: string;
  /** Brand label shown in progress logs. */
  brand: string;
  slug: string;
  /** Product page URL that is loaded to look for a price. */
  dutchie_url: string;
};
/**
 * Fetches products of the configured dispensary that have a product URL but
 * no regular price yet, lowest id first.
 *
 * @param limit Maximum number of rows to return.
 * @returns The matching rows (possibly empty).
 */
async function getProductsNeedingPrices(limit: number): Promise<Product[]> {
  const sql = `
SELECT id, slug, name, brand, dutchie_url
FROM products
WHERE dispensary_id = $1
AND regular_price IS NULL
AND dutchie_url IS NOT NULL
ORDER BY id
LIMIT $2
`;
  const params = [dispensaryId, limit];
  const { rows } = await pool.query(sql, params);
  return rows;
}
/**
 * Loads a product page and tries to read its price.
 *
 * Strategy, in order:
 *  1. JSON-LD structured data: the first `Product` node carrying an offer with
 *     a positive, parseable `price` wins (reported as the regular price).
 *  2. Text scan: the first `$<number>` occurrences in the page text. With two
 *     different amounts, the lower is treated as the sale price and the higher
 *     as the regular price; otherwise the first amount is the regular price.
 *
 * Never throws: navigation or evaluation failures are logged and reported as
 * "no price found".
 *
 * @param page Playwright page used for navigation (reused across calls).
 * @param productUrl URL of the product page to load.
 * @returns The prices found; both fields are undefined when nothing was found.
 */
async function extractPriceFromPage(page: any, productUrl: string): Promise<{
  regularPrice?: number;
  salePrice?: number;
}> {
  try {
    console.log(`[${workerNum}] Loading: ${productUrl}`);
    await page.goto(productUrl, {
      waitUntil: 'domcontentloaded',
      timeout: 30000
    });
    // Fixed pause after DOMContentLoaded; presumably gives client-side
    // rendering time to populate the page before it is inspected.
    await page.waitForTimeout(2000);

    // NOTE: this callback is serialized and executed inside the browser, so it
    // must stay self-contained (no references to Node-side helpers or variables).
    const priceData = await page.evaluate(() => {
      // 1) JSON-LD structured data
      const scripts = Array.from(document.querySelectorAll('script[type="application/ld+json"]'));
      for (const script of scripts) {
        try {
          const parsed = JSON.parse(script.textContent || '');
          // A JSON-LD document may be a single node, an array of nodes, or a
          // wrapper object holding its nodes under "@graph".
          const nodes = Array.isArray(parsed)
            ? parsed
            : Array.isArray(parsed?.['@graph'])
              ? parsed['@graph']
              : [parsed];
          for (const node of nodes) {
            if (!node || node['@type'] !== 'Product' || !node.offers) {
              continue;
            }
            // `offers` may be a single offer or a list of offers.
            const offers = Array.isArray(node.offers) ? node.offers : [node.offers];
            for (const offer of offers) {
              const price = parseFloat(offer?.price);
              if (Number.isFinite(price) && price > 0) {
                return { regularPrice: price, salePrice: undefined };
              }
            }
            // No usable price on this Product node: keep looking instead of
            // giving up, so the text fallback below still gets a chance.
          }
        } catch {
          // Malformed JSON in this script tag; continue with the next one.
        }
      }

      // 2) Fallback: scan the page text for patterns like $30.00, $40
      const pageText = document.body?.textContent || '';
      const priceMatches = pageText.match(/\$(\d+\.?\d*)/g);
      if (priceMatches && priceMatches.length > 0) {
        const prices = priceMatches.map(p => parseFloat(p.replace('$', '')));
        // Two different amounts: the lower one is taken as the sale price and
        // the higher one as the regular price, whatever their order on the page.
        // Two equal amounts are the same price shown twice, not a sale.
        if (prices.length >= 2 && prices[0] !== prices[1]) {
          return {
            salePrice: Math.min(prices[0], prices[1]),
            regularPrice: Math.max(prices[0], prices[1])
          };
        }
        return {
          regularPrice: prices[0],
          salePrice: undefined
        };
      }
      return { regularPrice: undefined, salePrice: undefined };
    });
    return priceData;
  } catch (error: unknown) {
    // Best effort by design: a page that fails to load counts as "no price".
    const message = error instanceof Error ? error.message : String(error);
    console.log(`[${workerNum}] ⚠️ Error loading page: ${message}`);
    return { regularPrice: undefined, salePrice: undefined };
  }
}
/**
 * Persists the scraped prices for one product and bumps its `updated_at`.
 *
 * Any falsy price (undefined, 0, NaN) is stored as NULL.
 *
 * @param productId Primary key of the product row to update.
 * @param regularPrice Regular price, if one was found.
 * @param salePrice Sale price, if one was found.
 */
async function updateProductPrice(
  productId: number,
  regularPrice?: number,
  salePrice?: number
): Promise<void> {
  const regularValue = regularPrice ? regularPrice : null;
  const saleValue = salePrice ? salePrice : null;
  const sql = `
UPDATE products
SET regular_price = $1,
sale_price = $2,
updated_at = CURRENT_TIMESTAMP
WHERE id = $3
`;
  await pool.query(sql, [regularValue, saleValue, productId]);
}
/**
 * Worker entry point: enriches products of one dispensary with prices.
 *
 * Verifies the dispensary exists, picks a proxy, launches a headless Firefox
 * and then repeatedly pulls batches of products without a regular price,
 * scrapes each product page and writes the result back. Exits the process
 * with code 1 when the dispensary or a proxy is unavailable.
 *
 * The browser and the database pool are always released, even when a step
 * throws; the error itself still propagates to the caller.
 */
async function main(): Promise<void> {
  console.log(`\n${'='.repeat(70)}`);
  console.log(`💰 PRICE ENRICHMENT WORKER - ${workerNum}`);
  console.log(` Dispensary ID: ${dispensaryId}`);
  console.log(` Batch Size: ${batchSize} products`);
  console.log(`${'='.repeat(70)}\n`);

  try {
    // Get dispensary info
    const dispensaryResult = await pool.query(
      "SELECT id, name, menu_url FROM dispensaries WHERE id = $1",
      [dispensaryId]
    );
    if (dispensaryResult.rows.length === 0) {
      console.error(`[${workerNum}] ❌ Dispensary ID ${dispensaryId} not found`);
      process.exit(1);
    }
    console.log(`[${workerNum}] ✅ Dispensary: ${dispensaryResult.rows[0].name}\n`);

    // Get proxy
    const proxy = await getRandomProxy();
    if (!proxy) {
      console.log(`[${workerNum}] ❌ No proxy available`);
      process.exit(1);
    }
    console.log(`[${workerNum}] 🔐 Using proxy: ${proxy.server}\n`);

    // Launch browser
    const browser = await firefox.launch({ headless: true });
    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        // NOTE(review): a Chrome user agent is sent from a Firefox browser;
        // confirm this mismatch is intentional.
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        proxy: {
          server: proxy.server,
          username: proxy.username,
          password: proxy.password
        }
      });
      const page = await context.newPage();

      let totalProcessed = 0;
      let totalWithPrices = 0;
      let totalNoPrices = 0;
      let batchNum = 0;

      // Products attempted in this run whose regular price is still NULL in
      // the database. The batch query selects on "regular_price IS NULL", so
      // without this set the same unpriced rows would be returned forever.
      const unpricedIds = new Set<number>();

      // Keep processing batches
      while (true) {
        // Over-fetch by the number of known-unpriced rows (they sort first or
        // in between by id) so up to `batchSize` fresh products remain after
        // filtering them out.
        const candidates = await getProductsNeedingPrices(batchSize + unpricedIds.size);
        const products = candidates
          .filter((candidate) => !unpricedIds.has(candidate.id))
          .slice(0, batchSize);
        if (products.length === 0) {
          console.log(`[${workerNum}] No more products need price enrichment`);
          break;
        }

        batchNum++;
        console.log(`[${workerNum}] ${'─'.repeat(70)}`);
        console.log(`[${workerNum}] 📦 BATCH #${batchNum}: Processing ${products.length} products`);
        console.log(`[${workerNum}] ${'─'.repeat(70)}\n`);

        for (const [i, product] of products.entries()) {
          console.log(`[${workerNum}] [${i + 1}/${products.length}] ${product.brand} - ${product.name.substring(0, 40)}`);
          const { regularPrice, salePrice } = await extractPriceFromPage(page, product.dutchie_url);
          await updateProductPrice(product.id, regularPrice, salePrice);
          totalProcessed++;

          // A falsy regular price is stored as NULL, so this row would match
          // the batch query again; remember it to avoid reprocessing.
          if (!regularPrice) {
            unpricedIds.add(product.id);
          }

          if (regularPrice || salePrice) {
            totalWithPrices++;
            const priceStr = salePrice
              ? `Sale: $${salePrice.toFixed(2)} (Reg: $${regularPrice?.toFixed(2) || 'N/A'})`
              : `Price: $${regularPrice?.toFixed(2)}`;
            console.log(`[${workerNum}] ✅ ${priceStr}`);
          } else {
            totalNoPrices++;
            console.log(`[${workerNum}] ⚠️ No price found`);
          }

          // Small delay between products
          await page.waitForTimeout(500);
        }

        console.log(`\n[${workerNum}] ✅ Batch #${batchNum} complete\n`);
        // Delay between batches
        await page.waitForTimeout(2000);
      }

      console.log(`\n[${workerNum}] ${'='.repeat(70)}`);
      console.log(`[${workerNum}] ✅ PRICE ENRICHMENT COMPLETE`);
      console.log(`[${workerNum}] Products processed: ${totalProcessed}`);
      console.log(`[${workerNum}] Products with prices: ${totalWithPrices}`);
      console.log(`[${workerNum}] Products without prices: ${totalNoPrices}`);
      console.log(`[${workerNum}] ${'='.repeat(70)}\n`);
    } finally {
      // Always shut the browser down, otherwise a failure leaves it running.
      await browser.close();
    }
  } finally {
    // Release database connections so the process can terminate.
    await pool.end();
  }
}
// Run the worker. On failure, log the error and report a non-zero exit status
// so shells and process supervisors can tell the run did not succeed.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});