219 lines
6.4 KiB
TypeScript
219 lines
6.4 KiB
TypeScript
import { firefox } from 'playwright';
|
||
import { pool } from './src/db/migrate.js';
|
||
import { getRandomProxy } from './src/utils/proxyManager.js';
|
||
|
||
// ---------------------------------------------------------------------------
// Worker configuration, read once from the command line.
//   1st CLI argument: worker label used as the log prefix; when absent, "P"
//                     followed by the last four digits of the current
//                     epoch-millisecond timestamp.
//   2nd CLI argument: id of the dispensary to enrich; when absent, 112.
// ---------------------------------------------------------------------------
const cliArgs = process.argv.slice(2);

const workerNum = cliArgs[0] || `P${Date.now().toString().slice(-4)}`;
const dispensaryId = parseInt(cliArgs[1] || '112', 10);

/** Number of products requested from the database per batch. */
const batchSize = 10;
|
||
|
||
/**
 * A product row that still needs a price, as selected by
 * `getProductsNeedingPrices` (columns `id, slug, name, brand, dutchie_url`).
 */
interface Product {
  /** Row id; used to address the row when a scraped price is written back. */
  id: number;

  /** Selected together with the row; not read by the code visible in this file. */
  slug: string;

  /** Product name; the first 40 characters are shown in progress logs. */
  name: string;

  /**
   * Brand label shown in progress logs.
   * NOTE(review): declared non-null, but the selecting query does not filter
   * out NULL brands; confirm the column is NOT NULL.
   */
  brand: string;

  /** Product page URL that is loaded to scrape the price (the query excludes NULLs). */
  dutchie_url: string;
}
|
||
|
||
/**
 * Fetches the next products of the configured dispensary that have no regular
 * price yet but do have a product page URL.
 *
 * Rows come back ordered by `id`. Because the only "done" marker is
 * `regular_price` being non-NULL, a product whose price stays NULL is returned
 * again by later calls.
 *
 * @param limit Maximum number of rows to return.
 * @returns The matching rows (possibly empty).
 */
async function getProductsNeedingPrices(limit: number): Promise<Product[]> {
  const sql = `
    SELECT id, slug, name, brand, dutchie_url
    FROM products
    WHERE dispensary_id = $1
      AND regular_price IS NULL
      AND dutchie_url IS NOT NULL
    ORDER BY id
    LIMIT $2
  `;
  const params = [dispensaryId, limit];

  const { rows } = await pool.query(sql, params);
  return rows;
}
|
||
|
||
/** Raw strings pulled out of the browser page; all price parsing happens in Node. */
interface PricePageSnapshot {
  /** Text content of every `<script type="application/ld+json">` element. */
  jsonLdBlocks: string[];
  /** `document.body.textContent` of the loaded page. */
  bodyText: string;
}

/** Matches dollar amounts such as `$30` or `$30.00`. */
const DOLLAR_AMOUNT_PATTERN = /\$(\d+\.?\d*)/g;

/** Converts a JSON/text value to a finite price greater than zero, or `undefined`. */
function toPositivePrice(value: unknown): number | undefined {
  let parsed = NaN;
  if (typeof value === 'number') {
    parsed = value;
  } else if (typeof value === 'string') {
    parsed = parseFloat(value);
  }
  return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined;
}

/**
 * Searches a parsed JSON-LD value for a `Product` node and returns the first
 * usable `offers.price`. Handles top-level arrays, `@graph` containers, an
 * array-valued `@type`, and `offers` given as a single object or an array.
 */
function findJsonLdProductPrice(node: unknown): number | undefined {
  if (Array.isArray(node)) {
    for (const item of node) {
      const nested = findJsonLdProductPrice(item);
      if (nested !== undefined) return nested;
    }
    return undefined;
  }
  if (typeof node !== 'object' || node === null) return undefined;

  const record = node as Record<string, unknown>;

  if ('@graph' in record) {
    const fromGraph = findJsonLdProductPrice(record['@graph']);
    if (fromGraph !== undefined) return fromGraph;
  }

  const type = record['@type'];
  const isProduct = type === 'Product' || (Array.isArray(type) && type.includes('Product'));
  if (!isProduct) return undefined;

  const offers = Array.isArray(record.offers) ? record.offers : [record.offers];
  for (const offer of offers) {
    if (typeof offer !== 'object' || offer === null) continue;
    const price = toPositivePrice((offer as Record<string, unknown>).price);
    if (price !== undefined) return price;
  }
  return undefined;
}

/**
 * Fallback heuristic: looks at the first two dollar amounts in the page text.
 * With two amounts the larger is the regular price and the smaller the sale
 * price; identical amounts are NOT reported as a sale. With one amount it is
 * the regular price.
 */
function extractPricesFromText(text: string): { regularPrice?: number; salePrice?: number } {
  const [first, second] = (text.match(DOLLAR_AMOUNT_PATTERN) ?? [])
    .slice(0, 2)
    .map((token) => parseFloat(token.slice(1)));

  if (first === undefined) {
    return { regularPrice: undefined, salePrice: undefined };
  }
  if (second === undefined) {
    return { regularPrice: toPositivePrice(first), salePrice: undefined };
  }

  const regularPrice = toPositivePrice(Math.max(first, second));
  const lower = toPositivePrice(Math.min(first, second));
  const salePrice = lower !== undefined && lower !== regularPrice ? lower : undefined;
  return { regularPrice, salePrice };
}

/**
 * Loads a product page and tries to read its price.
 *
 * Strategy: JSON-LD `Product` structured data first (regular price only); if
 * no JSON-LD block yields a usable price, fall back to dollar amounts found in
 * the page text.
 *
 * @param page       Browser page used for navigation. It must provide `goto`,
 *                   `waitForTimeout` and `evaluate`; kept as `any` so the
 *                   signature stays unchanged for existing callers.
 * @param productUrl URL of the product page to load.
 * @returns The prices found. Both fields are `undefined` when nothing was
 *          found or when loading/evaluating the page failed; failures are
 *          logged, never thrown.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- signature kept as-is for callers
async function extractPriceFromPage(page: any, productUrl: string): Promise<{
  regularPrice?: number;
  salePrice?: number;
}> {
  try {
    console.log(`[${workerNum}] Loading: ${productUrl}`);

    await page.goto(productUrl, {
      waitUntil: 'domcontentloaded',
      timeout: 30000
    });

    // Fixed pause after DOMContentLoaded before reading the page.
    await page.waitForTimeout(2000);

    // Only collect raw strings inside the browser; the callback cannot close
    // over Node-side helpers, so parsing is done after the data comes back.
    const snapshot: PricePageSnapshot = await page.evaluate(() => ({
      jsonLdBlocks: Array.from(
        document.querySelectorAll('script[type="application/ld+json"]'),
        (script) => script.textContent || ''
      ),
      bodyText: document.body.textContent || ''
    }));

    for (const rawBlock of snapshot.jsonLdBlocks) {
      let parsed: unknown;
      try {
        parsed = JSON.parse(rawBlock);
      } catch {
        continue; // Malformed JSON-LD: try the next block.
      }

      const price = findJsonLdProductPrice(parsed);
      if (price !== undefined) {
        return { regularPrice: price, salePrice: undefined };
      }
    }

    return extractPricesFromText(snapshot.bodyText);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.log(`[${workerNum}] ⚠️ Error loading page: ${message}`);
    return { regularPrice: undefined, salePrice: undefined };
  }
}
|
||
|
||
/**
 * Writes scraped prices back to a product row and bumps `updated_at`.
 *
 * Any falsy price (`undefined`, `0`, `NaN`) is stored as SQL NULL.
 *
 * @param productId    Id of the `products` row to update.
 * @param regularPrice Regular price, if one was found.
 * @param salePrice    Sale price, if one was found.
 */
async function updateProductPrice(
  productId: number,
  regularPrice?: number,
  salePrice?: number
): Promise<void> {
  const regularParam = regularPrice ? regularPrice : null;
  const saleParam = salePrice ? salePrice : null;

  const sql = `
    UPDATE products
    SET regular_price = $1,
        sale_price = $2,
        updated_at = CURRENT_TIMESTAMP
    WHERE id = $3
  `;

  await pool.query(sql, [regularParam, saleParam, productId]);
}
|
||
|
||
/**
 * Worker entry point: enriches the products of one dispensary with prices
 * scraped from their product pages.
 *
 * Flow: print a banner, verify the dispensary row exists, pick a proxy, launch
 * headless Firefox, then repeatedly fetch batches of products whose
 * `regular_price` is NULL, scrape each page, write the result back, and
 * finally print totals.
 *
 * Exits the process with code 1 when the dispensary is missing or no proxy is
 * available. The browser and the database pool are released even when a batch
 * throws; the error itself still propagates to the caller.
 */
async function main(): Promise<void> {
  const heavyRule = '='.repeat(70);
  const lightRule = '─'.repeat(70);

  console.log(`\n${heavyRule}`);
  console.log(`💰 PRICE ENRICHMENT WORKER - ${workerNum}`);
  console.log(` Dispensary ID: ${dispensaryId}`);
  console.log(` Batch Size: ${batchSize} products`);
  console.log(`${heavyRule}\n`);

  let browser: Awaited<ReturnType<typeof firefox.launch>> | undefined;

  try {
    // Get dispensary info
    const dispensaryResult = await pool.query(
      "SELECT id, name, menu_url FROM dispensaries WHERE id = $1",
      [dispensaryId]
    );

    if (dispensaryResult.rows.length === 0) {
      console.error(`[${workerNum}] ❌ Dispensary ID ${dispensaryId} not found`);
      process.exit(1);
    }

    console.log(`[${workerNum}] ✅ Dispensary: ${dispensaryResult.rows[0].name}\n`);

    // Get proxy
    const proxy = await getRandomProxy();
    if (!proxy) {
      console.log(`[${workerNum}] ❌ No proxy available`);
      process.exit(1);
    }

    console.log(`[${workerNum}] 🔐 Using proxy: ${proxy.server}\n`);

    // Launch browser
    browser = await firefox.launch({ headless: true });

    const context = await browser.newContext({
      viewport: { width: 1920, height: 1080 },
      // NOTE(review): this is a Chrome user-agent string on a Firefox engine; confirm that is intended.
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
      proxy: {
        server: proxy.server,
        username: proxy.username,
        password: proxy.password
      }
    });

    const page = await context.newPage();

    let totalProcessed = 0;
    let totalWithPrices = 0;
    let totalNoPrices = 0;
    let batchNum = 0;

    // Ids already attempted in this run that yielded no price. Their
    // regular_price stays NULL, so the "needs price" query keeps returning
    // them; without this set the loop would re-process the same rows forever.
    const unpricedIds = new Set<number>();

    // Keep processing batches
    for (;;) {
      // Widen the window by the number of rows we are going to skip so that
      // fresh products behind them (ordered by id) become visible.
      const candidates = await getProductsNeedingPrices(batchSize + unpricedIds.size);
      const products = candidates
        .filter((candidate) => !unpricedIds.has(candidate.id))
        .slice(0, batchSize);

      if (products.length === 0) {
        console.log(`[${workerNum}] ℹ️ No more products need price enrichment`);
        break;
      }

      batchNum++;
      console.log(`[${workerNum}] ${lightRule}`);
      console.log(`[${workerNum}] 📦 BATCH #${batchNum}: Processing ${products.length} products`);
      console.log(`[${workerNum}] ${lightRule}\n`);

      for (const [i, product] of products.entries()) {
        console.log(`[${workerNum}] [${i + 1}/${products.length}] ${product.brand} - ${product.name.substring(0, 40)}`);

        const { regularPrice, salePrice } = await extractPriceFromPage(page, product.dutchie_url);

        await updateProductPrice(product.id, regularPrice, salePrice);

        totalProcessed++;

        if (regularPrice || salePrice) {
          totalWithPrices++;
          const priceStr = salePrice
            ? `Sale: $${salePrice.toFixed(2)} (Reg: $${regularPrice?.toFixed(2) || 'N/A'})`
            : `Price: $${regularPrice?.toFixed(2)}`;
          console.log(`[${workerNum}] ✅ ${priceStr}`);
        } else {
          totalNoPrices++;
          console.log(`[${workerNum}] ⚠️ No price found`);
        }

        // Anything that did not end up with a regular price would be selected
        // again by the next batch query; remember it so it is tried only once.
        if (!regularPrice) {
          unpricedIds.add(product.id);
        }

        // Small delay between products
        await page.waitForTimeout(500);
      }

      console.log(`\n[${workerNum}] ✅ Batch #${batchNum} complete\n`);

      // Delay between batches
      await page.waitForTimeout(2000);
    }

    console.log(`\n[${workerNum}] ${heavyRule}`);
    console.log(`[${workerNum}] ✅ PRICE ENRICHMENT COMPLETE`);
    console.log(`[${workerNum}] Products processed: ${totalProcessed}`);
    console.log(`[${workerNum}] Products with prices: ${totalWithPrices}`);
    console.log(`[${workerNum}] Products without prices: ${totalNoPrices}`);
    console.log(`[${workerNum}] ${heavyRule}\n`);
  } finally {
    // Release resources on success and on failure; close the pool even if
    // closing the browser throws.
    try {
      await browser?.close();
    } finally {
      await pool.end();
    }
  }
}
|
||
|
||
// Script entry point. A fatal error is logged and the process exits with a
// non-zero status so that whatever launched the worker can detect the failure
// (a bare `.catch(console.error)` would still exit with status 0).
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
|