fix(worker): Use Evomi API first, DB proxies as fallback

- Check Evomi API availability before waiting for DB proxies
- If EVOMI_USER/EVOMI_PASS configured, proceed immediately
- Only fall back to DB proxy polling if Evomi not configured
- Add clear comments explaining the proxy initialization order

This fixes workers getting stuck waiting for DB proxies when
Evomi API is available for on-demand geo-targeted proxies.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-13 16:45:52 -07:00
parent 3d0ea21007
commit cf99ef9e09
3 changed files with 220 additions and 6 deletions

View File

@@ -0,0 +1,184 @@
/**
* Explore all Treez page URLs to find the full product catalog
*/
import puppeteer, { Page } from 'puppeteer';
// Treez store subdomain to probe; interpolated into `https://${STORE_ID}.treez.io` URLs in this script.
const STORE_ID = 'best';
/**
 * Pause for the given number of milliseconds.
 *
 * @param ms - Delay in milliseconds, handed directly to `setTimeout`.
 * @returns A promise that settles with no value once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Dismiss the age-gate modal when the current page shows one.
 *
 * Looks for the modal by its `data-testid`; if it is absent the function
 * returns immediately. Otherwise it clicks the submit button (when that
 * button can be found) and then waits two seconds before returning.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log(' Age gate detected, bypassing...');
  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }
  // The wait happens whether or not the button was found, as long as the modal exists.
  await sleep(2000);
}
/**
 * Count the product elements currently present in the page's DOM.
 *
 * An element counts as a product when its `class` attribute contains
 * the substring `product_product__`.
 *
 * @param page - Puppeteer page whose DOM is queried.
 * @returns Number of matching elements.
 */
async function countProducts(page: Page): Promise<number> {
  const total = await page.evaluate(() => {
    const cards = document.querySelectorAll('[class*="product_product__"]');
    return cards.length;
  });
  return total;
}
/**
 * Scroll to the bottom of the page repeatedly, then count the products.
 *
 * Each pass reads `document.body.scrollHeight`, scrolls to that position and
 * waits 1.5 seconds. Scrolling stops after `maxScrolls` passes, or as soon as
 * the height has matched the previous reading three times in a row.
 *
 * @param page - Puppeteer page to scroll.
 * @param maxScrolls - Upper bound on scroll passes (default 30).
 * @returns The product count after scrolling and the number of scrolls performed.
 */
async function scrollAndCount(page: Page, maxScrolls: number = 30): Promise<{ products: number; scrolls: number }> {
  let lastHeight = 0;
  let stableReadings = 0;
  let scrolls = 0;

  for (; scrolls < maxScrolls; scrolls++) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    // Track consecutive unchanged readings; any change resets the streak.
    stableReadings = height === lastHeight ? stableReadings + 1 : 0;
    if (stableReadings >= 3) {
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500);
    lastHeight = height;
  }

  const products = await countProducts(page);
  return { products, scrolls };
}
/**
 * Load one menu path on the store and report how many products it exposes.
 *
 * Navigates to `https://<STORE_ID>.treez.io<path>`, dismisses the age gate,
 * and counts product elements. When products are present the page is scrolled
 * to load more before the final count. When none are present, the number of
 * brand/category/card elements is logged as a diagnostic instead.
 *
 * Any failure (navigation timeout, evaluation error, ...) is caught, logged,
 * and reported through the `error` field rather than thrown.
 *
 * @param page - Puppeteer page reused across URLs.
 * @param path - Path and query string appended to the store origin.
 * @returns Product count, number of scrolls performed, and an error message on failure.
 */
async function testUrl(page: Page, path: string): Promise<{ products: number; scrolls: number; error?: string }> {
  const url = `https://${STORE_ID}.treez.io${path}`;
  console.log(`\nTesting: ${url}`);

  try {
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const initialCount = await countProducts(page);
    console.log(` Initial products: ${initialCount}`);

    if (initialCount > 0) {
      const result = await scrollAndCount(page);
      console.log(` After scroll: ${result.products} products (${result.scrolls} scrolls)`);
      return result;
    }

    // Check for brand/category cards instead
    const cardCount = await page.evaluate(() => {
      // A single selector list makes querySelectorAll return each element once,
      // so an element matching several patterns is not counted repeatedly.
      const selector = [
        '[class*="brand"]',
        '[class*="Brand"]',
        '[class*="category"]',
        '[class*="Category"]',
        '[class*="card"]',
        'a[href*="/brand/"]',
        'a[href*="/category/"]',
      ].join(', ');
      return document.querySelectorAll(selector).length;
    });
    console.log(` Cards/links found: ${cardCount}`);

    return { products: initialCount, scrolls: 0 };
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error instances; narrow before reading `.message`.
    const message = error instanceof Error ? error.message : String(error);
    console.log(` Error: ${message}`);
    return { products: 0, scrolls: 0, error: message };
  }
}
/**
 * Entry point: probe a fixed list of candidate menu URLs and print a summary.
 *
 * Launches a headless browser, tests each candidate path with `testUrl`,
 * lists the `/onlinemenu/` links found on the homepage, and finally prints
 * the candidates sorted by product count (highest first).
 *
 * The browser is always closed, even when a step throws, so a failed run
 * does not leave a Chromium process behind. Errors still propagate to the caller.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Exploring Treez Page URLs');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Block images, fonts, media and stylesheets to speed up page loads
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media', 'stylesheet'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const urlsToTest = [
      '/onlinemenu/?customerType=ADULT', // Homepage
      '/onlinemenu/brands?customerType=ADULT', // Brands page
      '/onlinemenu/shop?customerType=ADULT', // Shop page?
      '/onlinemenu/products?customerType=ADULT', // Products page?
      '/onlinemenu/menu?customerType=ADULT', // Menu page?
      '/onlinemenu/all?customerType=ADULT', // All products?
      '/onlinemenu/flower?customerType=ADULT', // Flower category
      '/onlinemenu/vapes?customerType=ADULT', // Vapes category
      '/onlinemenu/edibles?customerType=ADULT', // Edibles category
      '/onlinemenu/concentrates?customerType=ADULT', // Concentrates category
    ];

    const results: { path: string; products: number; scrolls: number }[] = [];

    // Sequential on purpose: every URL is loaded in the same page.
    for (const path of urlsToTest) {
      const result = await testUrl(page, path);
      results.push({ path, ...result });
    }

    // Look for navigation links on the main page
    console.log('\n' + '='.repeat(60));
    console.log('Checking navigation structure on homepage...');
    console.log('='.repeat(60));

    await page.goto(`https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`, {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const navLinks = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];
      document.querySelectorAll('a[href*="/onlinemenu/"]').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || '';
        // Keep only links with visible text, one entry per distinct href.
        if (text && !links.some(l => l.href === href)) {
          links.push({ text: text.slice(0, 50), href });
        }
      });
      return links;
    });

    console.log('\nNavigation links found:');
    navLinks.forEach(l => console.log(` "${l.text}" → ${l.href}`));

    // Summary
    console.log('\n' + '='.repeat(60));
    console.log('Summary');
    console.log('='.repeat(60));

    results.sort((a, b) => b.products - a.products);
    results.forEach(r => {
      console.log(`${r.products.toString().padStart(4)} products | ${r.path}`);
    });
  } finally {
    // Release the browser process whether the run succeeded or threw.
    await browser.close();
  }
}
// Run the script; on failure, log the error and signal it through a non-zero exit code.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});

View File

@@ -298,9 +298,10 @@ export async function bypassAgeGate(page: Page): Promise<boolean> {
/**
 * Build the online-menu URL for a Treez store.
 *
 * Uses the /brands page, which contains all products (not just homepage carousels).
 *
 * @param storeId - Store subdomain, interpolated as `https://<storeId>.treez.io`.
 * @param customerType - Value of the `customerType` query parameter; defaults to 'ADULT'.
 * @returns The absolute menu URL.
 */
export function buildMenuUrl(storeId: string, customerType: 'ADULT' | 'MEDICAL' = 'ADULT'): string {
  return `https://${storeId}.treez.io/onlinemenu/brands?customerType=${customerType}`;
}
/**

View File

@@ -533,24 +533,53 @@ export class TaskWorker {
}
try {
// ============================================================
// PROXY INITIALIZATION ORDER:
// 1. Check Evomi API first (dynamic residential proxies)
// 2. Fall back to DB proxies if Evomi not configured
//
// Evomi provides geo-targeted proxies on-demand via API.
// DB proxies are static/datacenter proxies as fallback.
// ============================================================
// Import Evomi config checker
const { getEvomiConfig } = await import('../services/crawl-rotator');
const evomiConfig = getEvomiConfig();
if (evomiConfig.enabled) {
// Evomi API is configured - we can get proxies on-demand
// No need to wait for DB proxies
console.log(`[TaskWorker] Evomi API configured (${evomiConfig.host}:${evomiConfig.port}) - proxies available on-demand`);
// Still initialize rotator for user-agent rotation
await this.crawlRotator.initialize();
setCrawlRotator(this.crawlRotator);
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, Evomi API for proxies`);
return;
}
// Evomi not configured - fall back to DB proxies
console.log(`[TaskWorker] Evomi API not configured, falling back to DB proxies...`);
while (attempts < maxAttempts) {
try {
// Load proxies from database
// Load proxies from database (fallback)
await this.crawlRotator.initialize();
const stats = this.crawlRotator.proxy.getStats();
if (stats.activeProxies > 0) {
console.log(`[TaskWorker] Loaded ${stats.activeProxies} proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`);
console.log(`[TaskWorker] Loaded ${stats.activeProxies} DB proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`);
// Wire rotator to Dutchie client - proxies will be used for ALL requests
setCrawlRotator(this.crawlRotator);
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, proxy REQUIRED for all requests`);
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, DB proxies`);
return;
}
attempts++;
console.log(`[TaskWorker] No active proxies available (attempt ${attempts}). Waiting for proxies...`);
console.log(`[TaskWorker] No DB proxies available (attempt ${attempts}). Waiting...`);
// Wait for either notification or timeout
await new Promise<void>((resolve) => {
@@ -564,7 +593,7 @@ export class TaskWorker {
}
}
throw new Error(`No active proxies available after waiting ${MAX_WAIT_MINUTES} minutes. Add proxies to the database.`);
throw new Error(`No proxies available after ${MAX_WAIT_MINUTES} minutes. Configure EVOMI_USER/EVOMI_PASS or add proxies to database.`);
} finally {
// Clean up LISTEN connection
if (notifyClient) {