fix(worker): Use Evomi API first, DB proxies as fallback
- Check Evomi API availability before waiting for DB proxies
- If EVOMI_USER/EVOMI_PASS are configured, proceed immediately
- Only fall back to DB proxy polling if Evomi is not configured
- Added clear comments explaining proxy initialization order

This fixes workers getting stuck waiting for DB proxies when the Evomi API is available for on-demand geo-targeted proxies.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
184
backend/scripts/explore-treez-pages.ts
Normal file
184
backend/scripts/explore-treez-pages.ts
Normal file
@@ -0,0 +1,184 @@
|
||||
/**
|
||||
* Explore all Treez page URLs to find the full product catalog
|
||||
*/
|
||||
|
||||
import puppeteer, { Page } from 'puppeteer';
|
||||
|
||||
// Store identifier used as the subdomain of every URL this script visits
// (`https://${STORE_ID}.treez.io/...`).
const STORE_ID = 'best';
|
||||
|
||||
/**
 * Pause the current async flow.
 *
 * @param ms - Delay in milliseconds before the returned promise resolves.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Dismiss the age-verification modal when the page shows one.
 *
 * Looks for the modal by its `data-testid`; when found, clicks the submit
 * button (if present) and then waits two seconds. Does nothing when no modal
 * is found.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log(' Age gate detected, bypassing...');

  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }

  await sleep(2000);
}
|
||||
|
||||
/**
 * Count the elements currently in the DOM whose class attribute contains
 * `product_product__`.
 *
 * @param page - Puppeteer page to query.
 * @returns Number of matching elements.
 */
async function countProducts(page: Page): Promise<number> {
  const tileCount = await page.evaluate(() => {
    const tiles = document.querySelectorAll('[class*="product_product__"]');
    return tiles.length;
  });
  return tileCount;
}
|
||||
|
||||
/**
 * Scroll to the bottom of the page repeatedly, then count products.
 *
 * Stops after `maxScrolls` scrolls, or earlier once the document height has
 * matched the previously recorded height on three consecutive checks.
 *
 * @param page - Puppeteer page to scroll.
 * @param maxScrolls - Upper bound on the number of scrolls performed.
 * @returns The product count after scrolling and the number of scrolls done.
 */
async function scrollAndCount(page: Page, maxScrolls: number = 30): Promise<{ products: number; scrolls: number }> {
  let lastHeight = 0;
  let unchangedChecks = 0;
  let performed = 0;

  for (; performed < maxScrolls; performed++) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    // Track how many checks in a row saw no change in height.
    unchangedChecks = height === lastHeight ? unchangedChecks + 1 : 0;
    if (unchangedChecks >= 3) {
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500);

    lastHeight = height;
  }

  const products = await countProducts(page);
  return { products, scrolls: performed };
}
|
||||
|
||||
/**
 * Load one storefront path and report how many product elements it renders.
 *
 * Navigates to `https://${STORE_ID}.treez.io${path}`, dismisses the age gate
 * if shown, and counts products. When products are present the page is
 * scrolled via `scrollAndCount` and the post-scroll totals are returned.
 * Otherwise the number of brand/category/card-like elements is logged for
 * diagnostics and the initial count is returned with zero scrolls.
 *
 * @param page - Puppeteer page to drive.
 * @param path - Path (and query string) appended to the store origin.
 * @returns Product and scroll counts; on failure both are 0 and `error`
 *          carries the failure message.
 */
async function testUrl(page: Page, path: string): Promise<{ products: number; scrolls: number; error?: string }> {
  const url = `https://${STORE_ID}.treez.io${path}`;
  console.log(`\nTesting: ${url}`);

  try {
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const initialCount = await countProducts(page);
    console.log(` Initial products: ${initialCount}`);

    if (initialCount > 0) {
      const result = await scrollAndCount(page);
      console.log(` After scroll: ${result.products} products (${result.scrolls} scrolls)`);
      return result;
    }

    // Check for brand/category cards instead
    const cardCount = await page.evaluate(() => {
      const selectors = [
        '[class*="brand"]',
        '[class*="Brand"]',
        '[class*="category"]',
        '[class*="Category"]',
        '[class*="card"]',
        'a[href*="/brand/"]',
        'a[href*="/category/"]',
      ];
      // Query with one selector list so an element matching several patterns
      // (e.g. a "brand-card" link) is counted once instead of once per pattern.
      return document.querySelectorAll(selectors.join(', ')).length;
    });
    console.log(` Cards/links found: ${cardCount}`);

    return { products: initialCount, scrolls: 0 };
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error instances; narrow before
    // reading `.message` so the log and result never contain `undefined`.
    const message = error instanceof Error ? error.message : String(error);
    console.log(` Error: ${message}`);
    return { products: 0, scrolls: 0, error: message };
  }
}
|
||||
|
||||
/**
 * Script entry point.
 *
 * Launches a headless browser, runs `testUrl` against each candidate menu
 * path, reloads the homepage to list its `/onlinemenu/` navigation links, and
 * prints a summary sorted by product count (highest first). The browser is
 * closed even when a step throws.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('Exploring Treez Page URLs');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Everything after launch runs inside try/finally so a navigation failure
  // cannot leave the browser process running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip images, fonts, media and stylesheets to speed up page loads
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media', 'stylesheet'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const urlsToTest = [
      '/onlinemenu/?customerType=ADULT', // Homepage
      '/onlinemenu/brands?customerType=ADULT', // Brands page
      '/onlinemenu/shop?customerType=ADULT', // Shop page?
      '/onlinemenu/products?customerType=ADULT', // Products page?
      '/onlinemenu/menu?customerType=ADULT', // Menu page?
      '/onlinemenu/all?customerType=ADULT', // All products?
      '/onlinemenu/flower?customerType=ADULT', // Flower category
      '/onlinemenu/vapes?customerType=ADULT', // Vapes category
      '/onlinemenu/edibles?customerType=ADULT', // Edibles category
      '/onlinemenu/concentrates?customerType=ADULT', // Concentrates category
    ];

    const results: { path: string; products: number; scrolls: number }[] = [];

    // Paths are tested one at a time because they share a single page.
    for (const path of urlsToTest) {
      const result = await testUrl(page, path);
      results.push({ path, ...result });
    }

    // Look for navigation links on the main page
    console.log('\n' + '='.repeat(60));
    console.log('Checking navigation structure on homepage...');
    console.log('='.repeat(60));

    await page.goto(`https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`, {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const navLinks = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];
      document.querySelectorAll('a[href*="/onlinemenu/"]').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || '';
        // Keep only links with visible text, one entry per distinct href.
        if (text && !links.some(l => l.href === href)) {
          links.push({ text: text.slice(0, 50), href });
        }
      });
      return links;
    });

    console.log('\nNavigation links found:');
    navLinks.forEach(l => console.log(` "${l.text}" → ${l.href}`));

    // Summary
    console.log('\n' + '='.repeat(60));
    console.log('Summary');
    console.log('='.repeat(60));

    results.sort((a, b) => b.products - a.products);
    results.forEach(r => {
      console.log(`${r.products.toString().padStart(4)} products | ${r.path}`);
    });
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
// Run the script; report any failure and make the process exit non-zero so
// callers (shells, CI) can detect it.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
@@ -298,9 +298,10 @@ export async function bypassAgeGate(page: Page): Promise<boolean> {
|
||||
|
||||
/**
 * Build menu URL for a store
 * Uses /brands page which contains all products (not just homepage carousels)
 *
 * @param storeId - Store identifier used as the `treez.io` subdomain.
 * @param customerType - Value of the `customerType` query parameter; defaults to `'ADULT'`.
 * @returns The store's `/onlinemenu/brands` URL.
 */
export function buildMenuUrl(storeId: string, customerType: 'ADULT' | 'MEDICAL' = 'ADULT'): string {
  return `https://${storeId}.treez.io/onlinemenu/brands?customerType=${customerType}`;
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -533,24 +533,53 @@ export class TaskWorker {
|
||||
}
|
||||
|
||||
try {
|
||||
// ============================================================
|
||||
// PROXY INITIALIZATION ORDER:
|
||||
// 1. Check Evomi API first (dynamic residential proxies)
|
||||
// 2. Fall back to DB proxies if Evomi not configured
|
||||
//
|
||||
// Evomi provides geo-targeted proxies on-demand via API.
|
||||
// DB proxies are static/datacenter proxies as fallback.
|
||||
// ============================================================
|
||||
|
||||
// Import Evomi config checker
|
||||
const { getEvomiConfig } = await import('../services/crawl-rotator');
|
||||
const evomiConfig = getEvomiConfig();
|
||||
|
||||
if (evomiConfig.enabled) {
|
||||
// Evomi API is configured - we can get proxies on-demand
|
||||
// No need to wait for DB proxies
|
||||
console.log(`[TaskWorker] Evomi API configured (${evomiConfig.host}:${evomiConfig.port}) - proxies available on-demand`);
|
||||
|
||||
// Still initialize rotator for user-agent rotation
|
||||
await this.crawlRotator.initialize();
|
||||
setCrawlRotator(this.crawlRotator);
|
||||
|
||||
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, Evomi API for proxies`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Evomi not configured - fall back to DB proxies
|
||||
console.log(`[TaskWorker] Evomi API not configured, falling back to DB proxies...`);
|
||||
|
||||
while (attempts < maxAttempts) {
|
||||
try {
|
||||
// Load proxies from database
|
||||
// Load proxies from database (fallback)
|
||||
await this.crawlRotator.initialize();
|
||||
|
||||
const stats = this.crawlRotator.proxy.getStats();
|
||||
if (stats.activeProxies > 0) {
|
||||
console.log(`[TaskWorker] Loaded ${stats.activeProxies} proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`);
|
||||
console.log(`[TaskWorker] Loaded ${stats.activeProxies} DB proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`);
|
||||
|
||||
// Wire rotator to Dutchie client - proxies will be used for ALL requests
|
||||
setCrawlRotator(this.crawlRotator);
|
||||
|
||||
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, proxy REQUIRED for all requests`);
|
||||
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, DB proxies`);
|
||||
return;
|
||||
}
|
||||
|
||||
attempts++;
|
||||
console.log(`[TaskWorker] No active proxies available (attempt ${attempts}). Waiting for proxies...`);
|
||||
console.log(`[TaskWorker] No DB proxies available (attempt ${attempts}). Waiting...`);
|
||||
|
||||
// Wait for either notification or timeout
|
||||
await new Promise<void>((resolve) => {
|
||||
@@ -564,7 +593,7 @@ export class TaskWorker {
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error(`No active proxies available after waiting ${MAX_WAIT_MINUTES} minutes. Add proxies to the database.`);
|
||||
throw new Error(`No proxies available after ${MAX_WAIT_MINUTES} minutes. Configure EVOMI_USER/EVOMI_PASS or add proxies to database.`);
|
||||
} finally {
|
||||
// Clean up LISTEN connection
|
||||
if (notifyClient) {
|
||||
|
||||
Reference in New Issue
Block a user