fix(worker): Use Evomi API first, DB proxies as fallback
- Check Evomi API availability before waiting for DB proxies.
- If EVOMI_USER/EVOMI_PASS are configured, proceed immediately.
- Only fall back to DB proxy polling if Evomi is not configured.
- Add clear comments explaining the proxy initialization order.

This fixes workers getting stuck waiting for DB proxies when the Evomi API is available for on-demand geo-targeted proxies.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
184
backend/scripts/explore-treez-pages.ts
Normal file
184
backend/scripts/explore-treez-pages.ts
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
/**
|
||||||
|
* Explore all Treez page URLs to find the full product catalog
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer, { Page } from 'puppeteer';
|
||||||
|
|
||||||
|
// Subdomain of the Treez store to explore; interpolated into
// `https://${STORE_ID}.treez.io/...` when building every URL in this script.
const STORE_ID = 'best';
|
||||||
|
|
||||||
|
/**
 * Pause the calling async flow for a fixed amount of time.
 *
 * @param ms - Delay in milliseconds before the returned promise resolves.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Dismiss the age-verification modal if the current page is showing one.
 *
 * Looks for the modal by its `data-testid`; when it is absent nothing happens.
 * When it is present, the submit button (if found) is clicked and the function
 * then waits two seconds before returning.
 *
 * @param page - Page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log(' Age gate detected, bypassing...');

  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }

  // The wait happens whether or not a button was found, as long as the modal exists.
  await sleep(2000);
}
|
||||||
|
|
||||||
|
/**
 * Count the product elements currently present in the page's DOM.
 *
 * An element counts as a product when its `class` attribute contains the
 * substring `product_product__`.
 *
 * @param page - Page whose DOM is queried.
 * @returns Number of matching elements at the time of the call.
 */
async function countProducts(page: Page): Promise<number> {
  const productCardCount = await page.evaluate(() => {
    const cards = document.querySelectorAll('[class*="product_product__"]');
    return cards.length;
  });
  return productCardCount;
}
|
||||||
|
|
||||||
|
/**
 * Scroll to the bottom of the page repeatedly, then count the products present.
 *
 * Before each scroll the document height is compared with the height seen on
 * the previous iteration. Scrolling stops once the height has been unchanged
 * for three consecutive checks, or after `maxScrolls` scrolls, whichever comes
 * first. Each scroll is followed by a 1.5 second pause.
 *
 * @param page - Page to scroll.
 * @param maxScrolls - Upper bound on the number of scrolls performed (default 30).
 * @returns The final product count and the number of scrolls actually performed.
 */
async function scrollAndCount(page: Page, maxScrolls: number = 30): Promise<{ products: number; scrolls: number }> {
  let lastHeight = 0;
  let stableStreak = 0;
  let performed = 0;

  for (; performed < maxScrolls; performed++) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    // Track how many checks in a row reported the same height; any change resets the streak.
    stableStreak = height === lastHeight ? stableStreak + 1 : 0;
    if (stableStreak >= 3) {
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500);

    lastHeight = height;
  }

  const products = await countProducts(page);
  return { products, scrolls: performed };
}
|
||||||
|
|
||||||
|
/**
 * Load one candidate menu path on the store and report how many products it shows.
 *
 * After navigation and the age-gate bypass, the products present are counted.
 * If any are found, the page is scrolled to the bottom repeatedly and the final
 * count is returned. If none are found, the number of brand/category/card-like
 * elements is logged as a diagnostic and a zero-scroll result is returned.
 *
 * Failures are not thrown: they are logged and reported through the `error`
 * field so the caller can keep probing the remaining paths.
 *
 * @param page - Page used for navigation; it is reused across calls.
 * @param path - Path and query string appended to `https://${STORE_ID}.treez.io`.
 * @returns Product count, number of scrolls performed, and an error message if the probe failed.
 */
async function testUrl(page: Page, path: string): Promise<{ products: number; scrolls: number; error?: string }> {
  const url = `https://${STORE_ID}.treez.io${path}`;
  console.log(`\nTesting: ${url}`);

  try {
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const initialCount = await countProducts(page);
    console.log(` Initial products: ${initialCount}`);

    if (initialCount > 0) {
      const result = await scrollAndCount(page);
      console.log(` After scroll: ${result.products} products (${result.scrolls} scrolls)`);
      return result;
    }

    // Check for brand/category cards instead
    const cardCount = await page.evaluate(() => {
      const selectors = [
        '[class*="brand"]',
        '[class*="Brand"]',
        '[class*="category"]',
        '[class*="Category"]',
        '[class*="card"]',
        'a[href*="/brand/"]',
        'a[href*="/category/"]',
      ];
      // Query with a single selector list so an element that matches several
      // patterns (e.g. a "brand-card" link to /brand/...) is counted once
      // rather than once per matching selector.
      return document.querySelectorAll(selectors.join(', ')).length;
    });
    console.log(` Cards/links found: ${cardCount}`);

    return { products: initialCount, scrolls: 0 };
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances are guaranteed to carry a message.
    const message = error instanceof Error ? error.message : String(error);
    console.log(` Error: ${message}`);
    return { products: 0, scrolls: 0, error: message };
  }
}
|
||||||
|
|
||||||
|
/**
 * Script entry point.
 *
 * Launches a headless browser, probes a fixed list of candidate menu paths with
 * `testUrl`, prints the `/onlinemenu/` links found on the homepage, and ends
 * with a summary of the probed paths sorted by product count (highest first).
 *
 * The browser is closed in a `finally` block so that a failure in any step
 * (for example the homepage navigation timing out) does not leave the browser
 * process running.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('Exploring Treez Page URLs');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Block images, fonts, media and stylesheets to speed up page loads
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media', 'stylesheet'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const urlsToTest = [
      '/onlinemenu/?customerType=ADULT', // Homepage
      '/onlinemenu/brands?customerType=ADULT', // Brands page
      '/onlinemenu/shop?customerType=ADULT', // Shop page?
      '/onlinemenu/products?customerType=ADULT', // Products page?
      '/onlinemenu/menu?customerType=ADULT', // Menu page?
      '/onlinemenu/all?customerType=ADULT', // All products?
      '/onlinemenu/flower?customerType=ADULT', // Flower category
      '/onlinemenu/vapes?customerType=ADULT', // Vapes category
      '/onlinemenu/edibles?customerType=ADULT', // Edibles category
      '/onlinemenu/concentrates?customerType=ADULT', // Concentrates category
    ];

    const results: { path: string; products: number; scrolls: number }[] = [];

    // Probe sequentially: every probe navigates the same shared page.
    for (const path of urlsToTest) {
      const result = await testUrl(page, path);
      results.push({ path, ...result });
    }

    // Look for navigation links on the main page
    console.log('\n' + '='.repeat(60));
    console.log('Checking navigation structure on homepage...');
    console.log('='.repeat(60));

    await page.goto(`https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`, {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const navLinks = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];
      document.querySelectorAll('a[href*="/onlinemenu/"]').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || '';
        // Keep only links with visible text, and only the first link per href.
        if (text && !links.some(l => l.href === href)) {
          links.push({ text: text.slice(0, 50), href });
        }
      });
      return links;
    });

    console.log('\nNavigation links found:');
    navLinks.forEach(l => console.log(` "${l.text}" → ${l.href}`));

    // Summary
    console.log('\n' + '='.repeat(60));
    console.log('Summary');
    console.log('='.repeat(60));

    results.sort((a, b) => b.products - a.products);
    results.forEach(r => {
      console.log(`${r.products.toString().padStart(4)} products | ${r.path}`);
    });
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
// Run the script. On failure, log the error and mark the process as failed so
// shells and CI see a non-zero exit status instead of a silent success.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||||
@@ -298,9 +298,10 @@ export async function bypassAgeGate(page: Page): Promise<boolean> {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Build menu URL for a store
|
* Build menu URL for a store
|
||||||
|
* Uses /brands page which contains all products (not just homepage carousels)
|
||||||
*/
|
*/
|
||||||
export function buildMenuUrl(storeId: string, customerType: 'ADULT' | 'MEDICAL' = 'ADULT'): string {
|
export function buildMenuUrl(storeId: string, customerType: 'ADULT' | 'MEDICAL' = 'ADULT'): string {
|
||||||
return `https://${storeId}.treez.io/onlinemenu/?customerType=${customerType}`;
|
return `https://${storeId}.treez.io/onlinemenu/brands?customerType=${customerType}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -533,24 +533,53 @@ export class TaskWorker {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
// ============================================================
|
||||||
|
// PROXY INITIALIZATION ORDER:
|
||||||
|
// 1. Check Evomi API first (dynamic residential proxies)
|
||||||
|
// 2. Fall back to DB proxies if Evomi not configured
|
||||||
|
//
|
||||||
|
// Evomi provides geo-targeted proxies on-demand via API.
|
||||||
|
// DB proxies are static/datacenter proxies as fallback.
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
// Import Evomi config checker
|
||||||
|
const { getEvomiConfig } = await import('../services/crawl-rotator');
|
||||||
|
const evomiConfig = getEvomiConfig();
|
||||||
|
|
||||||
|
if (evomiConfig.enabled) {
|
||||||
|
// Evomi API is configured - we can get proxies on-demand
|
||||||
|
// No need to wait for DB proxies
|
||||||
|
console.log(`[TaskWorker] Evomi API configured (${evomiConfig.host}:${evomiConfig.port}) - proxies available on-demand`);
|
||||||
|
|
||||||
|
// Still initialize rotator for user-agent rotation
|
||||||
|
await this.crawlRotator.initialize();
|
||||||
|
setCrawlRotator(this.crawlRotator);
|
||||||
|
|
||||||
|
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, Evomi API for proxies`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Evomi not configured - fall back to DB proxies
|
||||||
|
console.log(`[TaskWorker] Evomi API not configured, falling back to DB proxies...`);
|
||||||
|
|
||||||
while (attempts < maxAttempts) {
|
while (attempts < maxAttempts) {
|
||||||
try {
|
try {
|
||||||
// Load proxies from database
|
// Load proxies from database (fallback)
|
||||||
await this.crawlRotator.initialize();
|
await this.crawlRotator.initialize();
|
||||||
|
|
||||||
const stats = this.crawlRotator.proxy.getStats();
|
const stats = this.crawlRotator.proxy.getStats();
|
||||||
if (stats.activeProxies > 0) {
|
if (stats.activeProxies > 0) {
|
||||||
console.log(`[TaskWorker] Loaded ${stats.activeProxies} proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`);
|
console.log(`[TaskWorker] Loaded ${stats.activeProxies} DB proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`);
|
||||||
|
|
||||||
// Wire rotator to Dutchie client - proxies will be used for ALL requests
|
// Wire rotator to Dutchie client - proxies will be used for ALL requests
|
||||||
setCrawlRotator(this.crawlRotator);
|
setCrawlRotator(this.crawlRotator);
|
||||||
|
|
||||||
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, proxy REQUIRED for all requests`);
|
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, DB proxies`);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
attempts++;
|
attempts++;
|
||||||
console.log(`[TaskWorker] No active proxies available (attempt ${attempts}). Waiting for proxies...`);
|
console.log(`[TaskWorker] No DB proxies available (attempt ${attempts}). Waiting...`);
|
||||||
|
|
||||||
// Wait for either notification or timeout
|
// Wait for either notification or timeout
|
||||||
await new Promise<void>((resolve) => {
|
await new Promise<void>((resolve) => {
|
||||||
@@ -564,7 +593,7 @@ export class TaskWorker {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new Error(`No active proxies available after waiting ${MAX_WAIT_MINUTES} minutes. Add proxies to the database.`);
|
throw new Error(`No proxies available after ${MAX_WAIT_MINUTES} minutes. Configure EVOMI_USER/EVOMI_PASS or add proxies to database.`);
|
||||||
} finally {
|
} finally {
|
||||||
// Clean up LISTEN connection
|
// Clean up LISTEN connection
|
||||||
if (notifyClient) {
|
if (notifyClient) {
|
||||||
|
|||||||
Reference in New Issue
Block a user