Skip proxies for Dutchie - datacenter IPs are blocked

Dutchie blocks all our datacenter proxy IPs, returning empty/different
content. Direct connection from pod IP works fine (100 products found).
Added PROXY_SKIP_DOMAINS list for sites that block datacenter IPs.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-01 09:53:55 -07:00
parent e518bb8169
commit 81606447d2

View File

@@ -69,6 +69,20 @@ export class UserAgentMiddleware implements Middleware {
}
}
// Domains that should skip proxy (datacenter IPs are blocked).
// Entries are bare lowercase domain names; any subdomain of an entry is
// matched as well (e.g. 'dutchie.com' also covers 'api.dutchie.com').
const PROXY_SKIP_DOMAINS = [
  'dutchie.com',
];

/**
 * Decide whether a request URL targets a domain listed in PROXY_SKIP_DOMAINS.
 *
 * The hostname must either equal a listed domain or be a subdomain of it
 * (i.e. end with `.<domain>`). A plain substring test is deliberately avoided:
 * it would also match unrelated hosts such as `notdutchie.com` or
 * `dutchie.com.evil.example`, causing them to bypass the proxy.
 *
 * @param url - Absolute URL of the outgoing request.
 * @returns `true` when the URL's host is a listed domain or one of its
 *          subdomains; `false` otherwise, including when `url` cannot be parsed.
 */
function shouldSkipProxy(url: string): boolean {
  let hostname: string;
  try {
    // The URL parser lowercases the hostname, so comparisons below are
    // effectively case-insensitive.
    hostname = new URL(url).hostname;
  } catch {
    // Unparseable URL: fall back to the default (proxied) path.
    return false;
  }

  // A fully-qualified hostname may carry a trailing root dot ("dutchie.com.");
  // drop it so it compares equal to the list entry.
  const host = hostname.endsWith('.') ? hostname.slice(0, -1) : hostname;

  return PROXY_SKIP_DOMAINS.some(
    domain => host === domain || host.endsWith(`.${domain}`),
  );
}
/**
* Proxy Rotation Middleware - uses the central proxy service with timeout handling
*/
@@ -79,6 +93,12 @@ export class ProxyMiddleware implements Middleware {
private currentProxyId: number | null = null;
async processRequest(request: ScraperRequest): Promise<ScraperRequest> {
// Skip proxy for domains that block datacenter IPs
if (shouldSkipProxy(request.url)) {
logger.info('scraper', `⏭️ Skipping proxy for ${new URL(request.url).hostname} (datacenter IPs blocked)`);
return request;
}
// Always try to use a proxy from the central proxy service
// The service handles bot detection timeouts automatically
const forceRotation = request.retryCount > 0 || request.metadata.botDetected;