Skip proxies for Dutchie - datacenter IPs are blocked
Dutchie blocks all our datacenter proxy IPs, returning empty/different content. Direct connection from pod IP works fine (100 products found). Added PROXY_SKIP_DOMAINS list for sites that block datacenter IPs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -69,6 +69,20 @@ export class UserAgentMiddleware implements Middleware {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Domains that should skip proxy (datacenter IPs are blocked)
const PROXY_SKIP_DOMAINS = [
  'dutchie.com',
];

/**
 * Decide whether a request URL should bypass the proxy.
 *
 * A URL matches when its hostname is exactly one of PROXY_SKIP_DOMAINS or a
 * subdomain of one (e.g. `www.dutchie.com`). Matching is done on a label
 * boundary rather than by substring, so look-alike hosts such as
 * `notdutchie.com` or `dutchie.com.evil.example` do NOT match.
 *
 * @param url - Absolute URL of the request.
 * @returns `true` if the proxy should be skipped; `false` otherwise,
 *          including when `url` cannot be parsed.
 */
function shouldSkipProxy(url: string): boolean {
  let hostname: string;
  try {
    hostname = new URL(url).hostname.toLowerCase();
  } catch {
    // Unparseable URL: fall back to the normal (proxied) path.
    return false;
  }

  // A fully-qualified hostname may carry a trailing dot ("dutchie.com.").
  if (hostname.endsWith('.')) {
    hostname = hostname.slice(0, -1);
  }

  return PROXY_SKIP_DOMAINS.some(domain => {
    const target = domain.toLowerCase();
    return hostname === target || hostname.endsWith(`.${target}`);
  });
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Proxy Rotation Middleware - uses the central proxy service with timeout handling
|
* Proxy Rotation Middleware - uses the central proxy service with timeout handling
|
||||||
*/
|
*/
|
||||||
@@ -79,6 +93,12 @@ export class ProxyMiddleware implements Middleware {
|
|||||||
private currentProxyId: number | null = null;
|
private currentProxyId: number | null = null;
|
||||||
|
|
||||||
async processRequest(request: ScraperRequest): Promise<ScraperRequest> {
|
async processRequest(request: ScraperRequest): Promise<ScraperRequest> {
|
||||||
|
// Skip proxy for domains that block datacenter IPs
|
||||||
|
if (shouldSkipProxy(request.url)) {
|
||||||
|
logger.info('scraper', `⏭️ Skipping proxy for ${new URL(request.url).hostname} (datacenter IPs blocked)`);
|
||||||
|
return request;
|
||||||
|
}
|
||||||
|
|
||||||
// Always try to use a proxy from the central proxy service
|
// Always try to use a proxy from the central proxy service
|
||||||
// The service handles bot detection timeouts automatically
|
// The service handles bot detection timeouts automatically
|
||||||
const forceRotation = request.retryCount > 0 || request.metadata.botDetected;
|
const forceRotation = request.retryCount > 0 || request.metadata.botDetected;
|
||||||
|
|||||||
Reference in New Issue
Block a user