- Fix 403 handler to rotate BOTH proxy and fingerprint (was only fingerprint) - Add auto-retry logic to task service (retry up to max_retries before failing) - Add error tooltip on task status badge showing retry count and error message - Add DELETE /api/tasks/:id endpoint (only for non-running tasks) - Add delete button to JobQueue task table 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
662 lines
20 KiB
TypeScript
662 lines
20 KiB
TypeScript
/**
 * ============================================================
 * DUTCHIE PLATFORM CLIENT - LOCKED MODULE
 * ============================================================
 *
 * DO NOT MODIFY THIS FILE WITHOUT EXPLICIT AUTHORIZATION.
 *
 * This is the canonical HTTP client for all Dutchie communication.
 * All Dutchie workers (Alice, Bella, etc.) MUST use this client.
 *
 * IMPLEMENTATION:
 * - Uses curl via child_process.execSync (bypasses TLS fingerprinting)
 * - NO Puppeteer, NO axios, NO fetch
 * - Proxy and fingerprint rotation on 403
 * - Residential IP compatible
 *
 * USAGE:
 *   import { curlPost, curlGet, executeGraphQL } from '@dutchie/client';
 *
 * ============================================================
 */
|
|
|
|
import { execSync } from 'child_process';
|
|
|
|
// ============================================================
|
|
// TYPES
|
|
// ============================================================
|
|
|
|
/**
 * Result of a single curl invocation made by this client.
 */
export interface CurlResponse {
  /** HTTP status code; the curl helpers in this file report 0 when the command itself failed. */
  status: number;
  /** Response body: parsed JSON when the body parses, the raw text otherwise, null on failure. */
  data: any;
  /** Failure message, populated by the curl helpers when the curl command throws. */
  error?: string;
}
|
|
|
|
/**
 * Browser identity sent with each request.
 *
 * The `secChUa*` fields are optional: when `secChUa` is absent the header
 * builders in this file omit the whole `sec-*` header group.
 */
export interface Fingerprint {
  /** Value of the `user-agent` header. */
  userAgent: string;
  /** Value of the `accept-language` header. */
  acceptLanguage: string;
  /** Value of the `sec-ch-ua` header. */
  secChUa?: string;
  /** Value of the `sec-ch-ua-platform` header (already quoted, e.g. `"Windows"`). */
  secChUaPlatform?: string;
  /** Value of the `sec-ch-ua-mobile` header (e.g. `?0`). */
  secChUaMobile?: string;
}
|
|
|
|
// ============================================================
|
|
// CONFIGURATION
|
|
// ============================================================
|
|
|
|
/**
 * Shared settings for talking to Dutchie.
 */
export const DUTCHIE_CONFIG = {
  /** Endpoint that executeGraphQL POSTs persisted queries to. */
  graphqlEndpoint: 'https://dutchie.com/api-3/graphql',
  /** Origin that fetchPage prefixes to the requested path. */
  baseUrl: 'https://dutchie.com',
  /** Per-request timeout in milliseconds, passed to the curl helpers. */
  timeout: 30000,
  /** Retry budget; NOTE(review): not read by the functions in this file - presumably for callers. */
  maxRetries: 3,
  /** Pagination/pacing knobs; NOTE(review): not read in this file - presumably used by the crawl workers. */
  perPage: 100,
  maxPages: 200,
  pageDelayMs: 500,
  modeDelayMs: 2000,
};
|
|
|
|
// ============================================================
// PROXY SUPPORT
// ============================================================
// Integrates with the CrawlRotator system from services/crawl-rotator
// On 403 errors:
// 1. Record failure on current proxy
// 2. Rotate to next proxy
// 3. Retry with new proxy
// ============================================================
|
|
|
|
import type { CrawlRotator, Proxy } from '../../services/crawl-rotator';
|
|
|
|
// Module-level proxy state shared by every request issued from this file.

/** Proxy URL handed to curl, or null for a direct connection. */
let currentProxy: string | null = null;

/** Rotator consulted when a request is blocked with a 403; null disables rotation. */
let crawlRotator: CrawlRotator | null = null;
|
|
|
|
/**
 * Choose the proxy used by all subsequent Dutchie requests.
 *
 * @param proxy Proxy URL (`http://user:pass@host:port` or `socks5://host:port`),
 *              or null to connect directly.
 */
export function setProxy(proxy: string | null): void {
  currentProxy = proxy;

  if (!proxy) {
    console.log('[Dutchie Client] Proxy disabled (direct connection)');
    return;
  }

  // Hide the password portion before the URL reaches the logs.
  const masked = proxy.replace(/:[^:@]+@/, ':***@');
  console.log(`[Dutchie Client] Proxy set: ${masked}`);
}
|
|
|
|
/**
 * Read the proxy URL currently in effect.
 *
 * @returns The proxy URL, or null when requests go out directly.
 */
export function getProxy(): string | null {
  const activeProxy = currentProxy;
  return activeProxy;
}
|
|
|
|
/**
 * Attach (or detach, with null) the CrawlRotator used for proxy rotation on 403s.
 *
 * When a rotator is attached and it already has a current proxy, that proxy
 * becomes the active one. Detaching leaves the active proxy unchanged.
 *
 * @param rotator Rotator to attach, or null to disable rotation.
 */
export function setCrawlRotator(rotator: CrawlRotator | null): void {
  crawlRotator = rotator;
  if (!rotator) {
    return;
  }

  console.log('[Dutchie Client] CrawlRotator attached - proxy rotation enabled');

  // Seed the active proxy from the rotator, if it has one selected.
  const initial = rotator.proxy.getCurrent();
  if (!initial) {
    return;
  }

  const initialUrl = rotator.proxy.getProxyUrl(initial);
  currentProxy = initialUrl;
  console.log(`[Dutchie Client] Initial proxy: ${initialUrl.replace(/:[^:@]+@/, ':***@')}`);
}
|
|
|
|
/**
 * Read the CrawlRotator currently attached to this client.
 *
 * @returns The attached rotator, or null when none is attached.
 */
export function getCrawlRotator(): CrawlRotator | null {
  const attached = crawlRotator;
  return attached;
}
|
|
|
|
/**
 * Report a failure on the active proxy and switch to the rotator's next one.
 *
 * @param error Failure description recorded against the proxy; defaults to '403 Forbidden'.
 * @returns true when a new proxy was selected; false when no rotator is attached
 *          or the rotator has no further proxy to offer.
 */
async function rotateProxyOn403(error?: string): Promise<boolean> {
  if (!crawlRotator) return false;

  // The failure is recorded before rotating, so it is attributed to the blocked proxy.
  await crawlRotator.recordFailure(error || '403 Forbidden');

  const nextProxy = crawlRotator.rotateProxy();
  if (!nextProxy) {
    console.warn('[Dutchie Client] No more proxies available');
    return false;
  }

  currentProxy = crawlRotator.proxy.getProxyUrl(nextProxy);
  console.log(`[Dutchie Client] Rotated proxy: ${currentProxy.replace(/:[^:@]+@/, ':***@')}`);
  return true;
}
|
|
|
|
/**
 * Report a successful request to the attached rotator; does nothing when no rotator is attached.
 *
 * @param responseTimeMs Optional response time, forwarded to the rotator unchanged.
 */
async function recordProxySuccess(responseTimeMs?: number): Promise<void> {
  if (!crawlRotator) return;
  await crawlRotator.recordSuccess(responseTimeMs);
}
|
|
|
|
/**
 * Build the `--proxy` fragment that the curl helpers splice into their shell command.
 *
 * The proxy URL is wrapped in single quotes; any single quote inside it is
 * escaped (`'` becomes `'\''`) so a URL containing one cannot terminate the
 * quoting and alter the command.
 *
 * @returns `--proxy '<url>'`, or an empty string when no proxy is configured.
 */
function getProxyArg(): string {
  if (!currentProxy) return '';
  const quoted = currentProxy.replace(/'/g, "'\\''");
  return `--proxy '${quoted}'`;
}
|
|
|
|
/**
 * Hashes for Dutchie GraphQL operations, keyed by operation name.
 * executeGraphQL sends the hash it is given as `persistedQuery.sha256Hash`.
 *
 * Note: GetAddressBasedDispensaryData and DispensaryInfo carry the same hash.
 */
export const GRAPHQL_HASHES = {
  FilteredProducts:
    'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
  GetAddressBasedDispensaryData:
    '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
  ConsumerDispensaries:
    '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
  DispensaryInfo:
    '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
  GetAllCitiesByState:
    'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
};
|
|
|
|
// ============================================================
|
|
// FINGERPRINTS - Browser profiles for anti-detect
|
|
// ============================================================
|
|
|
|
/**
 * Pool of browser identities. Index 0 is the default used when no session is
 * active and no rotation has happened; rotateFingerprint walks the pool in order.
 * The Firefox and Safari entries omit the `secChUa*` fields.
 */
const FINGERPRINTS: Fingerprint[] = [
  // [0] Chrome 131 on Windows - typical residential user, used first
  {
    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    acceptLanguage: 'en-US,en;q=0.9',
    secChUa: '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    secChUaPlatform: '"Windows"',
    secChUaMobile: '?0',
  },
  // [1] Chrome 131 on macOS
  {
    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    acceptLanguage: 'en-US,en;q=0.9',
    secChUa: '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    secChUaPlatform: '"macOS"',
    secChUaMobile: '?0',
  },
  // [2] Chrome 120 on Windows
  {
    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    acceptLanguage: 'en-US,en;q=0.9',
    secChUa: '"Chromium";v="120", "Google Chrome";v="120", "Not-A.Brand";v="99"',
    secChUaPlatform: '"Windows"',
    secChUaMobile: '?0',
  },
  // [3] Firefox 133 on Windows
  {
    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
    acceptLanguage: 'en-US,en;q=0.5',
  },
  // [4] Safari 17.2 on macOS
  {
    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
    acceptLanguage: 'en-US,en;q=0.9',
  },
  // [5] Edge 131 on Windows
  {
    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
    acceptLanguage: 'en-US,en;q=0.9',
    secChUa: '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    secChUaPlatform: '"Windows"',
    secChUaMobile: '?0',
  },
];
|
|
|
|
/** Position in FINGERPRINTS of the identity used when no session is active. */
let currentFingerprintIndex = 0;

/**
 * The active crawl session, or null when none is running.
 * The shape is spelled out inline here; it mirrors the CrawlSession interface
 * declared further down in this file.
 */
let currentSession: {
  sessionId: string;
  fingerprint: Fingerprint;
  proxyUrl: string | null;
  stateCode?: string;
  timezone?: string;
  startedAt: Date;
} | null = null;
|
|
|
|
/**
 * Get the fingerprint that outgoing requests should use.
 *
 * @returns The active session's fingerprint when a session is running,
 *          otherwise the pool entry at the current rotation index.
 */
export function getFingerprint(): Fingerprint {
  if (currentSession) {
    return currentSession.fingerprint;
  }
  return FINGERPRINTS[currentFingerprintIndex];
}

/**
 * Advance to the next fingerprint in the pool (wrapping around) and make it
 * the one getFingerprint() returns.
 *
 * While a session is active getFingerprint() reads the session's fingerprint,
 * so the rotation is also applied to the session; otherwise rotating after a
 * 403 would log a change but keep sending the blocked identity. The session's
 * Accept-Language is preserved, and a pool entry identical to the identity
 * being replaced is skipped.
 *
 * @returns The fingerprint now in effect.
 */
export function rotateFingerprint(): Fingerprint {
  const advance = (): Fingerprint => {
    currentFingerprintIndex = (currentFingerprintIndex + 1) % FINGERPRINTS.length;
    return FINGERPRINTS[currentFingerprintIndex];
  };

  let fp = advance();
  if (
    currentSession &&
    FINGERPRINTS.length > 1 &&
    fp.userAgent === currentSession.fingerprint.userAgent
  ) {
    // The session was started on this very entry; move on to a different one.
    fp = advance();
  }

  console.log(`[Dutchie Client] Rotated to fingerprint: ${fp.userAgent.slice(0, 50)}...`);

  if (!currentSession) {
    return fp;
  }

  const sessionFingerprint: Fingerprint = {
    ...fp,
    acceptLanguage: currentSession.fingerprint.acceptLanguage,
  };
  currentSession.fingerprint = sessionFingerprint;
  return sessionFingerprint;
}
|
|
|
|
/**
 * Point the rotation index back at the first entry of the fingerprint pool.
 * An active session's fingerprint is not touched.
 */
export function resetFingerprint(): void {
  const firstIndex = 0;
  currentFingerprintIndex = firstIndex;
}
|
|
|
|
/**
 * Pick one fingerprint from the pool uniformly at random.
 * The rotation index is left unchanged.
 */
export function getRandomFingerprint(): Fingerprint {
  const poolSize = FINGERPRINTS.length;
  const pick = Math.floor(Math.random() * poolSize);
  return FINGERPRINTS[pick];
}
|
|
|
|
// ============================================================
|
|
// SESSION MANAGEMENT
|
|
// Per-session fingerprint rotation for stealth
|
|
// ============================================================
|
|
|
|
/**
 * One crawl session: a fixed identity captured when startSession() runs.
 */
export interface CrawlSession {
  /** Identifier generated by startSession (`session_<timestamp>_<random>`). */
  sessionId: string;
  /** Fingerprint used for requests while this session is active. */
  fingerprint: Fingerprint;
  /** Proxy URL that was active when the session started, or null for direct. */
  proxyUrl: string | null;
  /** State code supplied by the caller, if any. */
  stateCode?: string;
  /** Timezone supplied by the caller; drives the session's Accept-Language. */
  timezone?: string;
  /** Moment the session was created. */
  startedAt: Date;
}
|
|
|
|
// Note: currentSession variable declared earlier in file for proper scoping
|
|
|
|
/**
 * Timezone to Accept-Language mapping.
 * Every US timezone listed here maps to en-US; the table exists so that
 * international timezones can be added later.
 */
const TIMEZONE_TO_LOCALE: Record<string, string> = {
  'America/Phoenix': 'en-US,en;q=0.9',
  'America/Los_Angeles': 'en-US,en;q=0.9',
  'America/Denver': 'en-US,en;q=0.9',
  'America/Chicago': 'en-US,en;q=0.9',
  'America/New_York': 'en-US,en;q=0.9',
  'America/Detroit': 'en-US,en;q=0.9',
  'America/Anchorage': 'en-US,en;q=0.9',
  'Pacific/Honolulu': 'en-US,en;q=0.9',
};

/**
 * Get the Accept-Language header value for a timezone.
 *
 * Only the table's own keys are consulted. A plain `table[timezone]` lookup
 * would also return inherited members (for example 'constructor' or
 * 'toString') and hand back a function instead of a header string.
 *
 * @param timezone IANA timezone name; may be omitted.
 * @returns The mapped locale, or 'en-US,en;q=0.9' for missing/unknown timezones.
 */
export function getLocaleForTimezone(timezone?: string): string {
  const fallback = 'en-US,en;q=0.9';
  if (!timezone) return fallback;
  if (!Object.prototype.hasOwnProperty.call(TIMEZONE_TO_LOCALE, timezone)) {
    return fallback;
  }
  return TIMEZONE_TO_LOCALE[timezone] || fallback;
}
|
|
|
|
/**
 * Begin a new crawl session with a randomly chosen fingerprint.
 * Call this before crawling a store to get a fresh identity; the new session
 * replaces any session that was already active.
 *
 * @param stateCode Optional state code stored on the session.
 * @param timezone Optional timezone; selects the session's Accept-Language.
 * @returns The session that is now active.
 */
export function startSession(stateCode?: string, timezone?: string): CrawlSession {
  // Random pool entry, with Accept-Language overridden for geographic consistency.
  const fingerprint: Fingerprint = {
    ...getRandomFingerprint(),
    acceptLanguage: getLocaleForTimezone(timezone),
  };

  const sessionId = `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;

  const session: CrawlSession = {
    sessionId,
    fingerprint,
    proxyUrl: currentProxy,
    stateCode,
    timezone,
    startedAt: new Date(),
  };
  currentSession = session;

  console.log(`[Dutchie Client] Started session ${sessionId}`);
  console.log(`[Dutchie Client] Fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`);
  console.log(`[Dutchie Client] Accept-Language: ${fingerprint.acceptLanguage}`);
  if (timezone) {
    console.log(`[Dutchie Client] Timezone: ${timezone}`);
  }

  return session;
}
|
|
|
|
/**
 * Close the active crawl session, logging how long it lasted.
 * Does nothing when no session is active.
 */
export function endSession(): void {
  if (!currentSession) return;

  const { sessionId, startedAt } = currentSession;
  const duration = Math.round((Date.now() - startedAt.getTime()) / 1000);
  console.log(`[Dutchie Client] Ended session ${sessionId} (${duration}s)`);
  currentSession = null;
}
|
|
|
|
/**
 * Read the crawl session that is currently active.
 *
 * @returns The active session, or null when none has been started (or it was ended).
 */
export function getCurrentSession(): CrawlSession | null {
  const active = currentSession;
  return active;
}
|
|
|
|
// ============================================================
|
|
// CURL HTTP CLIENT
|
|
// ============================================================
|
|
|
|
/**
 * Assemble the request headers for a Dutchie API call.
 *
 * @param refererPath Path appended to https://dutchie.com to form the Referer.
 * @param fingerprint Identity to use; defaults to getFingerprint().
 * @returns Header map. The `sec-*` group is included only when the
 *          fingerprint defines `secChUa`.
 */
export function buildHeaders(refererPath: string, fingerprint?: Fingerprint): Record<string, string> {
  const identity = fingerprint || getFingerprint();

  const headers: Record<string, string> = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': identity.acceptLanguage,
    'content-type': 'application/json',
    'origin': 'https://dutchie.com',
    'referer': `https://dutchie.com${refererPath}`,
    'user-agent': identity.userAgent,
    'apollographql-client-name': 'Marketplace (production)',
  };

  // Fingerprints without client hints get no sec-* headers at all.
  if (!identity.secChUa) {
    return headers;
  }

  Object.assign(headers, {
    'sec-ch-ua': identity.secChUa,
    'sec-ch-ua-mobile': identity.secChUaMobile || '?0',
    'sec-ch-ua-platform': identity.secChUaPlatform || '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
  });
  return headers;
}
|
|
|
|
/**
 * Execute an HTTP POST through the curl binary (bypasses TLS fingerprinting).
 *
 * The command runs through a shell, so every caller-supplied value (header
 * lines, JSON body, URL) is single-quoted with embedded quotes escaped.
 *
 * @param url Target URL.
 * @param body Value serialized with JSON.stringify and sent as the request body.
 * @param headers Request headers; any `accept-encoding` entry is dropped because
 *                `--compressed` lets curl negotiate encoding itself.
 * @param timeout Request timeout in milliseconds (rounded up to whole seconds for curl).
 * @returns status + data (parsed JSON when the body parses, raw text otherwise).
 *          On command failure: status 0, data null and an `error` message with
 *          any URL credentials masked. Never throws for curl failures.
 */
export function curlPost(url: string, body: any, headers: Record<string, string>, timeout = 30000): CurlResponse {
  // POSIX single-quote escaping: close the quote, emit an escaped quote, reopen.
  const shellQuote = (value: string): string => `'${String(value).replace(/'/g, "'\\''")}'`;

  const headerArgs = Object.entries(headers)
    .filter(([name]) => name.toLowerCase() !== 'accept-encoding')
    .map(([name, value]) => `-H ${shellQuote(`${name}: ${value}`)}`)
    .join(' ');

  // JSON.stringify yields undefined for undefined/function input; send JSON null instead of crashing.
  const bodyJson = JSON.stringify(body) ?? 'null';
  const timeoutSec = Math.ceil(timeout / 1000);
  const separator = '___HTTP_STATUS___';
  const proxyArg = getProxyArg();
  const cmd = `curl -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${headerArgs} -d ${shellQuote(bodyJson)} ${shellQuote(url)}`;

  // Turn the status text and raw body into a response; an unparsable status becomes 0.
  const toResponse = (statusText: string, rawBody: string): CurlResponse => {
    const parsed = parseInt(statusText, 10);
    const status = Number.isNaN(parsed) ? 0 : parsed;
    try {
      return { status, data: JSON.parse(rawBody) };
    } catch {
      return { status, data: rawBody };
    }
  };

  try {
    const output = execSync(cmd, {
      encoding: 'utf-8',
      maxBuffer: 10 * 1024 * 1024,
      // Give curl's own --max-time a chance to fire before the process is killed.
      timeout: timeout + 5000,
    });

    const separatorIndex = output.lastIndexOf(separator);
    if (separatorIndex === -1) {
      // Fallback: treat the last line of output as the status code.
      const lines = output.trim().split('\n');
      const statusText = lines.pop() || '0';
      return toResponse(statusText, lines.join('\n'));
    }

    return toResponse(
      output.slice(separatorIndex + separator.length).trim(),
      output.slice(0, separatorIndex)
    );
  } catch (error: unknown) {
    const raw = error instanceof Error ? error.message : String(error);
    // execSync failure messages echo the whole command line, proxy URL included;
    // mask any `scheme://user:password@` credentials before the message is surfaced.
    const message = (raw || 'curl request failed').replace(/(:\/\/[^\s:@\/']+):[^\s@\/']+@/g, '$1:***@');
    return {
      status: 0,
      data: null,
      error: message,
    };
  }
}
|
|
|
|
/**
 * Execute an HTTP GET through the curl binary (bypasses TLS fingerprinting).
 *
 * The command runs through a shell, so every caller-supplied value (header
 * lines, URL) is single-quoted with embedded quotes escaped.
 *
 * @param url Target URL.
 * @param headers Request headers; any `accept-encoding` entry is dropped because
 *                `--compressed` lets curl negotiate encoding itself.
 * @param timeout Request timeout in milliseconds (rounded up to whole seconds for curl).
 * @returns status + data: parsed JSON when the body parses as JSON, otherwise
 *          the raw text (e.g. HTML). On command failure: status 0, data null and
 *          an `error` message with any URL credentials masked.
 */
export function curlGet(url: string, headers: Record<string, string>, timeout = 30000): CurlResponse {
  // POSIX single-quote escaping: close the quote, emit an escaped quote, reopen.
  const shellQuote = (value: string): string => `'${String(value).replace(/'/g, "'\\''")}'`;

  const headerArgs = Object.entries(headers)
    .filter(([name]) => name.toLowerCase() !== 'accept-encoding')
    .map(([name, value]) => `-H ${shellQuote(`${name}: ${value}`)}`)
    .join(' ');

  const timeoutSec = Math.ceil(timeout / 1000);
  const separator = '___HTTP_STATUS___';
  const proxyArg = getProxyArg();
  const cmd = `curl -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${headerArgs} ${shellQuote(url)}`;

  // Turn the status text and raw body into a response; an unparsable status becomes 0.
  const toResponse = (statusText: string, rawBody: string): CurlResponse => {
    const parsed = parseInt(statusText, 10);
    const status = Number.isNaN(parsed) ? 0 : parsed;
    try {
      return { status, data: JSON.parse(rawBody) };
    } catch {
      // Not JSON - hand back the text as-is (HTML pages land here).
      return { status, data: rawBody };
    }
  };

  try {
    const output = execSync(cmd, {
      encoding: 'utf-8',
      maxBuffer: 10 * 1024 * 1024,
      // Give curl's own --max-time a chance to fire before the process is killed.
      timeout: timeout + 5000,
    });

    const separatorIndex = output.lastIndexOf(separator);
    if (separatorIndex === -1) {
      // Fallback: treat the last line of output as the status code.
      const lines = output.trim().split('\n');
      const statusText = lines.pop() || '0';
      return toResponse(statusText, lines.join('\n'));
    }

    return toResponse(
      output.slice(separatorIndex + separator.length).trim(),
      output.slice(0, separatorIndex)
    );
  } catch (error: unknown) {
    const raw = error instanceof Error ? error.message : String(error);
    // execSync failure messages echo the whole command line, proxy URL included;
    // mask any `scheme://user:password@` credentials before the message is surfaced.
    const message = (raw || 'curl request failed').replace(/(:\/\/[^\s:@\/']+):[^\s@\/']+@/g, '$1:***@');
    return {
      status: 0,
      data: null,
      error: message,
    };
  }
}
|
|
|
|
// ============================================================
|
|
// GRAPHQL EXECUTION
|
|
// ============================================================
|
|
|
|
/**
 * Tuning knobs accepted by executeGraphQL.
 */
export interface ExecuteGraphQLOptions {
  /** Retries allowed after the first attempt. */
  maxRetries?: number;
  /** Whether a 403 response triggers proxy and fingerprint rotation before the next attempt (default true). */
  retryOn403?: boolean;
  /** Used to build the Referer path `/embedded-menu/<cName>`; defaults to 'cities'. */
  cName?: string;
}
|
|
|
|
/**
 * Execute a persisted GraphQL query against Dutchie via curl (bypasses TLS fingerprinting).
 *
 * Makes up to `maxRetries + 1` attempts, waiting `1000ms * attemptNumber`
 * between attempts (never after the last one).
 *
 * @param operationName GraphQL operation name.
 * @param variables Variables object for the operation.
 * @param hash Persisted-query sha256 hash for the operation.
 * @param options Optional retry/referer settings:
 *   - maxRetries: defaults to DUTCHIE_CONFIG.maxRetries.
 *   - retryOn403: when true (default) a 403 rotates proxy and fingerprint and
 *     retries; when false a 403 fails immediately without further attempts.
 *   - cName: referer slug, defaults to 'cities'.
 * @returns The response payload of the first HTTP 200 reply. GraphQL-level
 *          errors inside a 200 reply are logged, not thrown.
 * @throws Error describing the last failure (curl error text or `HTTP <status>`)
 *         once attempts are exhausted.
 */
export async function executeGraphQL(
  operationName: string,
  variables: any,
  hash: string,
  options: ExecuteGraphQLOptions = {}
): Promise<any> {
  const { maxRetries = DUTCHIE_CONFIG.maxRetries, retryOn403 = true, cName = 'cities' } = options;
  const totalAttempts = maxRetries + 1;

  const body = {
    operationName,
    variables,
    extensions: {
      persistedQuery: { version: 1, sha256Hash: hash },
    },
  };

  let lastError: Error | null = null;

  for (let attempt = 1; attempt <= totalAttempts; attempt++) {
    // Re-read the fingerprint every attempt: a 403 on the previous one may have rotated it.
    const fingerprint = getFingerprint();
    const headers = buildHeaders(`/embedded-menu/${cName}`, fingerprint);

    console.log(`[Dutchie Client] curl POST ${operationName} (attempt ${attempt}/${totalAttempts})`);

    const response = curlPost(DUTCHIE_CONFIG.graphqlEndpoint, body, headers, DUTCHIE_CONFIG.timeout);

    console.log(`[Dutchie Client] Response status: ${response.status}`);

    if (response.error) {
      console.error(`[Dutchie Client] curl error: ${response.error}`);
      lastError = new Error(response.error);
    } else if (response.status === 200) {
      if (response.data?.errors?.length > 0) {
        console.warn(`[Dutchie Client] GraphQL errors: ${JSON.stringify(response.data.errors[0])}`);
      }
      return response.data;
    } else if (response.status === 403 && retryOn403) {
      console.warn(`[Dutchie Client] 403 blocked - rotating proxy and fingerprint...`);
      // Remember the block so exhausting the attempts reports it instead of a generic message.
      lastError = new Error('HTTP 403');
      await rotateProxyOn403('403 Forbidden on GraphQL');
      rotateFingerprint();
    } else {
      const bodyPreview = typeof response.data === 'string'
        ? response.data.slice(0, 200)
        : (JSON.stringify(response.data) ?? '').slice(0, 200);
      console.error(`[Dutchie Client] HTTP ${response.status}: ${bodyPreview}`);
      lastError = new Error(`HTTP ${response.status}`);

      if (response.status === 403) {
        // retryOn403 is false: repeating the request with the same identity would only be blocked again.
        break;
      }
    }

    // Linear backoff, skipped once no attempt remains.
    if (attempt < totalAttempts) {
      await sleep(1000 * attempt);
    }
  }

  throw lastError || new Error('Max retries exceeded');
}
|
|
|
|
// ============================================================
|
|
// HTML PAGE FETCHING
|
|
// ============================================================
|
|
|
|
/**
 * Tuning knobs accepted by fetchPage.
 */
export interface FetchPageOptions {
  /** Retries allowed after the first attempt. */
  maxRetries?: number;
  /** Whether a 403 response triggers proxy and fingerprint rotation before the next attempt (default true). */
  retryOn403?: boolean;
}
|
|
|
|
/**
 * Fetch an HTML page from Dutchie (city pages, dispensary pages, etc.).
 *
 * Makes up to `maxRetries + 1` attempts, waiting `1000ms * attemptNumber`
 * between attempts (never after the last one).
 *
 * @param path Path appended to DUTCHIE_CONFIG.baseUrl.
 * @param options Optional retry settings:
 *   - maxRetries: defaults to DUTCHIE_CONFIG.maxRetries.
 *   - retryOn403: when true (default) a 403 rotates proxy and fingerprint and
 *     retries; when false a 403 ends the fetch immediately.
 * @returns `{ html, status }` for the first HTTP 200 reply, with `html` always a
 *          string; null when every attempt failed.
 */
export async function fetchPage(
  path: string,
  options: FetchPageOptions = {}
): Promise<{ html: string; status: number } | null> {
  const { maxRetries = DUTCHIE_CONFIG.maxRetries, retryOn403 = true } = options;
  const url = `${DUTCHIE_CONFIG.baseUrl}${path}`;
  const totalAttempts = maxRetries + 1;

  for (let attempt = 1; attempt <= totalAttempts; attempt++) {
    // Re-read the fingerprint every attempt: a 403 on the previous one may have rotated it.
    const fingerprint = getFingerprint();
    const headers: Record<string, string> = {
      'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
      'accept-language': fingerprint.acceptLanguage,
      'user-agent': fingerprint.userAgent,
    };

    if (fingerprint.secChUa) {
      headers['sec-ch-ua'] = fingerprint.secChUa;
      headers['sec-ch-ua-mobile'] = fingerprint.secChUaMobile || '?0';
      headers['sec-ch-ua-platform'] = fingerprint.secChUaPlatform || '"Windows"';
      headers['sec-fetch-dest'] = 'document';
      headers['sec-fetch-mode'] = 'navigate';
      headers['sec-fetch-site'] = 'none';
      headers['sec-fetch-user'] = '?1';
      headers['upgrade-insecure-requests'] = '1';
    }

    console.log(`[Dutchie Client] curl GET ${path} (attempt ${attempt}/${totalAttempts})`);

    const response = curlGet(url, headers, DUTCHIE_CONFIG.timeout);

    console.log(`[Dutchie Client] Response status: ${response.status}`);

    if (response.error) {
      console.error(`[Dutchie Client] curl error: ${response.error}`);
    } else if (response.status === 200) {
      // curlGet parses JSON bodies into objects; the contract here is a string.
      const html = typeof response.data === 'string'
        ? response.data
        : JSON.stringify(response.data);
      return { html, status: response.status };
    } else if (response.status === 403 && retryOn403) {
      console.warn(`[Dutchie Client] 403 blocked - rotating proxy and fingerprint...`);
      await rotateProxyOn403('403 Forbidden on page fetch');
      rotateFingerprint();
    } else {
      console.error(`[Dutchie Client] HTTP ${response.status}`);
      if (response.status === 403) {
        // retryOn403 is false: repeating the request with the same identity would only be blocked again.
        return null;
      }
    }

    // Linear backoff, skipped once no attempt remains.
    if (attempt < totalAttempts) {
      await sleep(1000 * attempt);
    }
  }

  return null;
}
|
|
|
|
/**
 * Extract and parse the `__NEXT_DATA__` JSON payload embedded in an HTML page.
 *
 * The script tag is located by its `id` alone, so additional or reordered
 * attributes (for example `nonce` or `crossorigin`) do not prevent a match.
 *
 * @param html Full HTML document text.
 * @returns The parsed payload, or null when the tag is missing, empty, or its
 *          content is not valid JSON (the parse failure is logged).
 */
export function extractNextData(html: string): any | null {
  const match = html.match(/<script\b[^>]*\bid=["']__NEXT_DATA__["'][^>]*>([\s\S]*?)<\/script>/i);
  const payload = match?.[1]?.trim();
  if (!payload) {
    return null;
  }

  try {
    return JSON.parse(payload);
  } catch (e) {
    console.error('[Dutchie Client] Failed to parse __NEXT_DATA__:', e);
    return null;
  }
}
|
|
|
|
// ============================================================
|
|
// UTILITY
|
|
// ============================================================
|
|
|
|
/**
 * Resolve after roughly `ms` milliseconds (timer-based delay).
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|