feat(images): Add local image storage with on-demand resizing

- Store product images locally with hierarchy: /images/products/<state>/<store>/<brand>/<product>/
- Add /img/* proxy endpoint for on-demand resizing via Sharp
- Implement per-product image checking to skip existing downloads
- Fix pathToUrl() to correctly generate /images/... URLs
- Add frontend getImageUrl() helper with preset sizes (thumb, medium, large)
- Update all product pages to use optimized image URLs
- Add stealth session support for Dutchie GraphQL crawls
- Include test scripts for crawl and image verification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-09 11:04:42 -07:00
parent aa776226b0
commit 91efd1d03d
28 changed files with 2027 additions and 205 deletions

View File

@@ -213,7 +213,24 @@ const FINGERPRINTS: Fingerprint[] = [
// Index into FINGERPRINTS returned by getFingerprint() when no session is active.
let currentFingerprintIndex = 0;
// Active crawl session, or null when none is running. Declared here, ahead of
// the exported CrawlSession interface, so getFingerprint() below can read it.
// NOTE(review): this inline shape duplicates CrawlSession field-for-field and
// has to be kept in sync with that interface by hand.
let currentSession: {
sessionId: string;
fingerprint: Fingerprint;
proxyUrl: string | null;
stateCode?: string;
timezone?: string;
startedAt: Date;
} | null = null;
/**
 * Resolve the fingerprint that outgoing requests should present.
 *
 * An active session always wins: its fingerprint is returned as-is. With no
 * session running, the pool entry at the current rotation index is used.
 *
 * @returns The session fingerprint, or the pool fingerprint at the current index.
 */
export function getFingerprint(): Fingerprint {
  return currentSession
    ? currentSession.fingerprint
    : FINGERPRINTS[currentFingerprintIndex];
}
@@ -228,6 +245,103 @@ export function resetFingerprint(): void {
currentFingerprintIndex = 0;
}
/**
 * Pick one fingerprint from the pool uniformly at random.
 *
 * Does not touch the rotation index or the active session.
 *
 * @returns A randomly selected entry of FINGERPRINTS.
 */
export function getRandomFingerprint(): Fingerprint {
  return FINGERPRINTS[Math.floor(Math.random() * FINGERPRINTS.length)];
}
// ============================================================
// SESSION MANAGEMENT
// Per-session fingerprint rotation for stealth
// ============================================================
/**
 * Identity used for one crawl session, as created by startSession().
 */
export interface CrawlSession {
/** Identifier built by startSession() as `session_<epoch ms>_<random base36 suffix>`. */
sessionId: string;
/** Fingerprint for this session; startSession() overrides its acceptLanguage from the timezone. */
fingerprint: Fingerprint;
/** Value of the module's current proxy at the moment the session was started, or null. */
proxyUrl: string | null;
/** Optional state code passed to startSession(); stored as given. */
stateCode?: string;
/** Optional timezone passed to startSession(); used there to choose the Accept-Language value. */
timezone?: string;
/** Creation time; endSession() uses it to report the session duration. */
startedAt: Date;
}
// Note: currentSession variable declared earlier in file for proper scoping
/**
 * Timezone to Accept-Language mapping.
 * Every US timezone currently maps to en-US; add entries here to support
 * other locales.
 */
const TIMEZONE_TO_LOCALE: Record<string, string> = {
  'America/Phoenix': 'en-US,en;q=0.9',
  'America/Los_Angeles': 'en-US,en;q=0.9',
  'America/Denver': 'en-US,en;q=0.9',
  'America/Chicago': 'en-US,en;q=0.9',
  'America/New_York': 'en-US,en;q=0.9',
  'America/Detroit': 'en-US,en;q=0.9',
  'America/Anchorage': 'en-US,en;q=0.9',
  'Pacific/Honolulu': 'en-US,en;q=0.9',
};

/**
 * Get the Accept-Language header value for a given timezone.
 *
 * @param timezone - IANA timezone name (e.g. `America/Phoenix`). Optional.
 * @returns The mapped Accept-Language value, or `en-US,en;q=0.9` when the
 *   timezone is missing, empty, or not present in the mapping.
 */
export function getLocaleForTimezone(timezone?: string): string {
  const fallback = 'en-US,en;q=0.9';
  if (!timezone) return fallback;

  // Only consult the table's own keys. A bare `TIMEZONE_TO_LOCALE[timezone]`
  // also resolves inherited Object.prototype members, so inputs such as
  // 'constructor' or '__proto__' would yield a truthy non-string value.
  const isKnown = Object.prototype.hasOwnProperty.call(TIMEZONE_TO_LOCALE, timezone);
  const mapped = isKnown ? TIMEZONE_TO_LOCALE[timezone] : undefined;
  return mapped || fallback;
}
/**
 * Begin a new crawl session with a freshly chosen identity.
 *
 * A random fingerprint is drawn from the pool and its Accept-Language is
 * replaced with the value matching `timezone`, so the language header agrees
 * with the store's location. The new session replaces whatever session was
 * stored before and records the proxy that is current at this moment.
 *
 * @param stateCode - Optional state code stored on the session.
 * @param timezone - Optional timezone; selects the Accept-Language value and is logged when set.
 * @returns The session that is now active.
 */
export function startSession(stateCode?: string, timezone?: string): CrawlSession {
  const pooled = getRandomFingerprint();
  const acceptLanguage = getLocaleForTimezone(timezone);
  const fingerprint: Fingerprint = { ...pooled, acceptLanguage };

  const sessionId = `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
  const session: CrawlSession = {
    sessionId,
    fingerprint,
    proxyUrl: currentProxy,
    stateCode,
    timezone,
    startedAt: new Date(),
  };

  // Publish the session before logging so the module state is already updated.
  currentSession = session;

  console.log(`[Dutchie Client] Started session ${session.sessionId}`);
  console.log(`[Dutchie Client] Fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`);
  console.log(`[Dutchie Client] Accept-Language: ${fingerprint.acceptLanguage}`);
  if (timezone) {
    console.log(`[Dutchie Client] Timezone: ${timezone}`);
  }

  return session;
}
/**
 * Close the active crawl session, if there is one.
 *
 * Logs the session id together with its duration in whole seconds and then
 * clears the stored session. Does nothing when no session is active.
 */
export function endSession(): void {
  if (!currentSession) {
    return;
  }

  const { sessionId, startedAt } = currentSession;
  const duration = Math.round((Date.now() - startedAt.getTime()) / 1000);
  console.log(`[Dutchie Client] Ended session ${sessionId} (${duration}s)`);
  currentSession = null;
}
/**
 * Read the session that is currently active.
 *
 * @returns The stored session object itself (not a copy), or null when no
 *   session has been started or the last one was ended.
 */
export function getCurrentSession(): CrawlSession | null {
  return currentSession;
}
// ============================================================
// CURL HTTP CLIENT
// ============================================================