feat(images): Add local image storage with on-demand resizing

- Store product images locally with hierarchy: /images/products/<state>/<store>/<brand>/<product>/
- Add /img/* proxy endpoint for on-demand resizing via Sharp
- Implement per-product image checking to skip existing downloads
- Fix pathToUrl() to correctly generate /images/... URLs
- Add frontend getImageUrl() helper with preset sizes (thumb, medium, large)
- Update all product pages to use optimized image URLs
- Add stealth session support for Dutchie GraphQL crawls
- Include test scripts for crawl and image verification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-09 11:04:42 -07:00
parent aa776226b0
commit 91efd1d03d
28 changed files with 2027 additions and 205 deletions

View File

@@ -0,0 +1,250 @@
#!/usr/bin/env npx tsx
/**
* Crawl Single Store - Verbose test showing each step
*
* Usage:
* DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
* npx tsx src/scripts/crawl-single-store.ts <dispensaryId>
*
* Example:
* DATABASE_URL="..." npx tsx src/scripts/crawl-single-store.ts 112
*/
import { Pool } from 'pg';
import dotenv from 'dotenv';
import {
executeGraphQL,
startSession,
endSession,
getFingerprint,
GRAPHQL_HASHES,
DUTCHIE_CONFIG,
} from '../platforms/dutchie';
dotenv.config();
// ============================================================
// DATABASE CONNECTION
// ============================================================
/**
 * Resolve the PostgreSQL connection string used by this script.
 *
 * Precedence:
 *   1. `DATABASE_URL`
 *   2. `CANNAIQ_DB_URL`
 *   3. A URI assembled from `CANNAIQ_DB_HOST`, `CANNAIQ_DB_PORT`,
 *      `CANNAIQ_DB_NAME`, `CANNAIQ_DB_USER` and `CANNAIQ_DB_PASS`, each falling
 *      back to the local-development default when unset or empty.
 *
 * @returns A `postgresql://` connection URI.
 */
function getConnectionString(): string {
  const explicitUrl = process.env.DATABASE_URL || process.env.CANNAIQ_DB_URL;
  if (explicitUrl) {
    return explicitUrl;
  }

  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = process.env.CANNAIQ_DB_USER || 'dutchie';
  const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';

  // Percent-encode the credentials: a raw '@', '/', ':' or '#' in the user or
  // password would otherwise be parsed as URI structure and corrupt the URL.
  const credentials = `${encodeURIComponent(user)}:${encodeURIComponent(pass)}`;
  return `postgresql://${credentials}@${host}:${port}/${name}`;
}
const pool = new Pool({ connectionString: getConnectionString() });

// ============================================================
// MAIN
// ============================================================

/**
 * Crawl the first page of one dispensary's Dutchie menu, narrating each step.
 *
 * Steps: load the dispensary row, start a stealth session, run the
 * `FilteredProducts` GraphQL query, print the first few products, end the
 * session and print a summary.
 *
 * The dispensary ID is read from `process.argv[2]`; a missing or non-numeric
 * value prints usage and exits with code 1.
 *
 * Failures are reported on stderr and signalled through `process.exitCode`
 * rather than `process.exit()`, so the `finally` block still closes the
 * stealth session and the database pool.
 */
async function main(): Promise<void> {
  const dispensaryId = parseInt(process.argv[2], 10);
  if (!dispensaryId) {
    console.error('Usage: npx tsx src/scripts/crawl-single-store.ts <dispensaryId>');
    console.error('Example: npx tsx src/scripts/crawl-single-store.ts 112');
    process.exit(1);
  }

  console.log('');
  console.log('╔════════════════════════════════════════════════════════════╗');
  console.log('║ SINGLE STORE CRAWL - VERBOSE OUTPUT ║');
  console.log('╚════════════════════════════════════════════════════════════╝');
  console.log('');

  // True while a stealth session is open, so `finally` can close it if any
  // step throws before STEP 5 is reached.
  let sessionOpen = false;

  try {
    // ============================================================
    // STEP 1: Get dispensary info from database
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 1: Load Dispensary Info from Database │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    const dispResult = await pool.query(`
SELECT
id,
name,
platform_dispensary_id,
menu_url,
menu_type,
city,
state
FROM dispensaries
WHERE id = $1
`, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(` Dispensary ID: ${disp.id}`);
    console.log(` Name: ${disp.name}`);
    console.log(` City, State: ${disp.city}, ${disp.state}`);
    console.log(` Menu Type: ${disp.menu_type}`);
    console.log(` Platform ID: ${disp.platform_dispensary_id}`);
    console.log(` Menu URL: ${disp.menu_url}`);

    if (!disp.platform_dispensary_id) {
      throw new Error('Dispensary does not have a platform_dispensary_id - cannot crawl');
    }

    // Extract cName from menu_url (the path segment after /embedded-menu/ or
    // /dispensary/); fall back to the literal 'dispensary' when it is absent.
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
    console.log(` cName (derived): ${cName}`);
    console.log('');

    // ============================================================
    // STEP 2: Start stealth session
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 2: Start Stealth Session │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    // NOTE(review): the timezone is fixed to America/Phoenix even when
    // disp.state is not AZ - confirm whether it should follow the store's state.
    const session = startSession(disp.state || 'AZ', 'America/Phoenix');
    sessionOpen = true;
    const fp = getFingerprint();
    console.log(` Session ID: ${session.sessionId}`);
    console.log(` User-Agent: ${fp.userAgent.slice(0, 60)}...`);
    console.log(` Accept-Language: ${fp.acceptLanguage}`);
    console.log(` Sec-CH-UA: ${fp.secChUa || '(not set)'}`);
    console.log('');

    // ============================================================
    // STEP 3: Execute GraphQL query
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 3: Execute GraphQL Query (FilteredProducts) │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    const variables = {
      includeEnterpriseSpecials: false,
      productsFilter: {
        dispensaryId: disp.platform_dispensary_id,
        pricingType: 'rec',
        Status: 'Active',
        types: [],
        useCache: true,
        isDefaultSort: true,
        sortBy: 'popularSortIdx',
        sortDirection: 1,
        bypassOnlineThresholds: true,
        isKioskMenu: false,
        removeProductsBelowOptionThresholds: false,
      },
      page: 0,
      perPage: 100,
    };

    console.log(` Endpoint: ${DUTCHIE_CONFIG.graphqlEndpoint}`);
    console.log(` Operation: FilteredProducts`);
    console.log(` Hash: ${GRAPHQL_HASHES.FilteredProducts.slice(0, 20)}...`);
    console.log(` dispensaryId: ${variables.productsFilter.dispensaryId}`);
    console.log(` pricingType: ${variables.productsFilter.pricingType}`);
    console.log(` Status: ${variables.productsFilter.Status}`);
    console.log(` perPage: ${variables.perPage}`);
    console.log('');
    console.log(' Sending request...');

    const startTime = Date.now();
    const result = await executeGraphQL(
      'FilteredProducts',
      variables,
      GRAPHQL_HASHES.FilteredProducts,
      { cName, maxRetries: 3 }
    );
    const elapsed = Date.now() - startTime;
    console.log(` Response time: ${elapsed}ms`);
    console.log('');

    // ============================================================
    // STEP 4: Process response
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 4: Process Response │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    const data = result?.data?.filteredProducts;
    if (!data) {
      console.log(' ERROR: No data returned from GraphQL');
      // JSON.stringify(undefined) returns undefined, so coerce to a string
      // before slicing to avoid a TypeError on an empty result.
      console.log(' Raw result:', String(JSON.stringify(result, null, 2)).slice(0, 500));
      // An empty response is a failed crawl; the session is closed in `finally`.
      process.exitCode = 1;
      return;
    }

    const products = data.products || [];
    const totalCount = data.queryInfo?.totalCount || 0;
    // Derive the page count from the page size actually requested.
    const totalPages = Math.ceil(totalCount / variables.perPage);
    console.log(` Total products: ${totalCount}`);
    console.log(` Products in page: ${products.length}`);
    console.log(` Total pages: ${totalPages}`);
    console.log('');

    // Show first few products
    console.log(' First 5 products:');
    console.log(' ─────────────────────────────────────────────────────────');
    for (let i = 0; i < Math.min(5, products.length); i++) {
      const p = products[i];
      const name = (p.name || 'Unknown').slice(0, 40);
      const brand = (p.brand?.name || 'Unknown').slice(0, 15);
      const price = p.Prices?.[0]?.price || p.medPrice || p.recPrice || 'N/A';
      console.log(` ${i + 1}. ${name.padEnd(42)} | ${brand.padEnd(17)} | $${price}`);
    }
    console.log('');

    // ============================================================
    // STEP 5: End session
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 5: End Session │');
    console.log('└─────────────────────────────────────────────────────────────┘');
    // Clear the flag first so `finally` never ends the session a second time.
    sessionOpen = false;
    endSession();
    console.log('');

    // ============================================================
    // SUMMARY
    // ============================================================
    console.log('╔════════════════════════════════════════════════════════════╗');
    console.log('║ SUMMARY ║');
    console.log('╠════════════════════════════════════════════════════════════╣');
    // Guard against a NULL name so a successful crawl is not reported as an error.
    console.log(`║ Store: ${(disp.name ?? '').slice(0, 38).padEnd(38)}`);
    console.log(`║ Products Found: ${String(totalCount).padEnd(38)}`);
    console.log(`║ Response Time: ${(elapsed + 'ms').padEnd(38)}`);
    console.log(`║ Status: ${'SUCCESS'.padEnd(38)}`);
    console.log('╚════════════════════════════════════════════════════════════╝');
  } catch (error: unknown) {
    const err = error instanceof Error ? error : new Error(String(error));
    console.error('');
    console.error('╔════════════════════════════════════════════════════════════╗');
    console.error('║ ERROR ║');
    console.error('╚════════════════════════════════════════════════════════════╝');
    console.error(` ${err.message}`);
    if (err.stack) {
      console.error('');
      console.error('Stack trace:');
      console.error(err.stack.split('\n').slice(0, 5).join('\n'));
    }
    // process.exit() here would terminate before `finally` runs and skip cleanup.
    process.exitCode = 1;
  } finally {
    try {
      if (sessionOpen) {
        endSession();
      }
    } finally {
      await pool.end();
    }
  }
}

// main() handles its own errors; this only catches a failure during cleanup.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});

View File

@@ -23,6 +23,7 @@ import {
DutchieNormalizer,
hydrateToCanonical,
} from '../hydration';
import { initializeImageStorage } from '../utils/image-storage';
dotenv.config();
@@ -137,6 +138,11 @@ async function main() {
console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
console.log('============================================================\n');
// Initialize image storage
console.log('[Init] Initializing image storage...');
await initializeImageStorage();
console.log(' Image storage ready\n');
try {
// Step 1: Get dispensary info
console.log('[Step 1] Getting dispensary info...');

View File

@@ -0,0 +1,268 @@
#!/usr/bin/env npx tsx
/**
* Test Image Download - Tests image downloading with a small batch of products
*
* Usage:
* DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
* STORAGE_DRIVER=local STORAGE_BASE_PATH=./storage \
* npx tsx src/scripts/test-image-download.ts <dispensaryId> [limit]
*
* Example:
* DATABASE_URL="..." npx tsx src/scripts/test-image-download.ts 112 5
*/
import { Pool } from 'pg';
import dotenv from 'dotenv';
import {
executeGraphQL,
startSession,
endSession,
GRAPHQL_HASHES,
} from '../platforms/dutchie';
import { DutchieNormalizer } from '../hydration/normalizers/dutchie';
import { hydrateToCanonical } from '../hydration/canonical-upsert';
import { initializeImageStorage, getStorageStats } from '../utils/image-storage';
dotenv.config();
// ============================================================
// DATABASE CONNECTION
// ============================================================
/**
 * Resolve the PostgreSQL connection string used by this script.
 *
 * Precedence:
 *   1. `DATABASE_URL`
 *   2. `CANNAIQ_DB_URL` (honoured for parity with crawl-single-store.ts)
 *   3. A URI assembled from `CANNAIQ_DB_HOST`, `CANNAIQ_DB_PORT`,
 *      `CANNAIQ_DB_NAME`, `CANNAIQ_DB_USER` and `CANNAIQ_DB_PASS`, each falling
 *      back to the local-development default when unset or empty.
 *
 * @returns A `postgresql://` connection URI.
 */
function getConnectionString(): string {
  const explicitUrl = process.env.DATABASE_URL || process.env.CANNAIQ_DB_URL;
  if (explicitUrl) {
    return explicitUrl;
  }

  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = process.env.CANNAIQ_DB_USER || 'dutchie';
  const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';

  // Percent-encode the credentials: a raw '@', '/', ':' or '#' in the user or
  // password would otherwise be parsed as URI structure and corrupt the URL.
  const credentials = `${encodeURIComponent(user)}:${encodeURIComponent(pass)}`;
  return `postgresql://${credentials}@${host}:${port}/${name}`;
}
const pool = new Pool({ connectionString: getConnectionString() });

// ============================================================
// MAIN
// ============================================================

/**
 * End-to-end check of image downloading for a small batch of products.
 *
 * Steps: initialise image storage, load the dispensary row, delete the
 * dispensary's `store_products` rows, fetch up to `limit` products from
 * Dutchie, normalise and hydrate them with `downloadImages: true`, then report
 * storage statistics and the `local_image_path` values written to the database.
 *
 * Arguments: `process.argv[2]` is the dispensary ID (required) and
 * `process.argv[3]` is the product limit (optional, defaults to 5).
 *
 * WARNING: STEP 3 deletes every `store_products` row for the dispensary before
 * the fetch, so only run this against a disposable or local database.
 *
 * Failures are reported on stderr and signalled through `process.exitCode`
 * rather than `process.exit()`, so the `finally` block still closes the pool.
 */
async function main(): Promise<void> {
  const dispensaryId = parseInt(process.argv[2], 10);
  // Fall back to 5 for a missing, non-numeric, zero or negative limit.
  const requestedLimit = parseInt(process.argv[3], 10);
  const limit = requestedLimit > 0 ? requestedLimit : 5;

  if (!dispensaryId) {
    console.error('Usage: npx tsx src/scripts/test-image-download.ts <dispensaryId> [limit]');
    console.error('Example: npx tsx src/scripts/test-image-download.ts 112 5');
    process.exit(1);
  }

  console.log('');
  console.log('╔════════════════════════════════════════════════════════════╗');
  console.log('║ IMAGE DOWNLOAD TEST ║');
  console.log('╚════════════════════════════════════════════════════════════╝');
  console.log('');

  try {
    // Initialize image storage
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 1: Initialize Image Storage │');
    console.log('└─────────────────────────────────────────────────────────────┘');
    await initializeImageStorage();
    const statsBefore = await getStorageStats();
    console.log(` Base path: ${statsBefore.basePath}`);
    console.log(` Products before: ${statsBefore.productCount}`);
    console.log(` Brands before: ${statsBefore.brandCount}`);
    console.log('');

    // Get dispensary info
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 2: Load Dispensary Info │');
    console.log('└─────────────────────────────────────────────────────────────┘');
    const dispResult = await pool.query(`
SELECT
id, name, platform_dispensary_id, menu_url, state, slug
FROM dispensaries
WHERE id = $1
`, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(` Dispensary: ${disp.name}`);
    console.log(` State: ${disp.state}`);
    console.log(` Slug: ${disp.slug}`);
    console.log(` Platform ID: ${disp.platform_dispensary_id}`);
    console.log('');

    // Delete ALL existing store_products for this dispensary so that every
    // fetched product goes through the "new product" path.
    // NOTE(review): this runs before the fetch, so a failed fetch leaves the
    // dispensary with no store_products - confirm that is acceptable.
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 3: Clear Store Products (to test new product flow) │');
    console.log('└─────────────────────────────────────────────────────────────┘');
    const deleteResult = await pool.query(`
DELETE FROM store_products
WHERE dispensary_id = $1
RETURNING id
`, [dispensaryId]);
    console.log(` Deleted ${deleteResult.rowCount} existing store_products`);
    console.log('');

    // Fetch products from Dutchie
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 4: Fetch Products from Dutchie (limited) │');
    console.log('└─────────────────────────────────────────────────────────────┘');
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';

    // NOTE(review): the timezone is fixed to America/Phoenix even when
    // disp.state is not AZ - confirm whether it should follow the store's state.
    const session = startSession(disp.state || 'AZ', 'America/Phoenix');
    let result;
    let elapsed = 0;
    try {
      console.log(` Session ID: ${session.sessionId}`);
      console.log(` cName: ${cName}`);
      console.log(` Limit: ${limit} products`);

      const variables = {
        includeEnterpriseSpecials: false,
        productsFilter: {
          dispensaryId: disp.platform_dispensary_id,
          pricingType: 'rec',
          Status: 'Active',
          types: [],
          useCache: true,
          isDefaultSort: true,
          sortBy: 'popularSortIdx',
          sortDirection: 1,
          bypassOnlineThresholds: true,
          isKioskMenu: false,
          removeProductsBelowOptionThresholds: false,
        },
        page: 0,
        perPage: limit, // Only fetch limited products
      };

      const startTime = Date.now();
      result = await executeGraphQL(
        'FilteredProducts',
        variables,
        GRAPHQL_HASHES.FilteredProducts,
        { cName, maxRetries: 3 }
      );
      elapsed = Date.now() - startTime;
    } finally {
      // Close the session even when the request throws.
      endSession();
    }

    const products = result?.data?.filteredProducts?.products || [];
    console.log(` Fetched: ${products.length} products in ${elapsed}ms`);

    // Show products with images
    console.log('');
    console.log(' Products with images:');
    for (let i = 0; i < products.length; i++) {
      const p = products[i];
      const hasImage = !!p.Image;
      const brandName = p.brand?.name || 'Unknown';
      // Default a missing name so the column stays padded rather than
      // printing an unpadded "undefined".
      const productName = (p.name ?? 'Unknown').slice(0, 40).padEnd(42);
      console.log(` ${i + 1}. ${productName} | ${brandName.slice(0, 15).padEnd(17)} | ${hasImage ? '✓ has image' : '✗ no image'}`);
    }
    console.log('');

    // Normalize and hydrate
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 5: Normalize and Hydrate (with image download) │');
    console.log('└─────────────────────────────────────────────────────────────┘');
    const normalizer = new DutchieNormalizer();

    // Wrap products in expected payload format
    const payload = {
      raw_json: products, // DutchieNormalizer.extractProducts handles arrays
      dispensary_id: dispensaryId,
    };
    const normResult = normalizer.normalize(payload);
    console.log(` Normalized products: ${normResult.products.length}`);
    console.log(` Brands found: ${normResult.brands.length}`);

    const hydrateStart = Date.now();
    const hydrateResult = await hydrateToCanonical(
      pool,
      dispensaryId,
      normResult,
      null, // no crawl run ID for test
      { dryRun: false, downloadImages: true }
    );
    const hydrateElapsed = Date.now() - hydrateStart;

    console.log('');
    console.log(` Hydration time: ${hydrateElapsed}ms`);
    console.log(` Products new: ${hydrateResult.productsNew}`);
    console.log(` Products updated: ${hydrateResult.productsUpdated}`);
    console.log(` Images downloaded: ${hydrateResult.imagesDownloaded}`);
    console.log(` Images skipped: ${hydrateResult.imagesSkipped}`);
    console.log(` Images failed: ${hydrateResult.imagesFailed}`);
    console.log(` Image bytes: ${(hydrateResult.imagesBytesTotal / 1024).toFixed(1)} KB`);
    console.log('');

    // Check storage stats
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 6: Verify Storage │');
    console.log('└─────────────────────────────────────────────────────────────┘');
    const statsAfter = await getStorageStats();
    console.log(` Products after: ${statsAfter.productCount}`);
    console.log(` Brands after: ${statsAfter.brandCount}`);
    console.log(` Total size: ${(statsAfter.totalSizeBytes / 1024).toFixed(1)} KB`);
    console.log('');

    // Check database for local_image_path
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 7: Check Database for Local Image Paths │');
    console.log('└─────────────────────────────────────────────────────────────┘');
    const dbCheck = await pool.query(`
SELECT
id, name_raw, local_image_path, images
FROM store_products
WHERE dispensary_id = $1
LIMIT 10
`, [dispensaryId]);

    for (const row of dbCheck.rows) {
      const hasLocal = !!row.local_image_path;
      const hasImages = !!row.images;
      const rowName = (row.name_raw ?? 'Unknown').slice(0, 40).padEnd(42);
      console.log(` ${row.id}: ${rowName} | local: ${hasLocal ? '✓' : '✗'} | images: ${hasImages ? '✓' : '✗'}`);
      if (row.local_image_path) {
        console.log(`${row.local_image_path}`);
      }
    }
    console.log('');

    // Summary
    console.log('╔════════════════════════════════════════════════════════════╗');
    console.log('║ SUMMARY ║');
    console.log('╠════════════════════════════════════════════════════════════╣');
    // Guard against a NULL name so a successful run is not reported as an error.
    console.log(`║ Dispensary: ${(disp.name ?? '').slice(0, 37).padEnd(37)}`);
    console.log(`║ Products crawled: ${String(products.length).padEnd(37)}`);
    console.log(`║ Images downloaded: ${String(hydrateResult.imagesDownloaded).padEnd(37)}`);
    console.log(`║ Total image bytes: ${((hydrateResult.imagesBytesTotal / 1024).toFixed(1) + ' KB').padEnd(37)}`);
    console.log(`║ Status: ${'SUCCESS'.padEnd(37)}`);
    console.log('╚════════════════════════════════════════════════════════════╝');
  } catch (error: unknown) {
    const err = error instanceof Error ? error : new Error(String(error));
    console.error('');
    console.error('╔════════════════════════════════════════════════════════════╗');
    console.error('║ ERROR ║');
    console.error('╚════════════════════════════════════════════════════════════╝');
    console.error(` ${err.message}`);
    if (err.stack) {
      console.error('');
      console.error('Stack trace:');
      console.error(err.stack.split('\n').slice(0, 5).join('\n'));
    }
    // process.exit() here would terminate before `finally` runs and skip pool.end().
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}

// main() handles its own errors; this only catches a failure during cleanup.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env npx tsx
/**
* Test Image Proxy - Standalone test without backend
*
* Usage:
* npx tsx src/scripts/test-image-proxy.ts
*/
import express from 'express';
import imageProxyRoutes from '../routes/image-proxy';
const app = express();
const PORT = 3099;

// Path of the sample product image requested by every resize/convert case.
const SAMPLE_IMAGE_URL =
  '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp';

// Requests issued against the proxy, in order: a label plus the path and query.
const PROXY_CASES = [
  { name: 'Original image', url: SAMPLE_IMAGE_URL },
  { name: 'Resize to 200px width', url: `${SAMPLE_IMAGE_URL}?w=200` },
  { name: 'Resize to 100x100 cover', url: `${SAMPLE_IMAGE_URL}?w=100&h=100&fit=cover` },
  { name: 'Grayscale + blur', url: `${SAMPLE_IMAGE_URL}?w=200&gray=1&blur=2` },
  { name: 'Convert to JPEG', url: `${SAMPLE_IMAGE_URL}?w=200&format=jpeg&q=70` },
  { name: 'Non-existent image', url: '/img/products/az/nonexistent/image.webp' },
];

/**
 * Issue one GET against the local proxy and print its status, content type
 * and body size. Any HTTP status is accepted (`validateStatus` always returns
 * true); only a thrown error is reported as `ERROR`.
 */
async function requestAndReport(http: any, label: string, path: string): Promise<void> {
  try {
    const res = await http.get(`http://localhost:${PORT}${path}`, {
      responseType: 'arraybuffer',
      validateStatus: () => true,
    });
    const mimeType = res.headers['content-type'];
    const byteLength = res.data.length;
    const httpStatus = res.status;
    const ellipsis = path.length > 80 ? '...' : '';

    console.log(`${label}:`);
    console.log(` URL: ${path.slice(0, 80)}${ellipsis}`);
    console.log(` Status: ${httpStatus}`);
    console.log(` Content-Type: ${mimeType}`);
    console.log(` Size: ${(byteLength / 1024).toFixed(1)} KB`);
    console.log('');
  } catch (error: any) {
    console.log(`${label}: ERROR - ${error.message}`);
    console.log('');
  }
}

// Mount the image proxy
app.use('/img', imageProxyRoutes);

// Start the server, run every case sequentially, then exit.
app.listen(PORT, async () => {
  console.log(`Test image proxy running on http://localhost:${PORT}`);
  console.log('');
  console.log('Testing image proxy...');
  console.log('');

  const axios = require('axios');
  for (const { name, url } of PROXY_CASES) {
    await requestAndReport(axios, name, url);
  }

  console.log('Tests complete!');
  process.exit(0);
});

View File

@@ -0,0 +1,117 @@
/**
* Test script for stealth session management
*
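* Tests:
* 1. Timezone → locale mapping
* 2. Random fingerprint selection
* 3. Session management (fingerprint and headers before, during and after a session)
* 4. Per-session fingerprint rotation across multiple sessions
* 5. Geographic consistency (timezone → Accept-Language)
*
* Proxy location loading from the database is not exercised here; see
* src/scripts/test-stealth-with-db.ts for that.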
*
* Usage:
* npx tsx src/scripts/test-stealth-session.ts
*/
import {
startSession,
endSession,
getCurrentSession,
getFingerprint,
getRandomFingerprint,
getLocaleForTimezone,
buildHeaders,
} from '../platforms/dutchie';
// Flat, print-only script: each section calls the stealth-session helpers and
// logs what they return for manual inspection. Nothing is asserted.
console.log('='.repeat(60));
console.log('STEALTH SESSION TEST');
console.log('='.repeat(60));

// Test 1: Timezone to Locale mapping
// Includes `undefined` and an unrecognised zone to show how they are handled.
console.log('\n[Test 1] Timezone to Locale Mapping:');
const testTimezones = [
  'America/Phoenix',
  'America/Los_Angeles',
  'America/New_York',
  'America/Chicago',
  undefined,
  'Invalid/Timezone',
];
for (const tz of testTimezones) {
  const locale = getLocaleForTimezone(tz);
  // Separate the timezone from the locale; without a delimiter the two values
  // run together and the output is unreadable.
  console.log(` ${tz || '(undefined)'} → ${locale}`);
}

// Test 2: Random fingerprint selection
console.log('\n[Test 2] Random Fingerprint Selection (5 samples):');
for (let i = 0; i < 5; i++) {
  const fp = getRandomFingerprint();
  console.log(` ${i + 1}. ${fp.userAgent.slice(0, 60)}...`);
}

// Test 3: Session Management
console.log('\n[Test 3] Session Management:');

// Before session - should use default fingerprint
console.log(' Before session:');
const beforeFp = getFingerprint();
console.log(` getFingerprint(): ${beforeFp.userAgent.slice(0, 50)}...`);
console.log(` getCurrentSession(): ${getCurrentSession()}`);

// Start session with Arizona timezone
console.log('\n Starting session (AZ, America/Phoenix):');
const session1 = startSession('AZ', 'America/Phoenix');
console.log(` Session ID: ${session1.sessionId}`);
console.log(` Fingerprint UA: ${session1.fingerprint.userAgent.slice(0, 50)}...`);
console.log(` Accept-Language: ${session1.fingerprint.acceptLanguage}`);
console.log(` Timezone: ${session1.timezone}`);

// During session - should use session fingerprint
console.log('\n During session:');
const duringFp = getFingerprint();
console.log(` getFingerprint(): ${duringFp.userAgent.slice(0, 50)}...`);
console.log(` Same as session? ${duringFp.userAgent === session1.fingerprint.userAgent}`);

// Test buildHeaders with session
console.log('\n buildHeaders() during session:');
const headers = buildHeaders('/embedded-menu/test-store');
console.log(` User-Agent: ${headers['user-agent'].slice(0, 50)}...`);
console.log(` Accept-Language: ${headers['accept-language']}`);
console.log(` Origin: ${headers['origin']}`);
console.log(` Referer: ${headers['referer']}`);

// End session
console.log('\n Ending session:');
endSession();
console.log(` getCurrentSession(): ${getCurrentSession()}`);

// Test 4: Multiple sessions should have different fingerprints
// Starts and ends 10 sessions and counts the distinct user agents seen.
console.log('\n[Test 4] Multiple Sessions (fingerprint variety):');
const fingerprints: string[] = [];
for (let i = 0; i < 10; i++) {
  const session = startSession('CA', 'America/Los_Angeles');
  fingerprints.push(session.fingerprint.userAgent);
  endSession();
}
const uniqueCount = new Set(fingerprints).size;
console.log(` 10 sessions created, ${uniqueCount} unique fingerprints`);
console.log(` Variety: ${uniqueCount >= 3 ? '✅ Good' : '⚠️ Low - may need more fingerprint options'}`);

// Test 5: Geographic consistency check
// Every listed zone is in the US, so the session's Accept-Language is expected
// to contain 'en-US'.
console.log('\n[Test 5] Geographic Consistency:');
const geoTests = [
  { state: 'AZ', tz: 'America/Phoenix' },
  { state: 'CA', tz: 'America/Los_Angeles' },
  { state: 'NY', tz: 'America/New_York' },
  { state: 'IL', tz: 'America/Chicago' },
];
for (const { state, tz } of geoTests) {
  const session = startSession(state, tz);
  const consistent = session.fingerprint.acceptLanguage.includes('en-US');
  console.log(` ${state} (${tz}): Accept-Language=${session.fingerprint.acceptLanguage} ${consistent ? '✅' : '❌'}`);
  endSession();
}

console.log('\n' + '='.repeat(60));
console.log('TEST COMPLETE');
console.log('='.repeat(60));

View File

@@ -0,0 +1,144 @@
/**
* Test script for stealth session with REAL proxy data from database
*
* Tests:
* 1. Load proxies from database (with location data)
* 2. Verify location fields (city, state, timezone) are loaded
* 3. Start session with proxy's timezone
* 4. Verify Accept-Language matches timezone
*
* Usage:
* DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" npx tsx src/scripts/test-stealth-with-db.ts
*/
import { Pool } from 'pg';
import {
CrawlRotator,
ProxyRotator,
} from '../services/crawl-rotator';
import {
startSession,
endSession,
getLocaleForTimezone,
} from '../platforms/dutchie';
const DATABASE_URL = process.env.DATABASE_URL;
if (!DATABASE_URL) {
  console.error('ERROR: DATABASE_URL environment variable is required');
  process.exit(1);
}

/**
 * Exercise stealth sessions against the proxies stored in the database.
 *
 * Tests:
 *   1. Initialise a `CrawlRotator` and print proxy totals.
 *   2. Print the location fields of each proxy returned by the rotator.
 *   3. Start a session with the first proxy that has a timezone and check
 *      that the session's Accept-Language equals `getLocaleForTimezone()`.
 *   4. Start a session for every distinct proxy timezone and print its locale.
 *
 * Errors and a failed locale check set `process.exitCode` to 1; the database
 * pool is always closed in `finally`.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('STEALTH SESSION TEST WITH DATABASE');
  console.log('='.repeat(60));

  const pool = new Pool({ connectionString: DATABASE_URL });

  try {
    // Test 1: Load proxies with location data
    console.log('\n[Test 1] Loading proxies from database...');
    const rotator = new CrawlRotator(pool);
    await rotator.initialize();

    const stats = rotator.proxy.getStats();
    console.log(` Total proxies: ${stats.totalProxies}`);
    console.log(` Active proxies: ${stats.activeProxies}`);

    if (stats.activeProxies === 0) {
      console.log('\n WARNING: No active proxies in database!');
      console.log(' Insert test proxies with:');
      console.log(` INSERT INTO proxies (host, port, protocol, city, state, country_code, timezone, active)`);
      console.log(` VALUES ('proxy1.example.com', 8080, 'http', 'Phoenix', 'AZ', 'US', 'America/Phoenix', true);`);
      return;
    }

    // Test 2: Check location data on proxies
    console.log('\n[Test 2] Checking proxy location data...');
    let proxyCount = 0;
    let withLocationCount = 0;

    // Ask the rotator for as many proxies as it reports in total.
    // NOTE(review): presumably getNext() cycles through the proxies - if it
    // only yields active ones, some may be printed twice; confirm.
    for (let i = 0; i < stats.totalProxies; i++) {
      const proxy = rotator.proxy.getNext();
      if (!proxy) break;

      proxyCount++;
      const hasLocation = !!(proxy.stateCode || proxy.timezone);
      if (hasLocation) withLocationCount++;

      console.log(` Proxy ${proxy.id}: ${proxy.host}:${proxy.port}`);
      console.log(` City: ${proxy.city || '(not set)'}`);
      console.log(` State: ${proxy.stateCode || '(not set)'}`);
      console.log(` Country: ${proxy.countryCode || '(not set)'}`);
      console.log(` Timezone: ${proxy.timezone || '(not set)'}`);
      console.log(` Has location data: ${hasLocation ? '✅' : '❌'}`);
    }

    console.log(`\n Summary: ${withLocationCount}/${proxyCount} proxies have location data`);

    // Test 3: Start session using proxy's timezone
    console.log('\n[Test 3] Starting session with proxy timezone...');

    // Search for a proxy that actually has a timezone, rather than taking
    // whichever proxy the rotator hands out next.
    let firstProxy = null;
    for (let i = 0; i < stats.totalProxies; i++) {
      const candidate = rotator.proxy.getNext();
      if (candidate && candidate.timezone) {
        firstProxy = candidate;
        break;
      }
    }

    // Records a failed Accept-Language check so the final summary cannot
    // claim success when the check failed.
    let localeMismatch = false;

    if (firstProxy && firstProxy.timezone) {
      console.log(` Using proxy: ${firstProxy.host} (${firstProxy.city}, ${firstProxy.stateCode})`);
      console.log(` Proxy timezone: ${firstProxy.timezone}`);

      const session = startSession(firstProxy.stateCode, firstProxy.timezone);
      try {
        console.log(` Session ID: ${session.sessionId}`);
        console.log(` Session timezone: ${session.timezone}`);
        console.log(` Session Accept-Language: ${session.fingerprint.acceptLanguage}`);

        // Verify Accept-Language matches expected locale for timezone
        const expectedLocale = getLocaleForTimezone(firstProxy.timezone);
        const matches = session.fingerprint.acceptLanguage === expectedLocale;
        localeMismatch = !matches;
        console.log(` Expected locale: ${expectedLocale}`);
        console.log(` Locale matches: ${matches ? '✅' : '❌'}`);
      } finally {
        endSession();
      }
    } else {
      console.log(' WARNING: No proxy with timezone data found');
    }

    // Test 4: Test each timezone in database
    console.log('\n[Test 4] Testing all proxy timezones...');
    const seenTimezones = new Set<string>();

    // Sample the rotator again and start one session per distinct timezone.
    for (let i = 0; i < stats.totalProxies; i++) {
      const proxy = rotator.proxy.getNext();
      if (!proxy || !proxy.timezone) continue;
      if (seenTimezones.has(proxy.timezone)) continue;
      seenTimezones.add(proxy.timezone);

      const session = startSession(proxy.stateCode, proxy.timezone);
      try {
        console.log(` ${proxy.timezone}:`);
        console.log(` State: ${proxy.stateCode || 'unknown'}`);
        console.log(` Accept-Language: ${session.fingerprint.acceptLanguage}`);
      } finally {
        endSession();
      }
    }

    console.log('\n' + '='.repeat(60));
    console.log('TEST COMPLETE');
    console.log('='.repeat(60));

    if (withLocationCount === 0) {
      console.log('\n⚠ No proxies have location data.');
      console.log(' Geographic consistency will use default locale (en-US).');
      console.log(' To enable geo-consistency, populate city/state/timezone on proxies.');
    } else if (localeMismatch) {
      console.log('\n❌ Session Accept-Language did not match the locale expected for the proxy timezone.');
      process.exitCode = 1;
    } else {
      console.log('\n✅ Stealth session with geo-consistency is working!');
      console.log(' Sessions will use Accept-Language matching proxy timezone.');
    }
  } catch (error) {
    console.error('Error:', error);
    // Report the failure through the exit status instead of exiting with 0.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}

// main() handles its own errors; this only catches a failure during cleanup.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});