feat(images): Add local image storage with on-demand resizing
- Store product images locally with hierarchy: /images/products/<state>/<store>/<brand>/<product>/
- Add /img/* proxy endpoint for on-demand resizing via Sharp
- Implement per-product image checking to skip existing downloads
- Fix pathToUrl() to correctly generate /images/... URLs
- Add frontend getImageUrl() helper with preset sizes (thumb, medium, large)
- Update all product pages to use optimized image URLs
- Add stealth session support for Dutchie GraphQL crawls
- Include test scripts for crawl and image verification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
250
backend/src/scripts/crawl-single-store.ts
Normal file
250
backend/src/scripts/crawl-single-store.ts
Normal file
@@ -0,0 +1,250 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Crawl Single Store - Verbose test showing each step
|
||||
*
|
||||
* Usage:
|
||||
* DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
|
||||
* npx tsx src/scripts/crawl-single-store.ts <dispensaryId>
|
||||
*
|
||||
* Example:
|
||||
* DATABASE_URL="..." npx tsx src/scripts/crawl-single-store.ts 112
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import dotenv from 'dotenv';
|
||||
import {
|
||||
executeGraphQL,
|
||||
startSession,
|
||||
endSession,
|
||||
getFingerprint,
|
||||
GRAPHQL_HASHES,
|
||||
DUTCHIE_CONFIG,
|
||||
} from '../platforms/dutchie';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
// ============================================================
|
||||
// DATABASE CONNECTION
|
||||
// ============================================================
|
||||
|
||||
/**
 * Resolve the PostgreSQL connection string used by this script.
 *
 * Precedence:
 *   1. DATABASE_URL
 *   2. CANNAIQ_DB_URL
 *   3. A URL assembled from CANNAIQ_DB_HOST / _PORT / _NAME / _USER / _PASS,
 *      each falling back to a local-development default.
 *
 * @returns A `postgresql://` connection URL.
 */
function getConnectionString(): string {
  const explicitUrl = process.env.DATABASE_URL || process.env.CANNAIQ_DB_URL;
  if (explicitUrl) {
    return explicitUrl;
  }

  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';

  // Percent-encode the credentials: a raw '@', ':' or '/' inside the user
  // name or password would otherwise be parsed as URL structure and yield a
  // wrong host or a rejected connection string.
  const user = encodeURIComponent(process.env.CANNAIQ_DB_USER || 'dutchie');
  const pass = encodeURIComponent(process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass');

  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
|
||||
|
||||
const pool = new Pool({ connectionString: getConnectionString() });
|
||||
|
||||
// ============================================================
|
||||
// MAIN
|
||||
// ============================================================
|
||||
|
||||
/**
 * Print a three-line boxed banner that introduces one step of the run.
 *
 * @param titleLine - The pre-formatted middle line of the box.
 */
function logStepBanner(titleLine: string): void {
  console.log('┌─────────────────────────────────────────────────────────────┐');
  console.log(titleLine);
  console.log('└─────────────────────────────────────────────────────────────┘');
}

/**
 * Crawl the first page of products for a single dispensary, logging each step.
 *
 * Reads the dispensary id from `process.argv[2]`, loads the dispensary row,
 * starts a stealth session, runs one `FilteredProducts` GraphQL query and
 * prints a short listing plus a summary.
 *
 * Exit behaviour: a missing/invalid id exits immediately with code 1. Any
 * later failure (including an empty GraphQL response) sets
 * `process.exitCode = 1` instead of calling `process.exit`, so the `finally`
 * block can still end the session and close the database pool.
 */
async function main() {
  const dispensaryId = parseInt(process.argv[2], 10);

  if (!dispensaryId) {
    console.error('Usage: npx tsx src/scripts/crawl-single-store.ts <dispensaryId>');
    console.error('Example: npx tsx src/scripts/crawl-single-store.ts 112');
    process.exit(1);
  }

  console.log('');
  console.log('╔════════════════════════════════════════════════════════════╗');
  console.log('║ SINGLE STORE CRAWL - VERBOSE OUTPUT ║');
  console.log('╚════════════════════════════════════════════════════════════╝');
  console.log('');

  // True while a stealth session is open, so every exit path can close it.
  let sessionActive = false;

  try {
    // ============================================================
    // STEP 1: Get dispensary info from database
    // ============================================================
    logStepBanner('│ STEP 1: Load Dispensary Info from Database │');

    const dispResult = await pool.query(`
      SELECT
        id,
        name,
        platform_dispensary_id,
        menu_url,
        menu_type,
        city,
        state
      FROM dispensaries
      WHERE id = $1
    `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(` Dispensary ID: ${disp.id}`);
    console.log(` Name: ${disp.name}`);
    console.log(` City, State: ${disp.city}, ${disp.state}`);
    console.log(` Menu Type: ${disp.menu_type}`);
    console.log(` Platform ID: ${disp.platform_dispensary_id}`);
    console.log(` Menu URL: ${disp.menu_url}`);

    if (!disp.platform_dispensary_id) {
      throw new Error('Dispensary does not have a platform_dispensary_id - cannot crawl');
    }

    // Extract cName (the path segment after /embedded-menu/ or /dispensary/)
    // from menu_url; fall back to the literal 'dispensary' when absent.
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
    console.log(` cName (derived): ${cName}`);
    console.log('');

    // ============================================================
    // STEP 2: Start stealth session
    // ============================================================
    logStepBanner('│ STEP 2: Start Stealth Session │');

    // NOTE(review): the timezone is hard-coded to America/Phoenix even when
    // disp.state is not AZ — confirm this is intended for non-Arizona stores.
    const session = startSession(disp.state || 'AZ', 'America/Phoenix');
    sessionActive = true;

    const fp = getFingerprint();
    console.log(` Session ID: ${session.sessionId}`);
    console.log(` User-Agent: ${fp.userAgent.slice(0, 60)}...`);
    console.log(` Accept-Language: ${fp.acceptLanguage}`);
    console.log(` Sec-CH-UA: ${fp.secChUa || '(not set)'}`);
    console.log('');

    // ============================================================
    // STEP 3: Execute GraphQL query
    // ============================================================
    logStepBanner('│ STEP 3: Execute GraphQL Query (FilteredProducts) │');

    const variables = {
      includeEnterpriseSpecials: false,
      productsFilter: {
        dispensaryId: disp.platform_dispensary_id,
        pricingType: 'rec',
        Status: 'Active',
        types: [],
        useCache: true,
        isDefaultSort: true,
        sortBy: 'popularSortIdx',
        sortDirection: 1,
        bypassOnlineThresholds: true,
        isKioskMenu: false,
        removeProductsBelowOptionThresholds: false,
      },
      page: 0,
      perPage: 100,
    };

    console.log(` Endpoint: ${DUTCHIE_CONFIG.graphqlEndpoint}`);
    console.log(` Operation: FilteredProducts`);
    console.log(` Hash: ${GRAPHQL_HASHES.FilteredProducts.slice(0, 20)}...`);
    console.log(` dispensaryId: ${variables.productsFilter.dispensaryId}`);
    console.log(` pricingType: ${variables.productsFilter.pricingType}`);
    console.log(` Status: ${variables.productsFilter.Status}`);
    console.log(` perPage: ${variables.perPage}`);
    console.log('');
    console.log(' Sending request...');

    const startTime = Date.now();
    const result = await executeGraphQL(
      'FilteredProducts',
      variables,
      GRAPHQL_HASHES.FilteredProducts,
      { cName, maxRetries: 3 }
    );
    const elapsed = Date.now() - startTime;

    console.log(` Response time: ${elapsed}ms`);
    console.log('');

    // ============================================================
    // STEP 4: Process response
    // ============================================================
    logStepBanner('│ STEP 4: Process Response │');

    const data = result?.data?.filteredProducts;
    if (!data) {
      console.log(' ERROR: No data returned from GraphQL');
      // JSON.stringify(undefined) yields undefined, so fall back to String()
      // before slicing to avoid a TypeError masking the real problem.
      const rawResult = JSON.stringify(result, null, 2) ?? String(result);
      console.log(' Raw result:', rawResult.slice(0, 500));
      // Signal the failure to the caller; the session and pool are released
      // by the finally block below.
      process.exitCode = 1;
      return;
    }

    const products = data.products || [];
    const totalCount = data.queryInfo?.totalCount || 0;
    // Derive the page count from the page size actually requested.
    const totalPages = Math.ceil(totalCount / variables.perPage);

    console.log(` Total products: ${totalCount}`);
    console.log(` Products in page: ${products.length}`);
    console.log(` Total pages: ${totalPages}`);
    console.log('');

    // Show first few products
    console.log(' First 5 products:');
    console.log(' ─────────────────────────────────────────────────────────');
    for (let i = 0; i < Math.min(5, products.length); i++) {
      const p = products[i];
      const name = (p.name || 'Unknown').slice(0, 40);
      const brand = (p.brand?.name || 'Unknown').slice(0, 15);
      const price = p.Prices?.[0]?.price || p.medPrice || p.recPrice || 'N/A';
      console.log(` ${i + 1}. ${name.padEnd(42)} | ${brand.padEnd(17)} | $${price}`);
    }
    console.log('');

    // ============================================================
    // STEP 5: End session
    // ============================================================
    logStepBanner('│ STEP 5: End Session │');

    endSession();
    sessionActive = false;
    console.log('');

    // ============================================================
    // SUMMARY
    // ============================================================
    const storeName = String(disp.name ?? 'Unknown');
    console.log('╔════════════════════════════════════════════════════════════╗');
    console.log('║ SUMMARY ║');
    console.log('╠════════════════════════════════════════════════════════════╣');
    console.log(`║ Store: ${storeName.slice(0, 38).padEnd(38)} ║`);
    console.log(`║ Products Found: ${String(totalCount).padEnd(38)} ║`);
    console.log(`║ Response Time: ${(elapsed + 'ms').padEnd(38)} ║`);
    console.log(`║ Status: ${'SUCCESS'.padEnd(38)} ║`);
    console.log('╚════════════════════════════════════════════════════════════╝');

  } catch (error: any) {
    console.error('');
    console.error('╔════════════════════════════════════════════════════════════╗');
    console.error('║ ERROR ║');
    console.error('╚════════════════════════════════════════════════════════════╝');
    console.error(` ${error.message}`);
    if (error.stack) {
      console.error('');
      console.error('Stack trace:');
      console.error(error.stack.split('\n').slice(0, 5).join('\n'));
    }
    // Use exitCode rather than process.exit(1): exit() would terminate the
    // process before the finally block could release the session and pool.
    process.exitCode = 1;
  } finally {
    try {
      if (sessionActive) {
        endSession();
      }
    } finally {
      await pool.end();
    }
  }
}

main();
|
||||
@@ -23,6 +23,7 @@ import {
|
||||
DutchieNormalizer,
|
||||
hydrateToCanonical,
|
||||
} from '../hydration';
|
||||
import { initializeImageStorage } from '../utils/image-storage';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
@@ -137,6 +138,11 @@ async function main() {
|
||||
console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
|
||||
console.log('============================================================\n');
|
||||
|
||||
// Initialize image storage
|
||||
console.log('[Init] Initializing image storage...');
|
||||
await initializeImageStorage();
|
||||
console.log(' Image storage ready\n');
|
||||
|
||||
try {
|
||||
// Step 1: Get dispensary info
|
||||
console.log('[Step 1] Getting dispensary info...');
|
||||
|
||||
268
backend/src/scripts/test-image-download.ts
Normal file
268
backend/src/scripts/test-image-download.ts
Normal file
@@ -0,0 +1,268 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Test Image Download - Tests image downloading with a small batch of products
|
||||
*
|
||||
* Usage:
|
||||
* DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
|
||||
* STORAGE_DRIVER=local STORAGE_BASE_PATH=./storage \
|
||||
* npx tsx src/scripts/test-image-download.ts <dispensaryId> [limit]
|
||||
*
|
||||
* Example:
|
||||
* DATABASE_URL="..." npx tsx src/scripts/test-image-download.ts 112 5
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import dotenv from 'dotenv';
|
||||
import {
|
||||
executeGraphQL,
|
||||
startSession,
|
||||
endSession,
|
||||
GRAPHQL_HASHES,
|
||||
} from '../platforms/dutchie';
|
||||
import { DutchieNormalizer } from '../hydration/normalizers/dutchie';
|
||||
import { hydrateToCanonical } from '../hydration/canonical-upsert';
|
||||
import { initializeImageStorage, getStorageStats } from '../utils/image-storage';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
// ============================================================
|
||||
// DATABASE CONNECTION
|
||||
// ============================================================
|
||||
|
||||
/**
 * Resolve the PostgreSQL connection string used by this script.
 *
 * Precedence:
 *   1. DATABASE_URL
 *   2. CANNAIQ_DB_URL (honoured here for parity with crawl-single-store.ts)
 *   3. A URL assembled from CANNAIQ_DB_HOST / _PORT / _NAME / _USER / _PASS,
 *      each falling back to a local-development default.
 *
 * @returns A `postgresql://` connection URL.
 */
function getConnectionString(): string {
  const explicitUrl = process.env.DATABASE_URL || process.env.CANNAIQ_DB_URL;
  if (explicitUrl) {
    return explicitUrl;
  }

  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';

  // Percent-encode the credentials: a raw '@', ':' or '/' inside the user
  // name or password would otherwise be parsed as URL structure and yield a
  // wrong host or a rejected connection string.
  const user = encodeURIComponent(process.env.CANNAIQ_DB_USER || 'dutchie');
  const pass = encodeURIComponent(process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass');

  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
|
||||
|
||||
const pool = new Pool({ connectionString: getConnectionString() });
|
||||
|
||||
// ============================================================
|
||||
// MAIN
|
||||
// ============================================================
|
||||
|
||||
/**
 * Print a three-line boxed banner that introduces one step of the run.
 *
 * @param titleLine - The pre-formatted middle line of the box.
 */
function logStepBanner(titleLine: string): void {
  console.log('┌─────────────────────────────────────────────────────────────┐');
  console.log(titleLine);
  console.log('└─────────────────────────────────────────────────────────────┘');
}

/**
 * End-to-end image download test for one dispensary.
 *
 * Arguments (from `process.argv`): `<dispensaryId> [limit]`, where `limit`
 * defaults to 5 and is used as the GraphQL `perPage`.
 *
 * Steps: initialise image storage, load the dispensary row, delete the
 * dispensary's store_products rows, fetch up to `limit` products, normalise
 * and hydrate them with `downloadImages: true`, then report storage stats and
 * the stored `local_image_path` values.
 *
 * WARNING: step 3 deletes every store_products row for the dispensary.
 *
 * Exit behaviour: a missing/invalid id exits immediately with code 1. Later
 * failures set `process.exitCode = 1` so the `finally` block can still end
 * the stealth session and close the database pool.
 */
async function main() {
  const dispensaryId = parseInt(process.argv[2], 10);
  const limit = parseInt(process.argv[3], 10) || 5;

  if (!dispensaryId) {
    console.error('Usage: npx tsx src/scripts/test-image-download.ts <dispensaryId> [limit]');
    console.error('Example: npx tsx src/scripts/test-image-download.ts 112 5');
    process.exit(1);
  }

  console.log('');
  console.log('╔════════════════════════════════════════════════════════════╗');
  console.log('║ IMAGE DOWNLOAD TEST ║');
  console.log('╚════════════════════════════════════════════════════════════╝');
  console.log('');

  // True while a stealth session is open, so every exit path can close it.
  let sessionActive = false;

  try {
    // Initialize image storage
    logStepBanner('│ STEP 1: Initialize Image Storage │');
    await initializeImageStorage();
    const statsBefore = await getStorageStats();
    console.log(` Base path: ${statsBefore.basePath}`);
    console.log(` Products before: ${statsBefore.productCount}`);
    console.log(` Brands before: ${statsBefore.brandCount}`);
    console.log('');

    // Get dispensary info
    logStepBanner('│ STEP 2: Load Dispensary Info │');

    const dispResult = await pool.query(`
      SELECT
        id, name, platform_dispensary_id, menu_url, state, slug
      FROM dispensaries
      WHERE id = $1
    `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(` Dispensary: ${disp.name}`);
    console.log(` State: ${disp.state}`);
    console.log(` Slug: ${disp.slug}`);
    console.log(` Platform ID: ${disp.platform_dispensary_id}`);
    console.log('');

    // Delete ALL existing store_products for this dispensary (the query has
    // no further filter) so the fetched products are treated as "new".
    // Destructive: the rows are gone even if the later fetch returns nothing.
    logStepBanner('│ STEP 3: Clear Store Products (to test new product flow) │');

    const deleteResult = await pool.query(`
      DELETE FROM store_products
      WHERE dispensary_id = $1
      RETURNING id
    `, [dispensaryId]);
    console.log(` Deleted ${deleteResult.rowCount} existing store_products`);
    console.log('');

    // Fetch products from Dutchie
    logStepBanner('│ STEP 4: Fetch Products from Dutchie (limited) │');

    // cName is the path segment after /embedded-menu/ or /dispensary/ in
    // menu_url; fall back to the literal 'dispensary' when absent.
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';

    // NOTE(review): the timezone is hard-coded to America/Phoenix even when
    // disp.state is not AZ — confirm this is intended for non-Arizona stores.
    const session = startSession(disp.state || 'AZ', 'America/Phoenix');
    sessionActive = true;
    console.log(` Session ID: ${session.sessionId}`);
    console.log(` cName: ${cName}`);
    console.log(` Limit: ${limit} products`);

    const variables = {
      includeEnterpriseSpecials: false,
      productsFilter: {
        dispensaryId: disp.platform_dispensary_id,
        pricingType: 'rec',
        Status: 'Active',
        types: [],
        useCache: true,
        isDefaultSort: true,
        sortBy: 'popularSortIdx',
        sortDirection: 1,
        bypassOnlineThresholds: true,
        isKioskMenu: false,
        removeProductsBelowOptionThresholds: false,
      },
      page: 0,
      perPage: limit, // Only fetch limited products
    };

    const startTime = Date.now();
    const result = await executeGraphQL(
      'FilteredProducts',
      variables,
      GRAPHQL_HASHES.FilteredProducts,
      { cName, maxRetries: 3 }
    );
    const elapsed = Date.now() - startTime;

    endSession();
    sessionActive = false;

    const products = result?.data?.filteredProducts?.products || [];
    console.log(` Fetched: ${products.length} products in ${elapsed}ms`);

    // Show products with images
    console.log('');
    console.log(' Products with images:');
    for (let i = 0; i < products.length; i++) {
      const p = products[i];
      const hasImage = !!p.Image;
      // Fall back to 'Unknown' so a missing name stays column-aligned instead
      // of printing the literal text "undefined".
      const productName = (p.name || 'Unknown').slice(0, 40);
      const brandName = p.brand?.name || 'Unknown';
      console.log(` ${i + 1}. ${productName.padEnd(42)} | ${brandName.slice(0, 15).padEnd(17)} | ${hasImage ? '✓ has image' : '✗ no image'}`);
    }
    console.log('');

    // Normalize and hydrate
    logStepBanner('│ STEP 5: Normalize and Hydrate (with image download) │');

    const normalizer = new DutchieNormalizer();
    // Wrap products in expected payload format
    const payload = {
      raw_json: products, // DutchieNormalizer.extractProducts handles arrays
      dispensary_id: dispensaryId,
    };
    const normResult = normalizer.normalize(payload);
    console.log(` Normalized products: ${normResult.products.length}`);
    console.log(` Brands found: ${normResult.brands.length}`);

    const hydrateStart = Date.now();
    const hydrateResult = await hydrateToCanonical(
      pool,
      dispensaryId,
      normResult,
      null, // no crawl run ID for test
      { dryRun: false, downloadImages: true }
    );
    const hydrateElapsed = Date.now() - hydrateStart;

    console.log('');
    console.log(` Hydration time: ${hydrateElapsed}ms`);
    console.log(` Products new: ${hydrateResult.productsNew}`);
    console.log(` Products updated: ${hydrateResult.productsUpdated}`);
    console.log(` Images downloaded: ${hydrateResult.imagesDownloaded}`);
    console.log(` Images skipped: ${hydrateResult.imagesSkipped}`);
    console.log(` Images failed: ${hydrateResult.imagesFailed}`);
    console.log(` Image bytes: ${(hydrateResult.imagesBytesTotal / 1024).toFixed(1)} KB`);
    console.log('');

    // Check storage stats
    logStepBanner('│ STEP 6: Verify Storage │');

    const statsAfter = await getStorageStats();
    console.log(` Products after: ${statsAfter.productCount}`);
    console.log(` Brands after: ${statsAfter.brandCount}`);
    console.log(` Total size: ${(statsAfter.totalSizeBytes / 1024).toFixed(1)} KB`);
    console.log('');

    // Check database for local_image_path
    logStepBanner('│ STEP 7: Check Database for Local Image Paths │');

    const dbCheck = await pool.query(`
      SELECT
        id, name_raw, local_image_path, images
      FROM store_products
      WHERE dispensary_id = $1
      LIMIT 10
    `, [dispensaryId]);

    for (const row of dbCheck.rows) {
      const hasLocal = !!row.local_image_path;
      const hasImages = !!row.images;
      const rowName = (row.name_raw || 'Unknown').slice(0, 40);
      console.log(` ${row.id}: ${rowName.padEnd(42)} | local: ${hasLocal ? '✓' : '✗'} | images: ${hasImages ? '✓' : '✗'}`);
      if (row.local_image_path) {
        console.log(` → ${row.local_image_path}`);
      }
    }
    console.log('');

    // Summary
    const storeName = String(disp.name ?? 'Unknown');
    console.log('╔════════════════════════════════════════════════════════════╗');
    console.log('║ SUMMARY ║');
    console.log('╠════════════════════════════════════════════════════════════╣');
    console.log(`║ Dispensary: ${storeName.slice(0, 37).padEnd(37)} ║`);
    console.log(`║ Products crawled: ${String(products.length).padEnd(37)} ║`);
    console.log(`║ Images downloaded: ${String(hydrateResult.imagesDownloaded).padEnd(37)} ║`);
    console.log(`║ Total image bytes: ${((hydrateResult.imagesBytesTotal / 1024).toFixed(1) + ' KB').padEnd(37)} ║`);
    console.log(`║ Status: ${'SUCCESS'.padEnd(37)} ║`);
    console.log('╚════════════════════════════════════════════════════════════╝');

  } catch (error: any) {
    console.error('');
    console.error('╔════════════════════════════════════════════════════════════╗');
    console.error('║ ERROR ║');
    console.error('╚════════════════════════════════════════════════════════════╝');
    console.error(` ${error.message}`);
    if (error.stack) {
      console.error('');
      console.error('Stack trace:');
      console.error(error.stack.split('\n').slice(0, 5).join('\n'));
    }
    // Use exitCode rather than process.exit(1): exit() would terminate the
    // process before the finally block could release the session and pool.
    process.exitCode = 1;
  } finally {
    try {
      if (sessionActive) {
        endSession();
      }
    } finally {
      await pool.end();
    }
  }
}

main();
|
||||
80
backend/src/scripts/test-image-proxy.ts
Normal file
80
backend/src/scripts/test-image-proxy.ts
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Test Image Proxy - Standalone test without backend
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx src/scripts/test-image-proxy.ts
|
||||
*/
|
||||
|
||||
import express from 'express';
|
||||
import imageProxyRoutes from '../routes/image-proxy';
|
||||
|
||||
// Standalone Express app that exposes only the image proxy under /img.
const PORT = 3099;
const app = express();
app.use('/img', imageProxyRoutes);

// Path of the stored sample image that most test cases request.
const SAMPLE_IMAGE_PATH =
  '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp';

// Requests issued against the proxy, in order. Results are only printed;
// nothing is asserted.
const proxyTestCases = [
  { name: 'Original image', url: SAMPLE_IMAGE_PATH },
  { name: 'Resize to 200px width', url: `${SAMPLE_IMAGE_PATH}?w=200` },
  { name: 'Resize to 100x100 cover', url: `${SAMPLE_IMAGE_PATH}?w=100&h=100&fit=cover` },
  { name: 'Grayscale + blur', url: `${SAMPLE_IMAGE_PATH}?w=200&gray=1&blur=2` },
  { name: 'Convert to JPEG', url: `${SAMPLE_IMAGE_PATH}?w=200&format=jpeg&q=70` },
  { name: 'Non-existent image', url: '/img/products/az/nonexistent/image.webp' },
];

// Once the server is listening, request each case sequentially, print the
// status, content type and payload size, then exit the process.
app.listen(PORT, async () => {
  console.log(`Test image proxy running on http://localhost:${PORT}`);
  console.log('');
  console.log('Testing image proxy...');
  console.log('');

  const axios = require('axios');

  for (const { name, url } of proxyTestCases) {
    try {
      // validateStatus always returns true so non-2xx responses are reported
      // below instead of being thrown by axios.
      const reply = await axios.get(`http://localhost:${PORT}${url}`, {
        responseType: 'arraybuffer',
        validateStatus: () => true,
      });

      const kilobytes = (reply.data.length / 1024).toFixed(1);
      const shownUrl = url.length > 80 ? `${url.slice(0, 80)}...` : url;

      console.log(`${name}:`);
      console.log(` URL: ${shownUrl}`);
      console.log(` Status: ${reply.status}`);
      console.log(` Content-Type: ${reply.headers['content-type']}`);
      console.log(` Size: ${kilobytes} KB`);
      console.log('');
    } catch (error: any) {
      console.log(`${name}: ERROR - ${error.message}`);
      console.log('');
    }
  }

  console.log('Tests complete!');
  process.exit(0);
});
|
||||
117
backend/src/scripts/test-stealth-session.ts
Normal file
117
backend/src/scripts/test-stealth-session.ts
Normal file
@@ -0,0 +1,117 @@
|
||||
/**
|
||||
* Test script for stealth session management
|
||||
*
|
||||
* Tests:
|
||||
* 1. Per-session fingerprint rotation
|
||||
* 2. Geographic consistency (timezone → Accept-Language)
|
||||
* 3. Proxy location loading from database
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx src/scripts/test-stealth-session.ts
|
||||
*/
|
||||
|
||||
import {
|
||||
startSession,
|
||||
endSession,
|
||||
getCurrentSession,
|
||||
getFingerprint,
|
||||
getRandomFingerprint,
|
||||
getLocaleForTimezone,
|
||||
buildHeaders,
|
||||
} from '../platforms/dutchie';
|
||||
|
||||
// Horizontal rule used for the opening and closing banners.
const RULE = '='.repeat(60);

console.log(RULE);
console.log('STEALTH SESSION TEST');
console.log(RULE);

// Test 1: print the locale returned for several timezones, including an
// undefined and an unrecognised value.
console.log('\n[Test 1] Timezone to Locale Mapping:');
const timezoneSamples = [
  'America/Phoenix',
  'America/Los_Angeles',
  'America/New_York',
  'America/Chicago',
  undefined,
  'Invalid/Timezone',
];

timezoneSamples.forEach((zone) => {
  const mappedLocale = getLocaleForTimezone(zone);
  console.log(` ${zone || '(undefined)'} → ${mappedLocale}`);
});

// Test 2: draw five random fingerprints and show the start of each user agent.
console.log('\n[Test 2] Random Fingerprint Selection (5 samples):');
for (const sampleNo of [1, 2, 3, 4, 5]) {
  const sample = getRandomFingerprint();
  console.log(` ${sampleNo}. ${sample.userAgent.slice(0, 60)}...`);
}

// Test 3: walk one session through its lifecycle, printing state at each stage.
console.log('\n[Test 3] Session Management:');

// Stage 1 - no session has been started yet.
console.log(' Before session:');
const fingerprintBefore = getFingerprint();
console.log(` getFingerprint(): ${fingerprintBefore.userAgent.slice(0, 50)}...`);
console.log(` getCurrentSession(): ${getCurrentSession()}`);

// Stage 2 - open a session for Arizona.
console.log('\n Starting session (AZ, America/Phoenix):');
const arizonaSession = startSession('AZ', 'America/Phoenix');
console.log(` Session ID: ${arizonaSession.sessionId}`);
console.log(` Fingerprint UA: ${arizonaSession.fingerprint.userAgent.slice(0, 50)}...`);
console.log(` Accept-Language: ${arizonaSession.fingerprint.acceptLanguage}`);
console.log(` Timezone: ${arizonaSession.timezone}`);

// Stage 3 - compare getFingerprint() with the session's own fingerprint.
console.log('\n During session:');
const fingerprintDuring = getFingerprint();
console.log(` getFingerprint(): ${fingerprintDuring.userAgent.slice(0, 50)}...`);
console.log(` Same as session? ${fingerprintDuring.userAgent === arizonaSession.fingerprint.userAgent}`);

// Stage 4 - headers built while the session is open.
console.log('\n buildHeaders() during session:');
const sessionHeaders = buildHeaders('/embedded-menu/test-store');
console.log(` User-Agent: ${sessionHeaders['user-agent'].slice(0, 50)}...`);
console.log(` Accept-Language: ${sessionHeaders['accept-language']}`);
console.log(` Origin: ${sessionHeaders['origin']}`);
console.log(` Referer: ${sessionHeaders['referer']}`);

// Stage 5 - close the session and print the current session again.
console.log('\n Ending session:');
endSession();
console.log(` getCurrentSession(): ${getCurrentSession()}`);

// Test 4: open and close ten sessions and count the distinct user agents seen.
console.log('\n[Test 4] Multiple Sessions (fingerprint variety):');
const seenUserAgents = new Set<string>();
for (let round = 0; round < 10; round++) {
  const rotated = startSession('CA', 'America/Los_Angeles');
  seenUserAgents.add(rotated.fingerprint.userAgent);
  endSession();
}

const distinctUserAgents = seenUserAgents.size;
console.log(` 10 sessions created, ${distinctUserAgents} unique fingerprints`);
console.log(` Variety: ${distinctUserAgents >= 3 ? '✅ Good' : '⚠️ Low - may need more fingerprint options'}`);

// Test 5: for each state/timezone pair, check that the session's
// Accept-Language contains 'en-US'.
console.log('\n[Test 5] Geographic Consistency:');
const geoCases: Array<[string, string]> = [
  ['AZ', 'America/Phoenix'],
  ['CA', 'America/Los_Angeles'],
  ['NY', 'America/New_York'],
  ['IL', 'America/Chicago'],
];

for (const [stateCode, zone] of geoCases) {
  const geoSession = startSession(stateCode, zone);
  const language = geoSession.fingerprint.acceptLanguage;
  const marker = language.includes('en-US') ? '✅' : '❌';
  console.log(` ${stateCode} (${zone}): Accept-Language=${language} ${marker}`);
  endSession();
}

console.log(`\n${RULE}`);
console.log('TEST COMPLETE');
console.log(RULE);
|
||||
144
backend/src/scripts/test-stealth-with-db.ts
Normal file
144
backend/src/scripts/test-stealth-with-db.ts
Normal file
@@ -0,0 +1,144 @@
|
||||
/**
|
||||
* Test script for stealth session with REAL proxy data from database
|
||||
*
|
||||
* Tests:
|
||||
* 1. Load proxies from database (with location data)
|
||||
* 2. Verify location fields (city, state, timezone) are loaded
|
||||
* 3. Start session with proxy's timezone
|
||||
* 4. Verify Accept-Language matches timezone
|
||||
*
|
||||
* Usage:
|
||||
* DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" npx tsx src/scripts/test-stealth-with-db.ts
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import {
|
||||
CrawlRotator,
|
||||
ProxyRotator,
|
||||
} from '../services/crawl-rotator';
|
||||
import {
|
||||
startSession,
|
||||
endSession,
|
||||
getLocaleForTimezone,
|
||||
} from '../platforms/dutchie';
|
||||
|
||||
// Connection string for the proxy database; this script has no built-in
// fallback, so it must be supplied through the environment.
const { DATABASE_URL } = process.env;

// Fail fast with a clear message before any database work is attempted.
if (!DATABASE_URL) {
  console.error('ERROR: DATABASE_URL environment variable is required');
  process.exit(1);
}
|
||||
|
||||
/**
 * Run the stealth-session checks against the proxies stored in the database.
 *
 * Steps:
 *   1. Initialize a CrawlRotator from the pool and print proxy counts.
 *   2. Print the location fields (city/state/country/timezone) of each proxy.
 *   3. Start a session with the first proxy that has a timezone and compare
 *      the session's Accept-Language with getLocaleForTimezone().
 *   4. Repeat the comparison once per distinct proxy timezone.
 *
 * The pool is always closed on exit. `process.exitCode` is set to 1 when an
 * error is thrown or when any locale comparison fails, so callers (CI, shell
 * scripts) can detect a failed run.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('STEALTH SESSION TEST WITH DATABASE');
  console.log('='.repeat(60));

  const pool = new Pool({ connectionString: DATABASE_URL });

  try {
    // Test 1: Load proxies with location data
    console.log('\n[Test 1] Loading proxies from database...');
    const rotator = new CrawlRotator(pool);
    await rotator.initialize();

    const stats = rotator.proxy.getStats();
    console.log(`  Total proxies: ${stats.totalProxies}`);
    console.log(`  Active proxies: ${stats.activeProxies}`);

    if (stats.activeProxies === 0) {
      console.log('\n  WARNING: No active proxies in database!');
      console.log('  Insert test proxies with:');
      console.log(`    INSERT INTO proxies (host, port, protocol, city, state, country_code, timezone, active)`);
      console.log(`    VALUES ('proxy1.example.com', 8080, 'http', 'Phoenix', 'AZ', 'US', 'America/Phoenix', true);`);
      return;
    }

    // Test 2: Check location data on proxies
    console.log('\n[Test 2] Checking proxy location data...');
    let proxyCount = 0;
    let withLocationCount = 0;

    // NOTE(review): assumes getNext() cycles through the pool so that
    // totalProxies calls visit each proxy once — confirm against ProxyRotator.
    for (let i = 0; i < stats.totalProxies; i++) {
      const proxy = rotator.proxy.getNext();
      if (!proxy) break;

      proxyCount++;
      const hasLocation = !!(proxy.stateCode || proxy.timezone);
      if (hasLocation) withLocationCount++;

      console.log(`  Proxy ${proxy.id}: ${proxy.host}:${proxy.port}`);
      console.log(`    City: ${proxy.city || '(not set)'}`);
      console.log(`    State: ${proxy.stateCode || '(not set)'}`);
      console.log(`    Country: ${proxy.countryCode || '(not set)'}`);
      console.log(`    Timezone: ${proxy.timezone || '(not set)'}`);
      console.log(`    Has location data: ${hasLocation ? '✅' : '❌'}`);
    }

    console.log(`\n  Summary: ${withLocationCount}/${proxyCount} proxies have location data`);

    // Counters feeding the final verdict: how many Accept-Language
    // comparisons ran and how many of them failed.
    let localeChecks = 0;
    let localeMismatches = 0;

    // Test 3: Start session using proxy's timezone
    console.log('\n[Test 3] Starting session with proxy timezone...');

    // Keep pulling proxies (at most totalProxies of them) until one with a
    // timezone shows up, instead of giving up after the first candidate.
    let firstProxy = rotator.proxy.getNext();
    for (let i = 1; i < stats.totalProxies && firstProxy && !firstProxy.timezone; i++) {
      firstProxy = rotator.proxy.getNext();
    }

    if (firstProxy && firstProxy.timezone) {
      console.log(`  Using proxy: ${firstProxy.host} (${firstProxy.city}, ${firstProxy.stateCode})`);
      console.log(`  Proxy timezone: ${firstProxy.timezone}`);

      const session = startSession(firstProxy.stateCode, firstProxy.timezone);
      console.log(`  Session ID: ${session.sessionId}`);
      console.log(`  Session timezone: ${session.timezone}`);
      console.log(`  Session Accept-Language: ${session.fingerprint.acceptLanguage}`);

      // Verify Accept-Language matches expected locale for timezone
      const expectedLocale = getLocaleForTimezone(firstProxy.timezone);
      const matches = session.fingerprint.acceptLanguage === expectedLocale;
      localeChecks++;
      if (!matches) localeMismatches++;
      console.log(`  Expected locale: ${expectedLocale}`);
      console.log(`  Locale matches: ${matches ? '✅' : '❌'}`);

      endSession();
    } else {
      console.log('  WARNING: No proxy with timezone data found');
    }

    // Test 4: Test each timezone in database
    console.log('\n[Test 4] Testing all proxy timezones...');
    const seenTimezones = new Set<string>();

    // Walk the rotation again (it is not reset; we simply continue from the
    // current position) and check every distinct timezone exactly once.
    for (let i = 0; i < stats.totalProxies; i++) {
      const proxy = rotator.proxy.getNext();
      if (!proxy || !proxy.timezone) continue;
      if (seenTimezones.has(proxy.timezone)) continue;

      seenTimezones.add(proxy.timezone);
      const session = startSession(proxy.stateCode, proxy.timezone);
      const matches = session.fingerprint.acceptLanguage === getLocaleForTimezone(proxy.timezone);
      localeChecks++;
      if (!matches) localeMismatches++;

      console.log(`  ${proxy.timezone}:`);
      console.log(`    State: ${proxy.stateCode || 'unknown'}`);
      console.log(`    Accept-Language: ${session.fingerprint.acceptLanguage}`);
      console.log(`    Locale matches: ${matches ? '✅' : '❌'}`);
      endSession();
    }

    console.log('\n' + '='.repeat(60));
    console.log('TEST COMPLETE');
    console.log('='.repeat(60));

    if (withLocationCount === 0) {
      console.log('\n⚠️  No proxies have location data.');
      console.log('   Geographic consistency will use default locale (en-US).');
      console.log('   To enable geo-consistency, populate city/state/timezone on proxies.');
    } else if (localeMismatches > 0) {
      // Only claim success when every comparison actually passed.
      console.log(`\n❌ ${localeMismatches}/${localeChecks} locale checks failed.`);
      console.log('   Accept-Language did not match the locale expected for the proxy timezone.');
      process.exitCode = 1;
    } else if (localeChecks === 0) {
      // Proxies carry a state code but no timezone, so nothing was verified.
      console.log('\n⚠️  No proxies have timezone data; Accept-Language could not be verified.');
      console.log('   To enable geo-consistency, populate city/state/timezone on proxies.');
    } else {
      console.log('\n✅ Stealth session with geo-consistency is working!');
      console.log('   Sessions will use Accept-Language matching proxy timezone.');
    }

  } catch (error) {
    console.error('Error:', error);
    // Surface the failure to the calling shell instead of exiting with 0.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
|
||||
|
||||
main();
|
||||
Reference in New Issue
Block a user