feat(images): Add local image storage with on-demand resizing
- Store product images locally with hierarchy: /images/products/<state>/<store>/<brand>/<product>/
- Add /img/* proxy endpoint for on-demand resizing via Sharp
- Implement per-product image checking to skip existing downloads
- Fix pathToUrl() to correctly generate /images/... URLs
- Add frontend getImageUrl() helper with preset sizes (thumb, medium, large)
- Update all product pages to use optimized image URLs
- Add stealth session support for Dutchie GraphQL crawls
- Include test scripts for crawl and image verification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
12
backend/migrations/073_proxy_timezone.sql
Normal file
12
backend/migrations/073_proxy_timezone.sql
Normal file
@@ -0,0 +1,12 @@
|
||||
-- Add timezone column to proxies table for geo-consistent fingerprinting.
-- This allows matching Accept-Language and other headers to proxy location.
-- Both statements use IF NOT EXISTS so the migration can be re-run safely.

ALTER TABLE proxies
  ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);

-- Add timezone to failed_proxies as well
ALTER TABLE failed_proxies
  ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);

-- Comments explaining usage (kept identical on both tables)
COMMENT ON COLUMN proxies.timezone IS 'IANA timezone (e.g., America/Phoenix) for geo-consistent fingerprinting';
COMMENT ON COLUMN failed_proxies.timezone IS 'IANA timezone (e.g., America/Phoenix) for geo-consistent fingerprinting';
|
||||
@@ -16,6 +16,12 @@ import {
|
||||
NormalizedBrand,
|
||||
NormalizationResult,
|
||||
} from './types';
|
||||
import {
|
||||
downloadProductImage,
|
||||
ProductImageContext,
|
||||
isImageStorageReady,
|
||||
LocalImageSizes,
|
||||
} from '../utils/image-storage';
|
||||
|
||||
const BATCH_SIZE = 100;
|
||||
|
||||
@@ -23,10 +29,21 @@ const BATCH_SIZE = 100;
|
||||
// PRODUCT UPSERTS
|
||||
// ============================================================
|
||||
|
||||
/**
 * Summary of a single product row touched by an upsert.
 * Carries the fields the image download step reads (ids, brand, source URL).
 */
export interface NewProductInfo {
  /** store_products.id */
  id: number;
  /** provider_product_id */
  externalProductId: string;
  name: string;
  brandName: string | null;
  primaryImageUrl: string | null;
  /** True if local_image_path is already set */
  hasLocalImage?: boolean;
}
|
||||
|
||||
/**
 * Outcome of a product upsert run: row counters plus the product lists
 * that later steps consume.
 */
export interface UpsertProductsResult {
  upserted: number;
  new: number;
  updated: number;
  /** Details of newly created products */
  newProducts: NewProductInfo[];
  /** Products (new or updated) that need image downloads */
  productsNeedingImages: NewProductInfo[];
}
|
||||
|
||||
/**
|
||||
@@ -41,12 +58,14 @@ export async function upsertStoreProducts(
|
||||
options: { dryRun?: boolean } = {}
|
||||
): Promise<UpsertProductsResult> {
|
||||
if (products.length === 0) {
|
||||
return { upserted: 0, new: 0, updated: 0 };
|
||||
return { upserted: 0, new: 0, updated: 0, newProducts: [], productsNeedingImages: [] };
|
||||
}
|
||||
|
||||
const { dryRun = false } = options;
|
||||
let newCount = 0;
|
||||
let updatedCount = 0;
|
||||
const newProducts: NewProductInfo[] = [];
|
||||
const productsNeedingImages: NewProductInfo[] = [];
|
||||
|
||||
// Process in batches
|
||||
for (let i = 0; i < products.length; i += BATCH_SIZE) {
|
||||
@@ -104,7 +123,7 @@ export async function upsertStoreProducts(
|
||||
image_url = EXCLUDED.image_url,
|
||||
last_seen_at = NOW(),
|
||||
updated_at = NOW()
|
||||
RETURNING (xmax = 0) as is_new`,
|
||||
RETURNING id, (xmax = 0) as is_new, (local_image_path IS NOT NULL) as has_local_image`,
|
||||
[
|
||||
product.dispensaryId,
|
||||
product.platform,
|
||||
@@ -129,10 +148,30 @@ export async function upsertStoreProducts(
|
||||
]
|
||||
);
|
||||
|
||||
if (result.rows[0]?.is_new) {
|
||||
const row = result.rows[0];
|
||||
const productInfo: NewProductInfo = {
|
||||
id: row.id,
|
||||
externalProductId: product.externalProductId,
|
||||
name: product.name,
|
||||
brandName: product.brandName,
|
||||
primaryImageUrl: product.primaryImageUrl,
|
||||
hasLocalImage: row.has_local_image,
|
||||
};
|
||||
|
||||
if (row.is_new) {
|
||||
newCount++;
|
||||
// Track new products
|
||||
newProducts.push(productInfo);
|
||||
// New products always need images (if they have a source URL)
|
||||
if (product.primaryImageUrl && !row.has_local_image) {
|
||||
productsNeedingImages.push(productInfo);
|
||||
}
|
||||
} else {
|
||||
updatedCount++;
|
||||
// Updated products need images only if they don't have a local image yet
|
||||
if (product.primaryImageUrl && !row.has_local_image) {
|
||||
productsNeedingImages.push(productInfo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -149,6 +188,8 @@ export async function upsertStoreProducts(
|
||||
upserted: newCount + updatedCount,
|
||||
new: newCount,
|
||||
updated: updatedCount,
|
||||
newProducts,
|
||||
productsNeedingImages,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -564,6 +605,19 @@ export async function upsertBrands(
|
||||
// FULL HYDRATION
|
||||
// ============================================================
|
||||
|
||||
/** Counters reported by an image download run. */
export interface ImageDownloadResult {
  /** Images fetched during this run */
  downloaded: number;
  /** Images not fetched (already present, dry run, or storage unavailable) */
  skipped: number;
  /** Downloads (or their follow-up database updates) that failed */
  failed: number;
  /** Sum of the byte counts reported for downloaded images */
  bytesTotal: number;
}
|
||||
|
||||
/** Store-level values used when building local image paths. */
export interface DispensaryContext {
  stateCode: string;
  storeSlug: string;
  /** True if store already has products with local images */
  hasExistingProducts?: boolean;
}
|
||||
|
||||
export interface HydratePayloadResult {
|
||||
productsUpserted: number;
|
||||
productsNew: number;
|
||||
@@ -574,6 +628,154 @@ export interface HydratePayloadResult {
|
||||
variantsUpserted: number;
|
||||
variantsNew: number;
|
||||
variantSnapshotsCreated: number;
|
||||
imagesDownloaded: number;
|
||||
imagesSkipped: number;
|
||||
imagesFailed: number;
|
||||
imagesBytesTotal: number;
|
||||
}
|
||||
|
||||
/**
 * Helper to create a slug from a string.
 *
 * Lowercases the input, collapses every run of characters outside `a-z0-9`
 * into a single hyphen, strips leading/trailing hyphens and caps the result
 * at 50 characters.
 *
 * @param str - Arbitrary text (e.g. a brand or store name).
 * @returns The slug, or `'unknown'` when nothing usable remains.
 */
function slugify(str: string): string {
  const slug = str
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .substring(0, 50)
    // Truncation can cut right after a separator; trim again so the slug
    // never ends with a dangling hyphen.
    .replace(/-+$/, '');

  return slug || 'unknown';
}
|
||||
|
||||
/**
 * Download images for the given products and record their local paths.
 *
 * Products without a `primaryImageUrl` are ignored. Every remaining product is
 * handed to `downloadProductImage`; when that reports success, the matching
 * `store_products` row is updated with `local_image_path` and an `images`
 * JSON document containing the `full`, `medium` and `thumb` URLs.
 *
 * @param pool - Database pool used for the `store_products` updates.
 * @param newProducts - Products to process (newly created or backfill).
 * @param dispensaryContext - State code and store slug used to build the image context.
 * @param options.dryRun - When true nothing is downloaded and every candidate is counted as skipped.
 * @param options.concurrency - Number of downloads started per batch (default 5).
 *   Values below 1 (or NaN) are treated as 1 so the batch loop always advances.
 * @returns Counters for downloaded, skipped and failed images plus total bytes downloaded.
 */
export async function downloadProductImages(
  pool: Pool,
  newProducts: NewProductInfo[],
  dispensaryContext: DispensaryContext,
  options: { dryRun?: boolean; concurrency?: number } = {}
): Promise<ImageDownloadResult> {
  const { dryRun = false, concurrency = 5 } = options;

  // A step of 0, a negative number or NaN would keep the batch loop from
  // making progress, so fall back to one download at a time.
  const batchSize = concurrency >= 1 ? concurrency : 1;

  // Filter products that have images to download
  const productsWithImages = newProducts.filter(
    (p): p is NewProductInfo & { primaryImageUrl: string } => Boolean(p.primaryImageUrl)
  );

  if (productsWithImages.length === 0) {
    return { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
  }

  // Check if image storage is ready
  if (!isImageStorageReady()) {
    console.warn('[ImageDownload] Image storage not initialized, skipping downloads');
    return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
  }

  if (dryRun) {
    console.log(`[DryRun] Would download ${productsWithImages.length} images`);
    return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
  }

  let downloaded = 0;
  let skipped = 0;
  let failed = 0;
  let bytesTotal = 0;

  // Process in batches with concurrency limit
  for (let i = 0; i < productsWithImages.length; i += batchSize) {
    const batch = productsWithImages.slice(i, i + batchSize);

    const results = await Promise.allSettled(
      batch.map(async (product) => {
        const ctx: ProductImageContext = {
          stateCode: dispensaryContext.stateCode,
          storeSlug: dispensaryContext.storeSlug,
          brandSlug: slugify(product.brandName || 'unknown'),
          productId: product.externalProductId,
        };

        const result = await downloadProductImage(product.primaryImageUrl, ctx, { skipIfExists: true });

        if (result.success) {
          const urls = result.urls;
          if (!urls) {
            // Rejecting here makes the product count as failed below instead of
            // crashing on an undefined property access.
            throw new Error(`Image download for product ${product.id} reported success without URLs`);
          }

          // Update the database with local image path
          const imagesJson = JSON.stringify({
            full: urls.full,
            medium: urls.medium,
            thumb: urls.thumb,
          });

          await pool.query(
            `UPDATE store_products
             SET local_image_path = $1, images = $2
             WHERE id = $3`,
            [urls.full, imagesJson, product.id]
          );
        }

        return result;
      })
    );

    for (const result of results) {
      if (result.status === 'fulfilled') {
        const downloadResult = result.value;
        if (downloadResult.success) {
          if (downloadResult.skipped) {
            skipped++;
          } else {
            downloaded++;
            bytesTotal += downloadResult.bytesDownloaded || 0;
          }
        } else {
          failed++;
          console.warn(`[ImageDownload] Failed: ${downloadResult.error}`);
        }
      } else {
        // Covers both download exceptions and failed database updates
        failed++;
        console.error(`[ImageDownload] Error:`, result.reason);
      }
    }
  }

  console.log(`[ImageDownload] Downloaded: ${downloaded}, Skipped: ${skipped}, Failed: ${failed}, Bytes: ${bytesTotal}`);
  return { downloaded, skipped, failed, bytesTotal };
}
|
||||
|
||||
/**
 * Load the values needed to build image paths for one dispensary.
 *
 * Reads state, slug and name from `dispensaries` and, in the same query,
 * whether any `store_products` row of that dispensary already has a
 * `local_image_path`.
 *
 * @param pool - Database pool to query.
 * @param dispensaryId - Primary key of the dispensary.
 * @returns The context, or `null` when the dispensary is not found or the
 *   query fails (the error is logged, not rethrown).
 */
async function getDispensaryContext(pool: Pool, dispensaryId: number): Promise<DispensaryContext | null> {
  try {
    const { rows } = await pool.query(
      `SELECT
        d.state,
        d.slug,
        d.name,
        EXISTS(
          SELECT 1 FROM store_products sp
          WHERE sp.dispensary_id = d.id
          AND sp.local_image_path IS NOT NULL
          LIMIT 1
        ) as has_local_images
      FROM dispensaries d
      WHERE d.id = $1`,
      [dispensaryId]
    );

    if (rows.length === 0) {
      return null;
    }

    const dispensary = rows[0];

    // Fall back to a slug derived from the name (or the id) when no slug is stored
    const storeSlug = dispensary.slug
      ? dispensary.slug
      : slugify(dispensary.name || `store-${dispensaryId}`);

    return {
      stateCode: dispensary.state ? dispensary.state : 'unknown',
      storeSlug,
      hasExistingProducts: dispensary.has_local_images,
    };
  } catch (error) {
    console.error('[getDispensaryContext] Error:', error);
    return null;
  }
}
|
||||
|
||||
/**
|
||||
@@ -584,9 +786,9 @@ export async function hydrateToCanonical(
|
||||
dispensaryId: number,
|
||||
normResult: NormalizationResult,
|
||||
crawlRunId: number | null,
|
||||
options: { dryRun?: boolean } = {}
|
||||
options: { dryRun?: boolean; downloadImages?: boolean } = {}
|
||||
): Promise<HydratePayloadResult> {
|
||||
const { dryRun = false } = options;
|
||||
const { dryRun = false, downloadImages: shouldDownloadImages = true } = options;
|
||||
|
||||
// 1. Upsert brands
|
||||
const brandResult = await upsertBrands(pool, normResult.brands, { dryRun });
|
||||
@@ -634,6 +836,36 @@ export async function hydrateToCanonical(
|
||||
{ dryRun }
|
||||
);
|
||||
|
||||
// 6. Download images for products that need them
|
||||
// This includes:
|
||||
// - New products (always need images)
|
||||
// - Updated products that don't have local images yet (backfill)
|
||||
// This avoids:
|
||||
// - Filesystem checks for products that already have local images
|
||||
// - Unnecessary HTTP requests for products with existing images
|
||||
let imageResult: ImageDownloadResult = { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
|
||||
|
||||
if (shouldDownloadImages && productResult.productsNeedingImages.length > 0) {
|
||||
const dispensaryContext = await getDispensaryContext(pool, dispensaryId);
|
||||
|
||||
if (dispensaryContext) {
|
||||
const newCount = productResult.productsNeedingImages.filter(p => !p.hasLocalImage).length;
|
||||
const backfillCount = productResult.productsNeedingImages.length - newCount;
|
||||
console.log(`[Hydration] Downloading images for ${productResult.productsNeedingImages.length} products (${productResult.new} new, ${backfillCount} backfill)...`);
|
||||
imageResult = await downloadProductImages(
|
||||
pool,
|
||||
productResult.productsNeedingImages,
|
||||
dispensaryContext,
|
||||
{ dryRun }
|
||||
);
|
||||
} else {
|
||||
console.warn(`[Hydration] Could not get dispensary context for ID ${dispensaryId}, skipping image downloads`);
|
||||
}
|
||||
} else if (productResult.productsNeedingImages.length === 0 && productResult.upserted > 0) {
|
||||
// All products already have local images
|
||||
console.log(`[Hydration] All ${productResult.upserted} products already have local images, skipping downloads`);
|
||||
}
|
||||
|
||||
return {
|
||||
productsUpserted: productResult.upserted,
|
||||
productsNew: productResult.new,
|
||||
@@ -644,5 +876,9 @@ export async function hydrateToCanonical(
|
||||
variantsUpserted: variantResult.upserted,
|
||||
variantsNew: variantResult.new,
|
||||
variantSnapshotsCreated: variantResult.snapshotsCreated,
|
||||
imagesDownloaded: imageResult.downloaded,
|
||||
imagesSkipped: imageResult.skipped,
|
||||
imagesFailed: imageResult.failed,
|
||||
imagesBytesTotal: imageResult.bytesTotal,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import { initializeImageStorage } from './utils/image-storage';
|
||||
import { logger } from './services/logger';
|
||||
import { cleanupOrphanedJobs } from './services/proxyTestQueue';
|
||||
import healthRoutes from './routes/health';
|
||||
import imageProxyRoutes from './routes/image-proxy';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
@@ -29,6 +30,10 @@ app.use(express.json());
|
||||
const LOCAL_IMAGES_PATH = process.env.LOCAL_IMAGES_PATH || './public/images';
|
||||
app.use('/images', express.static(LOCAL_IMAGES_PATH));
|
||||
|
||||
// Image proxy with on-demand resizing
|
||||
// Usage: /img/products/az/store/brand/product/image.webp?w=200&h=200
|
||||
app.use('/img', imageProxyRoutes);
|
||||
|
||||
// Serve static downloads (plugin files, etc.)
|
||||
// Uses ./public/downloads relative to working directory (works for both Docker and local dev)
|
||||
const LOCAL_DOWNLOADS_PATH = process.env.LOCAL_DOWNLOADS_PATH || './public/downloads';
|
||||
|
||||
@@ -213,7 +213,24 @@ const FINGERPRINTS: Fingerprint[] = [
|
||||
|
||||
let currentFingerprintIndex = 0;
|
||||
|
||||
// Holder for the active crawl session; null while no session is running.
// Declared here, ahead of the CrawlSession interface defined later in this
// file, so that getFingerprint() below can read it.
let currentSession: null | {
  sessionId: string;
  fingerprint: Fingerprint;
  proxyUrl: string | null;
  stateCode?: string;
  timezone?: string;
  startedAt: Date;
} = null;
|
||||
|
||||
/**
 * Return the fingerprint requests should currently use.
 *
 * An active session's fingerprint takes precedence; without a session the
 * entry of FINGERPRINTS at the current rotation index is returned.
 */
export function getFingerprint(): Fingerprint {
  return currentSession
    ? currentSession.fingerprint
    : FINGERPRINTS[currentFingerprintIndex];
}
|
||||
|
||||
@@ -228,6 +245,103 @@ export function resetFingerprint(): void {
|
||||
currentFingerprintIndex = 0;
|
||||
}
|
||||
|
||||
/**
 * Pick one entry of the FINGERPRINTS pool uniformly at random.
 */
export function getRandomFingerprint(): Fingerprint {
  const pick = Math.floor(Math.random() * FINGERPRINTS.length);
  const chosen = FINGERPRINTS[pick];
  return chosen;
}
|
||||
|
||||
// ============================================================
|
||||
// SESSION MANAGEMENT
|
||||
// Per-session fingerprint rotation for stealth
|
||||
// ============================================================
|
||||
|
||||
/** State kept for one crawl session (see startSession / endSession). */
export interface CrawlSession {
  /** Identifier generated when the session starts */
  sessionId: string;
  /** Fingerprint used for the whole session */
  fingerprint: Fingerprint;
  /** Proxy URL captured at session start, or null */
  proxyUrl: string | null;
  /** State code supplied by the caller, if any */
  stateCode?: string;
  /** Timezone supplied by the caller, if any */
  timezone?: string;
  /** Moment the session was started */
  startedAt: Date;
}
|
||||
|
||||
// Note: currentSession variable declared earlier in file for proper scoping
|
||||
|
||||
/**
 * Timezone to Accept-Language mapping
 * US timezones all use en-US but this can be extended for international
 */
const TIMEZONE_TO_LOCALE: Record<string, string> = {
  'America/Phoenix': 'en-US,en;q=0.9',
  'America/Los_Angeles': 'en-US,en;q=0.9',
  'America/Denver': 'en-US,en;q=0.9',
  'America/Chicago': 'en-US,en;q=0.9',
  'America/New_York': 'en-US,en;q=0.9',
  'America/Detroit': 'en-US,en;q=0.9',
  'America/Anchorage': 'en-US,en;q=0.9',
  'Pacific/Honolulu': 'en-US,en;q=0.9',
};

/**
 * Get Accept-Language header for a given timezone.
 *
 * @param timezone - IANA timezone name (e.g. `America/Phoenix`); optional.
 * @returns The mapped Accept-Language value, or `en-US,en;q=0.9` when the
 *   timezone is missing or not present in the mapping.
 */
export function getLocaleForTimezone(timezone?: string): string {
  const fallback = 'en-US,en;q=0.9';
  if (!timezone) return fallback;

  // Only consult own keys: a plain object lookup would otherwise resolve
  // inherited names such as "constructor" or "toString" to functions.
  if (!Object.prototype.hasOwnProperty.call(TIMEZONE_TO_LOCALE, timezone)) {
    return fallback;
  }

  return TIMEZONE_TO_LOCALE[timezone] || fallback;
}
|
||||
|
||||
/**
 * Begin a new crawl session and make it the active one.
 *
 * A fingerprint is drawn at random and its Accept-Language is replaced with
 * the value mapped from `timezone`. The proxy URL active at this moment is
 * stored on the session. Any previously active session is overwritten.
 *
 * @param stateCode - Optional state code recorded on the session.
 * @param timezone - Optional timezone used to choose Accept-Language.
 * @returns The session that is now active.
 */
export function startSession(stateCode?: string, timezone?: string): CrawlSession {
  const randomFingerprint = getRandomFingerprint();

  // Override Accept-Language based on timezone for geographic consistency
  const fingerprint: Fingerprint = {
    ...randomFingerprint,
    acceptLanguage: getLocaleForTimezone(timezone),
  };

  const randomSuffix = Math.random().toString(36).slice(2, 8);
  const session: CrawlSession = {
    sessionId: `session_${Date.now()}_${randomSuffix}`,
    fingerprint,
    proxyUrl: currentProxy,
    stateCode,
    timezone,
    startedAt: new Date(),
  };
  currentSession = session;

  const logLines = [
    `[Dutchie Client] Started session ${session.sessionId}`,
    `[Dutchie Client] Fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`,
    `[Dutchie Client] Accept-Language: ${fingerprint.acceptLanguage}`,
  ];
  if (timezone) {
    logLines.push(`[Dutchie Client] Timezone: ${timezone}`);
  }
  for (const line of logLines) {
    console.log(line);
  }

  return session;
}
|
||||
|
||||
/**
 * Close the active crawl session, logging how long it lasted (whole seconds).
 * Does nothing when no session is active.
 */
export function endSession(): void {
  if (!currentSession) {
    return;
  }

  const { sessionId, startedAt } = currentSession;
  const elapsedMs = Date.now() - startedAt.getTime();
  const duration = Math.round(elapsedMs / 1000);
  console.log(`[Dutchie Client] Ended session ${sessionId} (${duration}s)`);
  currentSession = null;
}
|
||||
|
||||
/**
 * Return the active crawl session, or null when none has been started
 * (or the last one was ended). The stored object itself is returned, not a copy.
 */
export function getCurrentSession(): CrawlSession | null {
  const active = currentSession;
  return active;
}
|
||||
|
||||
// ============================================================
|
||||
// CURL HTTP CLIENT
|
||||
// ============================================================
|
||||
|
||||
@@ -18,6 +18,13 @@ export {
|
||||
getFingerprint,
|
||||
rotateFingerprint,
|
||||
resetFingerprint,
|
||||
getRandomFingerprint,
|
||||
getLocaleForTimezone,
|
||||
|
||||
// Session Management (per-store fingerprint rotation)
|
||||
startSession,
|
||||
endSession,
|
||||
getCurrentSession,
|
||||
|
||||
// Proxy
|
||||
setProxy,
|
||||
@@ -32,6 +39,7 @@ export {
|
||||
// Types
|
||||
type CurlResponse,
|
||||
type Fingerprint,
|
||||
type CrawlSession,
|
||||
type ExecuteGraphQLOptions,
|
||||
type FetchPageOptions,
|
||||
} from './client';
|
||||
|
||||
@@ -8,10 +8,12 @@ router.use(authMiddleware);
|
||||
// Valid menu_type values
|
||||
const VALID_MENU_TYPES = ['dutchie', 'treez', 'jane', 'weedmaps', 'leafly', 'meadow', 'blaze', 'flowhub', 'dispense', 'cova', 'other', 'unknown'];
|
||||
|
||||
// Get all dispensaries
|
||||
// Get all dispensaries (with pagination)
|
||||
router.get('/', async (req, res) => {
|
||||
try {
|
||||
const { menu_type, city, state, crawl_enabled, dutchie_verified } = req.query;
|
||||
const { menu_type, city, state, crawl_enabled, dutchie_verified, limit, offset, search } = req.query;
|
||||
const pageLimit = Math.min(parseInt(limit as string) || 50, 500);
|
||||
const pageOffset = parseInt(offset as string) || 0;
|
||||
|
||||
let query = `
|
||||
SELECT
|
||||
@@ -98,15 +100,34 @@ router.get('/', async (req, res) => {
|
||||
}
|
||||
}
|
||||
|
||||
if (conditions.length > 0) {
|
||||
query += ` WHERE ${conditions.join(' AND ')}`;
|
||||
// Search filter (name, dba_name, city, company_name)
|
||||
if (search) {
|
||||
conditions.push(`(name ILIKE $${params.length + 1} OR dba_name ILIKE $${params.length + 1} OR city ILIKE $${params.length + 1})`);
|
||||
params.push(`%${search}%`);
|
||||
}
|
||||
|
||||
// Build WHERE clause
|
||||
const whereClause = conditions.length > 0 ? ` WHERE ${conditions.join(' AND ')}` : '';
|
||||
|
||||
// Get total count first
|
||||
const countResult = await pool.query(`SELECT COUNT(*) FROM dispensaries${whereClause}`, params);
|
||||
const total = parseInt(countResult.rows[0].count);
|
||||
|
||||
// Add pagination
|
||||
query += whereClause;
|
||||
query += ` ORDER BY name`;
|
||||
query += ` LIMIT $${params.length + 1} OFFSET $${params.length + 2}`;
|
||||
params.push(pageLimit, pageOffset);
|
||||
|
||||
const result = await pool.query(query, params);
|
||||
|
||||
res.json({ dispensaries: result.rows, total: result.rowCount });
|
||||
res.json({
|
||||
dispensaries: result.rows,
|
||||
total,
|
||||
limit: pageLimit,
|
||||
offset: pageOffset,
|
||||
hasMore: pageOffset + result.rows.length < total
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Error fetching dispensaries:', error);
|
||||
res.status(500).json({ error: 'Failed to fetch dispensaries' });
|
||||
|
||||
214
backend/src/routes/image-proxy.ts
Normal file
214
backend/src/routes/image-proxy.ts
Normal file
@@ -0,0 +1,214 @@
|
||||
/**
|
||||
* Image Proxy Route
|
||||
*
|
||||
* On-demand image resizing service. Serves images with URL-based transforms.
|
||||
*
|
||||
* Usage:
|
||||
* /img/<path>?w=200&h=200&q=80&fit=cover
|
||||
*
|
||||
* Parameters:
|
||||
* w - width (pixels)
|
||||
* h - height (pixels)
|
||||
* q - quality (1-100, default 80)
|
||||
* fit - resize fit: cover, contain, fill, inside, outside (default: inside)
|
||||
* blur - blur sigma (0.3-1000)
|
||||
* gray - grayscale (1 = enabled)
|
||||
* format - output format: webp, jpeg, png, avif (default: webp)
|
||||
*
|
||||
* Examples:
|
||||
* /img/products/az/store/brand/product/image.webp?w=200
|
||||
* /img/products/az/store/brand/product/image.webp?w=600&h=400&fit=cover
|
||||
* /img/products/az/store/brand/product/image.webp?w=100&blur=5&gray=1
|
||||
*/
|
||||
|
||||
import { Router, Request, Response } from 'express';
|
||||
import * as fs from 'fs/promises';
|
||||
import * as path from 'path';
|
||||
// @ts-ignore
|
||||
const sharp = require('sharp');
|
||||
|
||||
const router = Router();
|
||||
|
||||
/**
 * Resolve the directory images are read from.
 *
 * Precedence: IMAGES_PATH, then `<STORAGE_BASE_PATH>/images`, then the
 * relative default `./storage/images`.
 */
function getImagesBasePath(): string {
  const explicitPath = process.env.IMAGES_PATH;
  const storageRoot = process.env.STORAGE_BASE_PATH;

  if (explicitPath) return explicitPath;
  return storageRoot ? path.join(storageRoot, 'images') : './storage/images';
}
|
||||
|
||||
const IMAGES_BASE_PATH = getImagesBasePath();
|
||||
|
||||
// Allowed fit modes
const ALLOWED_FITS = ['cover', 'contain', 'fill', 'inside', 'outside'] as const;
type FitMode = typeof ALLOWED_FITS[number];

// Allowed formats
const ALLOWED_FORMATS = ['webp', 'jpeg', 'jpg', 'png', 'avif'] as const;
type OutputFormat = typeof ALLOWED_FORMATS[number];

// Cache headers (1 year for immutable content-addressed images)
const CACHE_MAX_AGE = 31536000; // 1 year in seconds

/** Normalized transform options derived from the request query string. */
interface TransformParams {
  width?: number;
  height?: number;
  quality: number;
  fit: FitMode;
  blur?: number;
  grayscale: boolean;
  format: OutputFormat;
}

/**
 * Parse one numeric query value and clamp it into [min, max].
 * Returns undefined when the value is absent/empty or not a number, so a
 * malformed parameter is ignored instead of propagating NaN.
 */
function parseClampedNumber(
  raw: unknown,
  parse: (text: string) => number,
  min: number,
  max: number
): number | undefined {
  if (!raw) return undefined;
  const value = parse(String(raw));
  if (Number.isNaN(value)) return undefined;
  return Math.min(Math.max(value, min), max);
}

/** Type guard: is the value one of the allowed fit modes? */
function isFitMode(value: unknown): value is FitMode {
  return (ALLOWED_FITS as readonly unknown[]).includes(value);
}

/** Type guard: is the value one of the allowed output formats? */
function isOutputFormat(value: unknown): value is OutputFormat {
  return (ALLOWED_FORMATS as readonly unknown[]).includes(value);
}

/**
 * Turn the raw query object into validated transform parameters.
 *
 * - `w` / `h`: integers clamped to 1..4000
 * - `q`: integer clamped to 1..100, default 80
 * - `fit`: one of ALLOWED_FITS, default `inside`
 * - `blur`: float clamped to 0.3..1000
 * - `gray` / `grayscale`: enabled only by the exact value `'1'`
 * - `format`: one of ALLOWED_FORMATS, default `webp`
 *
 * Non-numeric values for `w`, `h` and `blur` are treated as absent and a
 * non-numeric `q` falls back to the default quality.
 *
 * @param query - Parsed query string (e.g. `req.query`).
 * @returns The normalized parameters.
 */
function parseTransformParams(query: Record<string, unknown>): TransformParams {
  const parseInteger = (text: string): number => parseInt(text, 10);

  return {
    width: parseClampedNumber(query.w, parseInteger, 1, 4000),
    height: parseClampedNumber(query.h, parseInteger, 1, 4000),
    quality: parseClampedNumber(query.q, parseInteger, 1, 100) ?? 80,
    fit: isFitMode(query.fit) ? query.fit : 'inside',
    blur: parseClampedNumber(query.blur, parseFloat, 0.3, 1000),
    grayscale: query.gray === '1' || query.grayscale === '1',
    format: isOutputFormat(query.format) ? query.format : 'webp',
  };
}
|
||||
|
||||
/**
 * Map an output format to the MIME type sent in the Content-Type header.
 * `jpg` and `jpeg` share `image/jpeg`; anything not listed yields `image/webp`.
 */
function getContentType(format: OutputFormat): string {
  if (format === 'jpeg' || format === 'jpg') {
    return 'image/jpeg';
  }
  if (format === 'png') {
    return 'image/png';
  }
  if (format === 'avif') {
    return 'image/avif';
  }
  return 'image/webp';
}
|
||||
|
||||
/**
 * Append the encoder for the requested output format to a Sharp pipeline.
 * The pipeline is typed loosely because Sharp is loaded through an untyped
 * require() in this file.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function applyOutputFormat(pipeline: any, format: OutputFormat, quality: number): any {
  switch (format) {
    case 'jpeg':
    case 'jpg':
      return pipeline.jpeg({ quality });
    case 'png':
      return pipeline.png({ quality });
    case 'avif':
      return pipeline.avif({ quality });
    case 'webp':
    default:
      return pipeline.webp({ quality });
  }
}

/** True when a filesystem error means "there is no such image file". */
function isMissingFileError(error: unknown): boolean {
  if (typeof error !== 'object' || error === null) return false;
  const code = (error as { code?: unknown }).code;
  return code === 'ENOENT' || code === 'EISDIR' || code === 'ENOTDIR';
}

/**
 * Image proxy endpoint
 * GET /img/*
 *
 * Resolves the requested path below IMAGES_BASE_PATH, applies the transforms
 * described by the query string and responds with the encoded image.
 *
 * Responses:
 *   400 - no image path given
 *   403 - resolved path is outside the images directory
 *   404 - file does not exist (or the path is a directory)
 *   500 - any other failure while reading or processing
 */
router.get('/*', async (req: Request, res: Response) => {
  try {
    // Get the image path from URL (everything after /img/)
    const imagePath = req.params[0];

    if (!imagePath) {
      return res.status(400).json({ error: 'Image path required' });
    }

    // Security: prevent directory traversal
    const normalizedPath = path.normalize(imagePath).replace(/^(\.\.(\/|\\|$))+/, '');
    const basePath = path.resolve(IMAGES_BASE_PATH);
    const fullPath = path.resolve(path.join(IMAGES_BASE_PATH, normalizedPath));

    // Ensure path is within base directory. A relative-path check is used
    // instead of a string prefix test so that a sibling directory sharing the
    // same prefix (e.g. "<base>-private") cannot pass.
    const relativeToBase = path.relative(basePath, fullPath);
    const escapesBase =
      relativeToBase === '..' ||
      relativeToBase.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relativeToBase);
    if (escapesBase) {
      console.error(`[ImageProxy] Path traversal attempt: ${fullPath} not in ${basePath}`);
      return res.status(403).json({ error: 'Access denied' });
    }

    // Read the original image; a missing file or a directory is a 404
    let imageBuffer: Buffer;
    try {
      imageBuffer = await fs.readFile(fullPath);
    } catch (readError: unknown) {
      if (isMissingFileError(readError)) {
        return res.status(404).json({ error: 'Image not found' });
      }
      throw readError;
    }

    // Parse transform parameters
    const params = parseTransformParams(req.query);

    // Check if any transforms are requested
    const hasTransforms = Boolean(params.width || params.height || params.blur || params.grayscale);

    // The stored bytes may be sent untouched only when nothing was requested
    // AND the file really is webp; otherwise the declared Content-Type would
    // not match the payload.
    const sourceIsWebp = path.extname(fullPath).toLowerCase() === '.webp';
    const serveOriginalBytes =
      !hasTransforms && params.format === 'webp' && params.quality === 80 && sourceIsWebp;

    let outputBuffer: Buffer;

    if (serveOriginalBytes) {
      outputBuffer = imageBuffer;
    } else {
      let pipeline = sharp(imageBuffer);

      // Resize
      if (params.width || params.height) {
        pipeline = pipeline.resize(params.width, params.height, {
          fit: params.fit,
          withoutEnlargement: true,
        });
      }

      // Blur
      if (params.blur) {
        pipeline = pipeline.blur(params.blur);
      }

      // Grayscale
      if (params.grayscale) {
        pipeline = pipeline.grayscale();
      }

      // Output format
      outputBuffer = await applyOutputFormat(pipeline, params.format, params.quality).toBuffer();
    }

    // Set headers
    res.setHeader('Content-Type', getContentType(params.format));
    res.setHeader('Cache-Control', `public, max-age=${CACHE_MAX_AGE}, immutable`);
    res.setHeader('X-Image-Size', outputBuffer.length);

    // Send image
    res.send(outputBuffer);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('[ImageProxy] Error:', message);
    res.status(500).json({ error: 'Failed to process image' });
  }
});
|
||||
|
||||
export default router;
|
||||
@@ -8,11 +8,13 @@ const router = Router();
|
||||
*/
|
||||
router.get('/', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const gitSha = process.env.APP_GIT_SHA || 'unknown';
|
||||
const versionInfo = {
|
||||
build_version: process.env.APP_BUILD_VERSION || 'dev',
|
||||
git_sha: process.env.APP_GIT_SHA || 'local',
|
||||
build_version: process.env.APP_BUILD_VERSION?.slice(0, 8) || 'dev',
|
||||
git_sha: gitSha.slice(0, 8) || 'unknown',
|
||||
git_sha_full: gitSha,
|
||||
build_time: process.env.APP_BUILD_TIME || new Date().toISOString(),
|
||||
image_tag: process.env.CONTAINER_IMAGE_TAG || 'local',
|
||||
image_tag: process.env.CONTAINER_IMAGE_TAG?.slice(0, 8) || 'local',
|
||||
};
|
||||
|
||||
res.json(versionInfo);
|
||||
|
||||
250
backend/src/scripts/crawl-single-store.ts
Normal file
250
backend/src/scripts/crawl-single-store.ts
Normal file
@@ -0,0 +1,250 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Crawl Single Store - Verbose test showing each step
|
||||
*
|
||||
* Usage:
|
||||
* DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
|
||||
* npx tsx src/scripts/crawl-single-store.ts <dispensaryId>
|
||||
*
|
||||
* Example:
|
||||
* DATABASE_URL="..." npx tsx src/scripts/crawl-single-store.ts 112
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import dotenv from 'dotenv';
|
||||
import {
|
||||
executeGraphQL,
|
||||
startSession,
|
||||
endSession,
|
||||
getFingerprint,
|
||||
GRAPHQL_HASHES,
|
||||
DUTCHIE_CONFIG,
|
||||
} from '../platforms/dutchie';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
// ============================================================
|
||||
// DATABASE CONNECTION
|
||||
// ============================================================
|
||||
|
||||
/**
 * Build the PostgreSQL connection string for this script.
 *
 * Precedence: DATABASE_URL, then CANNAIQ_DB_URL, then a URL assembled from
 * CANNAIQ_DB_HOST / _PORT / _NAME / _USER / _PASS (each with a local default).
 * User and password are percent-encoded so characters such as `@`, `:` or `/`
 * cannot corrupt the assembled URL.
 *
 * @returns A `postgresql://` connection string.
 */
function getConnectionString(): string {
  if (process.env.DATABASE_URL) {
    return process.env.DATABASE_URL;
  }
  if (process.env.CANNAIQ_DB_URL) {
    return process.env.CANNAIQ_DB_URL;
  }

  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = encodeURIComponent(process.env.CANNAIQ_DB_USER || 'dutchie');
  const pass = encodeURIComponent(process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass');

  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
|
||||
|
||||
const pool = new Pool({ connectionString: getConnectionString() });
|
||||
|
||||
// ============================================================
|
||||
// MAIN
|
||||
// ============================================================
|
||||
|
||||
/**
 * Crawl the first page of one dispensary's Dutchie menu and print each step.
 *
 * Steps:
 *   1. Load the dispensary row from the database.
 *   2. Start a stealth session and print the active fingerprint.
 *   3. Execute the `FilteredProducts` GraphQL query (page 0).
 *   4. Print totals and the first five products.
 *   5. End the session and print a summary.
 *
 * The dispensary id is read from `process.argv[2]`. The process exits with
 * status 1 on invalid arguments, on any thrown error, and when the GraphQL
 * response carries no `filteredProducts` payload.
 *
 * The stealth session and the pg pool are always released, including on
 * failure.
 */
async function main(): Promise<void> {
  const dispensaryId = parseInt(process.argv[2], 10);

  // parseInt yields NaN for missing/non-numeric input; also reject 0 and negatives.
  if (!Number.isInteger(dispensaryId) || dispensaryId <= 0) {
    console.error('Usage: npx tsx src/scripts/crawl-single-store.ts <dispensaryId>');
    console.error('Example: npx tsx src/scripts/crawl-single-store.ts 112');
    process.exit(1);
  }

  const stepTop = '┌─────────────────────────────────────────────────────────────┐';
  const stepBottom = '└─────────────────────────────────────────────────────────────┘';
  const bannerTop = '╔════════════════════════════════════════════════════════════╗';
  const bannerMid = '╠════════════════════════════════════════════════════════════╣';
  const bannerBottom = '╚════════════════════════════════════════════════════════════╝';

  // Single source of truth for the page size: used in the request and for the page count.
  const perPage = 100;

  console.log('');
  console.log(bannerTop);
  console.log('║ SINGLE STORE CRAWL - VERBOSE OUTPUT ║');
  console.log(bannerBottom);
  console.log('');

  let sessionOpen = false;
  let failed = false;

  try {
    // ============================================================
    // STEP 1: Get dispensary info from database
    // ============================================================
    console.log(stepTop);
    console.log('│ STEP 1: Load Dispensary Info from Database │');
    console.log(stepBottom);

    const dispResult = await pool.query(
      `
      SELECT
        id,
        name,
        platform_dispensary_id,
        menu_url,
        menu_type,
        city,
        state
      FROM dispensaries
      WHERE id = $1
      `,
      [dispensaryId]
    );

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(` Dispensary ID: ${disp.id}`);
    console.log(` Name: ${disp.name}`);
    console.log(` City, State: ${disp.city}, ${disp.state}`);
    console.log(` Menu Type: ${disp.menu_type}`);
    console.log(` Platform ID: ${disp.platform_dispensary_id}`);
    console.log(` Menu URL: ${disp.menu_url}`);

    if (!disp.platform_dispensary_id) {
      throw new Error('Dispensary does not have a platform_dispensary_id - cannot crawl');
    }

    // Extract cName from menu_url (the path segment after /embedded-menu/ or
    // /dispensary/); fall back to the literal 'dispensary' when it is absent.
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
    console.log(` cName (derived): ${cName}`);
    console.log('');

    // ============================================================
    // STEP 2: Start stealth session
    // ============================================================
    console.log(stepTop);
    console.log('│ STEP 2: Start Stealth Session │');
    console.log(stepBottom);

    // NOTE(review): the timezone is hard-coded to America/Phoenix even when
    // disp.state is not AZ - confirm whether it should follow the store's state.
    const session = startSession(disp.state || 'AZ', 'America/Phoenix');
    sessionOpen = true;

    const fp = getFingerprint();
    console.log(` Session ID: ${session.sessionId}`);
    console.log(` User-Agent: ${fp.userAgent.slice(0, 60)}...`);
    console.log(` Accept-Language: ${fp.acceptLanguage}`);
    console.log(` Sec-CH-UA: ${fp.secChUa || '(not set)'}`);
    console.log('');

    // ============================================================
    // STEP 3: Execute GraphQL query
    // ============================================================
    console.log(stepTop);
    console.log('│ STEP 3: Execute GraphQL Query (FilteredProducts) │');
    console.log(stepBottom);

    const variables = {
      includeEnterpriseSpecials: false,
      productsFilter: {
        dispensaryId: disp.platform_dispensary_id,
        pricingType: 'rec',
        Status: 'Active',
        types: [],
        useCache: true,
        isDefaultSort: true,
        sortBy: 'popularSortIdx',
        sortDirection: 1,
        bypassOnlineThresholds: true,
        isKioskMenu: false,
        removeProductsBelowOptionThresholds: false,
      },
      page: 0,
      perPage,
    };

    console.log(` Endpoint: ${DUTCHIE_CONFIG.graphqlEndpoint}`);
    console.log(` Operation: FilteredProducts`);
    console.log(` Hash: ${GRAPHQL_HASHES.FilteredProducts.slice(0, 20)}...`);
    console.log(` dispensaryId: ${variables.productsFilter.dispensaryId}`);
    console.log(` pricingType: ${variables.productsFilter.pricingType}`);
    console.log(` Status: ${variables.productsFilter.Status}`);
    console.log(` perPage: ${variables.perPage}`);
    console.log('');
    console.log(' Sending request...');

    const startTime = Date.now();
    const result = await executeGraphQL(
      'FilteredProducts',
      variables,
      GRAPHQL_HASHES.FilteredProducts,
      { cName, maxRetries: 3 }
    );
    const elapsed = Date.now() - startTime;

    console.log(` Response time: ${elapsed}ms`);
    console.log('');

    // ============================================================
    // STEP 4: Process response
    // ============================================================
    console.log(stepTop);
    console.log('│ STEP 4: Process Response │');
    console.log(stepBottom);

    const data = result?.data?.filteredProducts;
    if (!data) {
      console.log(' ERROR: No data returned from GraphQL');
      console.log(' Raw result:', JSON.stringify(result, null, 2).slice(0, 500));
      // This is a failed crawl: report it through the exit status. The session
      // and the pool are released by the finally block below.
      process.exitCode = 1;
      return;
    }

    const products = data.products || [];
    const totalCount = data.queryInfo?.totalCount ?? 0;
    const totalPages = Math.ceil(totalCount / perPage);

    console.log(` Total products: ${totalCount}`);
    console.log(` Products in page: ${products.length}`);
    console.log(` Total pages: ${totalPages}`);
    console.log('');

    // Show first few products
    console.log(' First 5 products:');
    console.log(' ─────────────────────────────────────────────────────────');
    for (let i = 0; i < Math.min(5, products.length); i++) {
      const p = products[i];
      const name = (p.name || 'Unknown').slice(0, 40);
      const brand = (p.brand?.name || 'Unknown').slice(0, 15);
      const price = p.Prices?.[0]?.price || p.medPrice || p.recPrice || 'N/A';
      console.log(` ${i + 1}. ${name.padEnd(42)} | ${brand.padEnd(17)} | $${price}`);
    }
    console.log('');

    // ============================================================
    // STEP 5: End session
    // ============================================================
    console.log(stepTop);
    console.log('│ STEP 5: End Session │');
    console.log(stepBottom);

    endSession();
    sessionOpen = false;
    console.log('');

    // ============================================================
    // SUMMARY
    // ============================================================
    console.log(bannerTop);
    console.log('║ SUMMARY ║');
    console.log(bannerMid);
    console.log(`║ Store: ${String(disp.name ?? '').slice(0, 38).padEnd(38)} ║`);
    console.log(`║ Products Found: ${String(totalCount).padEnd(38)} ║`);
    console.log(`║ Response Time: ${(elapsed + 'ms').padEnd(38)} ║`);
    console.log(`║ Status: ${'SUCCESS'.padEnd(38)} ║`);
    console.log(bannerBottom);
  } catch (error: unknown) {
    failed = true;
    const message = error instanceof Error ? error.message : String(error);

    console.error('');
    console.error(bannerTop);
    console.error('║ ERROR ║');
    console.error(bannerBottom);
    console.error(` ${message}`);
    if (error instanceof Error && error.stack) {
      console.error('');
      console.error('Stack trace:');
      // Only the first few frames: enough to locate the failure without flooding the console.
      console.error(error.stack.split('\n').slice(0, 5).join('\n'));
    }
  } finally {
    // Release the session first, but never let that prevent the pool from closing.
    try {
      if (sessionOpen) {
        endSession();
      }
    } finally {
      await pool.end();
    }
  }

  // Exit only after cleanup: calling process.exit() inside the catch block
  // would terminate the process before the finally block could run.
  if (failed) {
    process.exit(1);
  }
}
|
||||
|
||||
main();
|
||||
@@ -23,6 +23,7 @@ import {
|
||||
DutchieNormalizer,
|
||||
hydrateToCanonical,
|
||||
} from '../hydration';
|
||||
import { initializeImageStorage } from '../utils/image-storage';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
@@ -137,6 +138,11 @@ async function main() {
|
||||
console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
|
||||
console.log('============================================================\n');
|
||||
|
||||
// Initialize image storage
|
||||
console.log('[Init] Initializing image storage...');
|
||||
await initializeImageStorage();
|
||||
console.log(' Image storage ready\n');
|
||||
|
||||
try {
|
||||
// Step 1: Get dispensary info
|
||||
console.log('[Step 1] Getting dispensary info...');
|
||||
|
||||
268
backend/src/scripts/test-image-download.ts
Normal file
268
backend/src/scripts/test-image-download.ts
Normal file
@@ -0,0 +1,268 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Test Image Download - Tests image downloading with a small batch of products
|
||||
*
|
||||
* Usage:
|
||||
* DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
|
||||
* STORAGE_DRIVER=local STORAGE_BASE_PATH=./storage \
|
||||
* npx tsx src/scripts/test-image-download.ts <dispensaryId> [limit]
|
||||
*
|
||||
* Example:
|
||||
* DATABASE_URL="..." npx tsx src/scripts/test-image-download.ts 112 5
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import dotenv from 'dotenv';
|
||||
import {
|
||||
executeGraphQL,
|
||||
startSession,
|
||||
endSession,
|
||||
GRAPHQL_HASHES,
|
||||
} from '../platforms/dutchie';
|
||||
import { DutchieNormalizer } from '../hydration/normalizers/dutchie';
|
||||
import { hydrateToCanonical } from '../hydration/canonical-upsert';
|
||||
import { initializeImageStorage, getStorageStats } from '../utils/image-storage';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
// ============================================================
|
||||
// DATABASE CONNECTION
|
||||
// ============================================================
|
||||
|
||||
/**
 * Resolve the PostgreSQL connection string used by this script.
 *
 * Precedence:
 *   1. `DATABASE_URL`
 *   2. `CANNAIQ_DB_URL` (honoured here so this script resolves its database
 *      the same way as crawl-single-store.ts)
 *   3. A URL assembled from `CANNAIQ_DB_HOST`, `CANNAIQ_DB_PORT`,
 *      `CANNAIQ_DB_NAME`, `CANNAIQ_DB_USER` and `CANNAIQ_DB_PASS`, each of
 *      which falls back to a local default when unset or empty.
 *
 * @returns A `postgresql://` connection string.
 */
function getConnectionString(): string {
  const explicitUrl = process.env.DATABASE_URL || process.env.CANNAIQ_DB_URL;
  if (explicitUrl) {
    return explicitUrl;
  }

  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = process.env.CANNAIQ_DB_USER || 'dutchie';
  const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';

  // Percent-encode the credentials so reserved URL characters ('@', ':', '/',
  // '#', ...) in a user name or password cannot corrupt the assembled URL.
  const credentials = `${encodeURIComponent(user)}:${encodeURIComponent(pass)}`;
  return `postgresql://${credentials}@${host}:${port}/${name}`;
}
|
||||
|
||||
const pool = new Pool({ connectionString: getConnectionString() });
|
||||
|
||||
// ============================================================
|
||||
// MAIN
|
||||
// ============================================================
|
||||
|
||||
/**
 * End-to-end check of image downloading for a small batch of products.
 *
 * Steps:
 *   1. Initialise image storage and record storage stats.
 *   2. Load the dispensary row.
 *   3. Delete the dispensary's store_products rows so the fetched products
 *      go through the "new product" flow.
 *   4. Fetch up to `limit` products from Dutchie.
 *   5. Normalize and hydrate them with `downloadImages: true`.
 *   6. Print storage stats after hydration.
 *   7. Print `local_image_path` / `images` for up to 10 stored rows.
 *
 * WARNING: step 3 deletes EVERY store_products row for the given dispensary,
 * not just a sample. Run this only against a disposable/local database.
 *
 * Arguments: `process.argv[2]` is the dispensary id (required) and
 * `process.argv[3]` is the product limit (optional, default 5). The process
 * exits with status 1 on invalid arguments or on any thrown error. The stealth
 * session and the pg pool are always released.
 */
async function main(): Promise<void> {
  const dispensaryId = parseInt(process.argv[2], 10);

  // Fall back to 5 when the limit is missing, non-numeric, zero or negative.
  const requestedLimit = parseInt(process.argv[3], 10);
  const limit = Number.isInteger(requestedLimit) && requestedLimit > 0 ? requestedLimit : 5;

  // parseInt yields NaN for missing/non-numeric input; also reject 0 and negatives.
  if (!Number.isInteger(dispensaryId) || dispensaryId <= 0) {
    console.error('Usage: npx tsx src/scripts/test-image-download.ts <dispensaryId> [limit]');
    console.error('Example: npx tsx src/scripts/test-image-download.ts 112 5');
    process.exit(1);
  }

  const stepTop = '┌─────────────────────────────────────────────────────────────┐';
  const stepBottom = '└─────────────────────────────────────────────────────────────┘';
  const bannerTop = '╔════════════════════════════════════════════════════════════╗';
  const bannerMid = '╠════════════════════════════════════════════════════════════╣';
  const bannerBottom = '╚════════════════════════════════════════════════════════════╝';

  console.log('');
  console.log(bannerTop);
  console.log('║ IMAGE DOWNLOAD TEST ║');
  console.log(bannerBottom);
  console.log('');

  let failed = false;

  try {
    // Initialize image storage
    console.log(stepTop);
    console.log('│ STEP 1: Initialize Image Storage │');
    console.log(stepBottom);
    await initializeImageStorage();
    const statsBefore = await getStorageStats();
    console.log(` Base path: ${statsBefore.basePath}`);
    console.log(` Products before: ${statsBefore.productCount}`);
    console.log(` Brands before: ${statsBefore.brandCount}`);
    console.log('');

    // Get dispensary info
    console.log(stepTop);
    console.log('│ STEP 2: Load Dispensary Info │');
    console.log(stepBottom);

    const dispResult = await pool.query(
      `
      SELECT
        id, name, platform_dispensary_id, menu_url, state, slug
      FROM dispensaries
      WHERE id = $1
      `,
      [dispensaryId]
    );

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(` Dispensary: ${disp.name}`);
    console.log(` State: ${disp.state}`);
    console.log(` Slug: ${disp.slug}`);
    console.log(` Platform ID: ${disp.platform_dispensary_id}`);
    console.log('');

    // Delete ALL existing store_products for this dispensary so that every
    // fetched product is treated as "new" by the hydration step.
    console.log(stepTop);
    console.log('│ STEP 3: Clear Store Products (to test new product flow) │');
    console.log(stepBottom);

    const deleteResult = await pool.query(
      `
      DELETE FROM store_products
      WHERE dispensary_id = $1
      RETURNING id
      `,
      [dispensaryId]
    );
    console.log(` Deleted ${deleteResult.rowCount} existing store_products`);
    console.log('');

    // Fetch products from Dutchie
    console.log(stepTop);
    console.log('│ STEP 4: Fetch Products from Dutchie (limited) │');
    console.log(stepBottom);

    // cName is the path segment after /embedded-menu/ or /dispensary/ in the
    // menu URL; fall back to the literal 'dispensary' when it is absent.
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';

    // NOTE(review): the timezone is hard-coded to America/Phoenix even when
    // disp.state is not AZ - confirm whether it should follow the store's state.
    const session = startSession(disp.state || 'AZ', 'America/Phoenix');

    let result;
    let elapsed = 0;
    try {
      console.log(` Session ID: ${session.sessionId}`);
      console.log(` cName: ${cName}`);
      console.log(` Limit: ${limit} products`);

      const variables = {
        includeEnterpriseSpecials: false,
        productsFilter: {
          dispensaryId: disp.platform_dispensary_id,
          pricingType: 'rec',
          Status: 'Active',
          types: [],
          useCache: true,
          isDefaultSort: true,
          sortBy: 'popularSortIdx',
          sortDirection: 1,
          bypassOnlineThresholds: true,
          isKioskMenu: false,
          removeProductsBelowOptionThresholds: false,
        },
        page: 0,
        perPage: limit, // Only fetch limited products
      };

      const startTime = Date.now();
      result = await executeGraphQL(
        'FilteredProducts',
        variables,
        GRAPHQL_HASHES.FilteredProducts,
        { cName, maxRetries: 3 }
      );
      elapsed = Date.now() - startTime;
    } finally {
      // End the session even when the request throws.
      endSession();
    }

    const products = result?.data?.filteredProducts?.products || [];
    console.log(` Fetched: ${products.length} products in ${elapsed}ms`);

    // Show products with images
    console.log('');
    console.log(' Products with images:');
    for (let i = 0; i < products.length; i++) {
      const p = products[i];
      const hasImage = !!p.Image;
      // Default missing names so the column never shows the text "undefined".
      const productName = (p.name || 'Unknown').slice(0, 40).padEnd(42);
      const brandName = p.brand?.name || 'Unknown';
      console.log(` ${i + 1}. ${productName} | ${brandName.slice(0, 15).padEnd(17)} | ${hasImage ? '✓ has image' : '✗ no image'}`);
    }
    console.log('');

    // Normalize and hydrate
    console.log(stepTop);
    console.log('│ STEP 5: Normalize and Hydrate (with image download) │');
    console.log(stepBottom);

    const normalizer = new DutchieNormalizer();
    // Wrap products in expected payload format
    const payload = {
      raw_json: products, // DutchieNormalizer.extractProducts handles arrays
      dispensary_id: dispensaryId,
    };
    const normResult = normalizer.normalize(payload);
    console.log(` Normalized products: ${normResult.products.length}`);
    console.log(` Brands found: ${normResult.brands.length}`);

    const hydrateStart = Date.now();
    const hydrateResult = await hydrateToCanonical(
      pool,
      dispensaryId,
      normResult,
      null, // no crawl run ID for test
      { dryRun: false, downloadImages: true }
    );
    const hydrateElapsed = Date.now() - hydrateStart;

    console.log('');
    console.log(` Hydration time: ${hydrateElapsed}ms`);
    console.log(` Products new: ${hydrateResult.productsNew}`);
    console.log(` Products updated: ${hydrateResult.productsUpdated}`);
    console.log(` Images downloaded: ${hydrateResult.imagesDownloaded}`);
    console.log(` Images skipped: ${hydrateResult.imagesSkipped}`);
    console.log(` Images failed: ${hydrateResult.imagesFailed}`);
    console.log(` Image bytes: ${(hydrateResult.imagesBytesTotal / 1024).toFixed(1)} KB`);
    console.log('');

    // Check storage stats
    console.log(stepTop);
    console.log('│ STEP 6: Verify Storage │');
    console.log(stepBottom);

    const statsAfter = await getStorageStats();
    console.log(` Products after: ${statsAfter.productCount}`);
    console.log(` Brands after: ${statsAfter.brandCount}`);
    console.log(` Total size: ${(statsAfter.totalSizeBytes / 1024).toFixed(1)} KB`);
    console.log('');

    // Check database for local_image_path
    console.log(stepTop);
    console.log('│ STEP 7: Check Database for Local Image Paths │');
    console.log(stepBottom);

    const dbCheck = await pool.query(
      `
      SELECT
        id, name_raw, local_image_path, images
      FROM store_products
      WHERE dispensary_id = $1
      LIMIT 10
      `,
      [dispensaryId]
    );

    for (const row of dbCheck.rows) {
      const hasLocal = !!row.local_image_path;
      // NOTE(review): truthiness only - an empty array/object in `images` still counts as present.
      const hasImages = !!row.images;
      const rowName = String(row.name_raw ?? '').slice(0, 40).padEnd(42);
      console.log(` ${row.id}: ${rowName} | local: ${hasLocal ? '✓' : '✗'} | images: ${hasImages ? '✓' : '✗'}`);
      if (row.local_image_path) {
        console.log(` → ${row.local_image_path}`);
      }
    }
    console.log('');

    // Summary
    console.log(bannerTop);
    console.log('║ SUMMARY ║');
    console.log(bannerMid);
    console.log(`║ Dispensary: ${String(disp.name ?? '').slice(0, 37).padEnd(37)} ║`);
    console.log(`║ Products crawled: ${String(products.length).padEnd(37)} ║`);
    console.log(`║ Images downloaded: ${String(hydrateResult.imagesDownloaded).padEnd(37)} ║`);
    console.log(`║ Total image bytes: ${((hydrateResult.imagesBytesTotal / 1024).toFixed(1) + ' KB').padEnd(37)} ║`);
    console.log(`║ Status: ${'SUCCESS'.padEnd(37)} ║`);
    console.log(bannerBottom);
  } catch (error: unknown) {
    failed = true;
    const message = error instanceof Error ? error.message : String(error);

    console.error('');
    console.error(bannerTop);
    console.error('║ ERROR ║');
    console.error(bannerBottom);
    console.error(` ${message}`);
    if (error instanceof Error && error.stack) {
      console.error('');
      console.error('Stack trace:');
      // Only the first few frames: enough to locate the failure without flooding the console.
      console.error(error.stack.split('\n').slice(0, 5).join('\n'));
    }
  } finally {
    await pool.end();
  }

  // Exit only after cleanup: calling process.exit() inside the catch block
  // would terminate the process before the finally block could run.
  if (failed) {
    process.exit(1);
  }
}
|
||||
|
||||
main();
|
||||
80
backend/src/scripts/test-image-proxy.ts
Normal file
80
backend/src/scripts/test-image-proxy.ts
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
/**
|
||||
* Test Image Proxy - Standalone test without backend
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx src/scripts/test-image-proxy.ts
|
||||
*/
|
||||
|
||||
import express from 'express';
|
||||
import imageProxyRoutes from '../routes/image-proxy';
|
||||
|
||||
const app = express();
const PORT = 3099;

// Mount the image proxy under the same /img prefix that the test URLs use.
app.use('/img', imageProxyRoutes);

// Start the server; once it is listening, request each test URL in turn,
// print status / content type / size, then exit. The exit status is 1 when
// any request could not be completed (e.g. connection error), 0 otherwise.
// HTTP error statuses are printed, not treated as failures - the
// "Non-existent image" case is expected to produce one.
app.listen(PORT, async () => {
  console.log(`Test image proxy running on http://localhost:${PORT}`);
  console.log('');
  console.log('Testing image proxy...');
  console.log('');

  const axios = require('axios');

  // Test cases
  const tests = [
    {
      name: 'Original image',
      url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp',
    },
    {
      name: 'Resize to 200px width',
      url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200',
    },
    {
      name: 'Resize to 100x100 cover',
      url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=100&h=100&fit=cover',
    },
    {
      name: 'Grayscale + blur',
      url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200&gray=1&blur=2',
    },
    {
      name: 'Convert to JPEG',
      url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200&format=jpeg&q=70',
    },
    {
      name: 'Non-existent image',
      url: '/img/products/az/nonexistent/image.webp',
    },
  ];

  // Number of requests that threw instead of producing an HTTP response.
  let failures = 0;

  for (const test of tests) {
    try {
      const response = await axios.get(`http://localhost:${PORT}${test.url}`, {
        responseType: 'arraybuffer',
        // Accept every status code so error responses are reported, not thrown.
        validateStatus: () => true,
      });

      const contentType = response.headers['content-type'];
      const size = response.data.length;
      const status = response.status;

      console.log(`${test.name}:`);
      console.log(` URL: ${test.url.slice(0, 80)}${test.url.length > 80 ? '...' : ''}`);
      console.log(` Status: ${status}`);
      console.log(` Content-Type: ${contentType}`);
      console.log(` Size: ${(size / 1024).toFixed(1)} KB`);
      console.log('');
    } catch (error: unknown) {
      failures++;
      const message = error instanceof Error ? error.message : String(error);
      console.log(`${test.name}: ERROR - ${message}`);
      console.log('');
    }
  }

  console.log('Tests complete!');
  // Surface transport-level failures through the exit status instead of always reporting success.
  process.exit(failures > 0 ? 1 : 0);
});
|
||||
117
backend/src/scripts/test-stealth-session.ts
Normal file
117
backend/src/scripts/test-stealth-session.ts
Normal file
@@ -0,0 +1,117 @@
|
||||
/**
|
||||
* Test script for stealth session management
|
||||
*
|
||||
* Tests:
|
||||
* 1. Per-session fingerprint rotation
|
||||
* 2. Geographic consistency (timezone → Accept-Language)
|
||||
* 3. Proxy location loading from database
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx src/scripts/test-stealth-session.ts
|
||||
*/
|
||||
|
||||
import {
|
||||
startSession,
|
||||
endSession,
|
||||
getCurrentSession,
|
||||
getFingerprint,
|
||||
getRandomFingerprint,
|
||||
getLocaleForTimezone,
|
||||
buildHeaders,
|
||||
} from '../platforms/dutchie';
|
||||
|
||||
// Banner rule reused at the top and bottom of the report.
const divider = '='.repeat(60);

console.log(divider);
console.log('STEALTH SESSION TEST');
console.log(divider);

// Test 1: print the locale that each timezone (including undefined and an
// unrecognised value) maps to.
console.log('\n[Test 1] Timezone to Locale Mapping:');
const timezoneSamples = [
  'America/Phoenix',
  'America/Los_Angeles',
  'America/New_York',
  'America/Chicago',
  undefined,
  'Invalid/Timezone',
];

timezoneSamples.forEach((zone) => {
  const mappedLocale = getLocaleForTimezone(zone);
  console.log(` ${zone || '(undefined)'} → ${mappedLocale}`);
});

// Test 2: draw five random fingerprints and print the start of each user agent.
console.log('\n[Test 2] Random Fingerprint Selection (5 samples):');
for (const sampleNumber of [1, 2, 3, 4, 5]) {
  const sample = getRandomFingerprint();
  console.log(` ${sampleNumber}. ${sample.userAgent.slice(0, 60)}...`);
}

// Test 3: fingerprint/session state before, during and after a session.
console.log('\n[Test 3] Session Management:');

// No session has been started yet at this point.
console.log(' Before session:');
const fingerprintBefore = getFingerprint();
console.log(` getFingerprint(): ${fingerprintBefore.userAgent.slice(0, 50)}...`);
console.log(` getCurrentSession(): ${getCurrentSession()}`);

// Open a session for Arizona.
console.log('\n Starting session (AZ, America/Phoenix):');
const arizonaSession = startSession('AZ', 'America/Phoenix');
console.log(` Session ID: ${arizonaSession.sessionId}`);
console.log(` Fingerprint UA: ${arizonaSession.fingerprint.userAgent.slice(0, 50)}...`);
console.log(` Accept-Language: ${arizonaSession.fingerprint.acceptLanguage}`);
console.log(` Timezone: ${arizonaSession.timezone}`);

// While the session is open, compare getFingerprint() with the session's own fingerprint.
console.log('\n During session:');
const fingerprintDuring = getFingerprint();
const sameAsSession = fingerprintDuring.userAgent === arizonaSession.fingerprint.userAgent;
console.log(` getFingerprint(): ${fingerprintDuring.userAgent.slice(0, 50)}...`);
console.log(` Same as session? ${sameAsSession}`);

// Headers built while the session is open.
console.log('\n buildHeaders() during session:');
const sessionHeaders = buildHeaders('/embedded-menu/test-store');
console.log(` User-Agent: ${sessionHeaders['user-agent'].slice(0, 50)}...`);
console.log(` Accept-Language: ${sessionHeaders['accept-language']}`);
console.log(` Origin: ${sessionHeaders['origin']}`);
console.log(` Referer: ${sessionHeaders['referer']}`);

// Close the session and show what getCurrentSession() reports afterwards.
console.log('\n Ending session:');
endSession();
console.log(` getCurrentSession(): ${getCurrentSession()}`);

// Test 4: open and close ten sessions and count the distinct user agents.
console.log('\n[Test 4] Multiple Sessions (fingerprint variety):');
const observedUserAgents: string[] = [];
let sessionsRemaining = 10;
while (sessionsRemaining > 0) {
  const trial = startSession('CA', 'America/Los_Angeles');
  observedUserAgents.push(trial.fingerprint.userAgent);
  endSession();
  sessionsRemaining -= 1;
}

const distinctUserAgents = new Set(observedUserAgents).size;
const varietyVerdict = distinctUserAgents >= 3 ? '✅ Good' : '⚠️ Low - may need more fingerprint options';
console.log(` 10 sessions created, ${distinctUserAgents} unique fingerprints`);
console.log(` Variety: ${varietyVerdict}`);

// Test 5: for each state/timezone pair, check that Accept-Language contains en-US.
console.log('\n[Test 5] Geographic Consistency:');
const geoCases = [
  { state: 'AZ', tz: 'America/Phoenix' },
  { state: 'CA', tz: 'America/Los_Angeles' },
  { state: 'NY', tz: 'America/New_York' },
  { state: 'IL', tz: 'America/Chicago' },
];

geoCases.forEach(({ state, tz }) => {
  const geoSession = startSession(state, tz);
  const acceptLanguage = geoSession.fingerprint.acceptLanguage;
  const marker = acceptLanguage.includes('en-US') ? '✅' : '❌';
  console.log(` ${state} (${tz}): Accept-Language=${geoSession.fingerprint.acceptLanguage} ${marker}`);
  endSession();
});

console.log('\n' + divider);
console.log('TEST COMPLETE');
console.log(divider);
|
||||
144
backend/src/scripts/test-stealth-with-db.ts
Normal file
144
backend/src/scripts/test-stealth-with-db.ts
Normal file
@@ -0,0 +1,144 @@
|
||||
/**
|
||||
* Test script for stealth session with REAL proxy data from database
|
||||
*
|
||||
* Tests:
|
||||
* 1. Load proxies from database (with location data)
|
||||
* 2. Verify location fields (city, state, timezone) are loaded
|
||||
* 3. Start session with proxy's timezone
|
||||
* 4. Verify Accept-Language matches timezone
|
||||
*
|
||||
* Usage:
|
||||
* DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" npx tsx src/scripts/test-stealth-with-db.ts
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import {
|
||||
CrawlRotator,
|
||||
ProxyRotator,
|
||||
} from '../services/crawl-rotator';
|
||||
import {
|
||||
startSession,
|
||||
endSession,
|
||||
getLocaleForTimezone,
|
||||
} from '../platforms/dutchie';
|
||||
|
||||
const DATABASE_URL = process.env.DATABASE_URL;
|
||||
|
||||
if (!DATABASE_URL) {
|
||||
console.error('ERROR: DATABASE_URL environment variable is required');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
/**
 * Exercise stealth sessions against the proxies stored in the database.
 *
 * Tests:
 *   1. Load proxies through CrawlRotator and print total/active counts.
 *   2. Print the location fields (city, state, country, timezone) of each proxy.
 *   3. Start a session with the timezone of the first proxy that has one and
 *      check that the session's Accept-Language equals
 *      getLocaleForTimezone(timezone).
 *   4. Start one session per distinct proxy timezone and print its Accept-Language.
 *
 * Any thrown error is logged and reported through a non-zero exit status.
 * The pg pool is always closed.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('STEALTH SESSION TEST WITH DATABASE');
  console.log('='.repeat(60));

  const pool = new Pool({ connectionString: DATABASE_URL });

  try {
    // Test 1: Load proxies with location data
    console.log('\n[Test 1] Loading proxies from database...');
    const rotator = new CrawlRotator(pool);
    await rotator.initialize();

    const stats = rotator.proxy.getStats();
    console.log(` Total proxies: ${stats.totalProxies}`);
    console.log(` Active proxies: ${stats.activeProxies}`);

    if (stats.activeProxies === 0) {
      console.log('\n WARNING: No active proxies in database!');
      console.log(' Insert test proxies with:');
      console.log(` INSERT INTO proxies (host, port, protocol, city, state, country_code, timezone, active)`);
      console.log(` VALUES ('proxy1.example.com', 8080, 'http', 'Phoenix', 'AZ', 'US', 'America/Phoenix', true);`);
      return;
    }

    // Test 2: Check location data on proxies
    console.log('\n[Test 2] Checking proxy location data...');
    let proxyCount = 0;
    let withLocationCount = 0;

    // Ask the rotator for as many proxies as it reports in total.
    // NOTE(review): presumably getNext() cycles through the proxies in order,
    // so this visits each one once - confirm against ProxyRotator.
    for (let i = 0; i < stats.totalProxies; i++) {
      const proxy = rotator.proxy.getNext();
      if (!proxy) break;

      proxyCount++;
      const hasLocation = !!(proxy.stateCode || proxy.timezone);
      if (hasLocation) withLocationCount++;

      console.log(` Proxy ${proxy.id}: ${proxy.host}:${proxy.port}`);
      console.log(` City: ${proxy.city || '(not set)'}`);
      console.log(` State: ${proxy.stateCode || '(not set)'}`);
      console.log(` Country: ${proxy.countryCode || '(not set)'}`);
      console.log(` Timezone: ${proxy.timezone || '(not set)'}`);
      console.log(` Has location data: ${hasLocation ? '✅' : '❌'}`);
    }

    console.log(`\n Summary: ${withLocationCount}/${proxyCount} proxies have location data`);

    // Test 3: Start session using proxy's timezone
    console.log('\n[Test 3] Starting session with proxy timezone...');

    // Look through one full rotation for the first proxy that actually has a
    // timezone, rather than giving up when the very next proxy happens to lack one.
    const rotation = Array.from({ length: stats.totalProxies }, () => rotator.proxy.getNext());
    const firstProxy = rotation.find((candidate) => candidate?.timezone);

    if (firstProxy && firstProxy.timezone) {
      console.log(` Using proxy: ${firstProxy.host} (${firstProxy.city}, ${firstProxy.stateCode})`);
      console.log(` Proxy timezone: ${firstProxy.timezone}`);

      const session = startSession(firstProxy.stateCode, firstProxy.timezone);
      console.log(` Session ID: ${session.sessionId}`);
      console.log(` Session timezone: ${session.timezone}`);
      console.log(` Session Accept-Language: ${session.fingerprint.acceptLanguage}`);

      // Verify Accept-Language matches expected locale for timezone
      const expectedLocale = getLocaleForTimezone(firstProxy.timezone);
      const matches = session.fingerprint.acceptLanguage === expectedLocale;
      console.log(` Expected locale: ${expectedLocale}`);
      console.log(` Locale matches: ${matches ? '✅' : '❌'}`);

      endSession();
    } else {
      console.log(' WARNING: No proxy with timezone data found');
    }

    // Test 4: Test each timezone in database
    console.log('\n[Test 4] Testing all proxy timezones...');
    const seenTimezones = new Set<string>();

    // Another full rotation; each distinct timezone is exercised only once.
    for (let i = 0; i < stats.totalProxies; i++) {
      const proxy = rotator.proxy.getNext();
      if (!proxy || !proxy.timezone) continue;
      if (seenTimezones.has(proxy.timezone)) continue;

      seenTimezones.add(proxy.timezone);
      const session = startSession(proxy.stateCode, proxy.timezone);
      console.log(` ${proxy.timezone}:`);
      console.log(` State: ${proxy.stateCode || 'unknown'}`);
      console.log(` Accept-Language: ${session.fingerprint.acceptLanguage}`);
      endSession();
    }

    console.log('\n' + '='.repeat(60));
    console.log('TEST COMPLETE');
    console.log('='.repeat(60));

    if (withLocationCount === 0) {
      console.log('\n⚠️ No proxies have location data.');
      console.log(' Geographic consistency will use default locale (en-US).');
      console.log(' To enable geo-consistency, populate city/state/timezone on proxies.');
    } else {
      console.log('\n✅ Stealth session with geo-consistency is working!');
      console.log(' Sessions will use Accept-Language matching proxy timezone.');
    }
  } catch (error) {
    console.error('Error:', error);
    // Report the failure through the exit status while still letting the
    // finally block close the pool.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
|
||||
|
||||
main();
|
||||
@@ -1,26 +1,29 @@
|
||||
/**
|
||||
* Local Image Storage Utility
|
||||
*
|
||||
* Downloads and stores product images to local filesystem.
|
||||
* Replaces MinIO-based storage with simple local file storage.
|
||||
* Downloads and stores product images to local filesystem with proper hierarchy.
|
||||
*
|
||||
* Directory structure:
|
||||
* /images/products/<dispensary_id>/<product_id>.webp
|
||||
* /images/products/<dispensary_id>/<product_id>-thumb.webp
|
||||
* /images/products/<dispensary_id>/<product_id>-medium.webp
|
||||
* /images/brands/<brand_slug>.webp
|
||||
* /images/products/<state>/<store_slug>/<brand_slug>/<product_id>/image.webp
|
||||
* /images/products/<state>/<store_slug>/<brand_slug>/<product_id>/image-medium.webp
|
||||
* /images/products/<state>/<store_slug>/<brand_slug>/<product_id>/image-thumb.webp
|
||||
* /images/brands/<brand_slug>/logo.webp
|
||||
*
|
||||
* This structure allows:
|
||||
* - Easy migration to MinIO/S3 (bucket per state)
|
||||
* - Browsing by state/store/brand
|
||||
* - Multiple images per product (future: gallery)
|
||||
*/
|
||||
|
||||
import axios from 'axios';
|
||||
import sharp from 'sharp';
|
||||
// @ts-ignore - sharp module typing quirk
|
||||
const sharp = require('sharp');
|
||||
import * as fs from 'fs/promises';
|
||||
import * as path from 'path';
|
||||
import { createHash } from 'crypto';
|
||||
|
||||
// Base path for image storage - configurable via env
|
||||
// Uses project-relative paths by default, NOT /app or other privileged paths
|
||||
function getImagesBasePath(): string {
|
||||
// Priority: IMAGES_PATH > STORAGE_BASE_PATH/images > ./storage/images
|
||||
if (process.env.IMAGES_PATH) {
|
||||
return process.env.IMAGES_PATH;
|
||||
}
|
||||
@@ -35,16 +38,28 @@ const IMAGES_BASE_PATH = getImagesBasePath();
|
||||
const IMAGES_PUBLIC_URL = process.env.IMAGES_PUBLIC_URL || '/images';
|
||||
|
||||
export interface LocalImageSizes {
|
||||
full: string; // URL path: /images/products/123/456.webp
|
||||
medium: string; // URL path: /images/products/123/456-medium.webp
|
||||
thumb: string; // URL path: /images/products/123/456-thumb.webp
|
||||
original: string; // URL path to original image
|
||||
// Legacy compatibility - all point to original until we add image proxy
|
||||
full: string;
|
||||
medium: string;
|
||||
thumb: string;
|
||||
}
|
||||
|
||||
/**
 * Outcome of a download-and-store attempt for a product image or brand logo.
 */
export interface DownloadResult {
  /** True when the image is available locally, including the skipped case. */
  success: boolean;
  /** Public URL paths for the stored image. */
  urls?: LocalImageSizes;
  /** Filesystem paths for the stored image. */
  localPaths?: LocalImageSizes;
  /** Failure description, set when `success` is false. */
  error?: string;
  /** Byte count reported by the save step for a fresh download. */
  bytesDownloaded?: number;
  /** True if image already exists and no download was performed. */
  skipped?: boolean;
}
|
||||
|
||||
/**
 * Identifies where a product image belongs in the storage hierarchy
 * (state / store / brand / product).
 */
export interface ProductImageContext {
  /** State code, e.g. "AZ", "CA". */
  stateCode: string;
  /** Store slug, e.g. "deeply-rooted". */
  storeSlug: string;
  /** Brand slug, e.g. "high-west-farms". */
  brandSlug: string;
  /** External product ID. */
  productId: string;
  /** Numeric dispensary ID, kept for backwards compatibility. */
  dispensaryId?: number;
}
|
||||
|
||||
/**
|
||||
@@ -58,6 +73,17 @@ async function ensureDir(dirPath: string): Promise<void> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Sanitize a string for use as a single file-path segment.
 *
 * Lowercases the input, collapses every run of characters outside `a-z0-9`
 * into one hyphen, strips leading/trailing hyphens and caps the result at
 * 50 characters.
 *
 * @param str - Raw value (state code, store/brand slug, product ID, ...)
 * @returns A path-safe slug, or `'unknown'` when nothing usable remains
 */
function slugify(str: string): string {
  // Values come from crawled data, so tolerate null/undefined at runtime
  // even though the declared type is `string`.
  const slug = String(str ?? '')
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .substring(0, 50)
    // Truncation can cut right after a separator; trim again so the
    // segment never ends with a dangling hyphen.
    .replace(/-+$/, '');

  return slug || 'unknown';
}
|
||||
|
||||
/**
|
||||
* Generate a short hash from a URL for deduplication
|
||||
*/
|
||||
@@ -81,53 +107,30 @@ async function downloadImage(imageUrl: string): Promise<Buffer> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Process and save image in multiple sizes
|
||||
* Returns the file paths relative to IMAGES_BASE_PATH
|
||||
* Process and save original image (convert to webp for consistency)
|
||||
*
|
||||
* We store only the original - resizing will be done on-demand via
|
||||
* an image proxy service (imgproxy, thumbor, or similar) in the future.
|
||||
*/
|
||||
async function processAndSaveImage(
|
||||
buffer: Buffer,
|
||||
outputDir: string,
|
||||
baseFilename: string
|
||||
): Promise<{ full: string; medium: string; thumb: string; totalBytes: number }> {
|
||||
): Promise<{ original: string; totalBytes: number }> {
|
||||
await ensureDir(outputDir);
|
||||
|
||||
const fullPath = path.join(outputDir, `${baseFilename}.webp`);
|
||||
const mediumPath = path.join(outputDir, `${baseFilename}-medium.webp`);
|
||||
const thumbPath = path.join(outputDir, `${baseFilename}-thumb.webp`);
|
||||
const originalPath = path.join(outputDir, `${baseFilename}.webp`);
|
||||
|
||||
// Process images in parallel
|
||||
const [fullBuffer, mediumBuffer, thumbBuffer] = await Promise.all([
|
||||
// Full: max 1200x1200, high quality
|
||||
sharp(buffer)
|
||||
.resize(1200, 1200, { fit: 'inside', withoutEnlargement: true })
|
||||
.webp({ quality: 85 })
|
||||
.toBuffer(),
|
||||
// Medium: 600x600
|
||||
sharp(buffer)
|
||||
.resize(600, 600, { fit: 'inside', withoutEnlargement: true })
|
||||
.webp({ quality: 80 })
|
||||
.toBuffer(),
|
||||
// Thumb: 200x200
|
||||
sharp(buffer)
|
||||
.resize(200, 200, { fit: 'inside', withoutEnlargement: true })
|
||||
.webp({ quality: 75 })
|
||||
.toBuffer(),
|
||||
]);
|
||||
// Convert to webp, preserve original dimensions, high quality
|
||||
const originalBuffer = await sharp(buffer)
|
||||
.webp({ quality: 90 })
|
||||
.toBuffer();
|
||||
|
||||
// Save all sizes
|
||||
await Promise.all([
|
||||
fs.writeFile(fullPath, fullBuffer),
|
||||
fs.writeFile(mediumPath, mediumBuffer),
|
||||
fs.writeFile(thumbPath, thumbBuffer),
|
||||
]);
|
||||
|
||||
const totalBytes = fullBuffer.length + mediumBuffer.length + thumbBuffer.length;
|
||||
await fs.writeFile(originalPath, originalBuffer);
|
||||
|
||||
return {
|
||||
full: fullPath,
|
||||
medium: mediumPath,
|
||||
thumb: thumbPath,
|
||||
totalBytes,
|
||||
original: originalPath,
|
||||
totalBytes: originalBuffer.length,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -135,47 +138,107 @@ async function processAndSaveImage(
|
||||
* Convert a file path to a public URL
|
||||
*/
|
||||
function pathToUrl(filePath: string): string {
  // Preferred: compute the path relative to the storage root. Resolving both
  // sides makes './storage/images' and 'storage/images/...' compare equal, and
  // it is immune to '/products/' or '/brands/' appearing elsewhere in the path
  // (e.g. a brand whose slug is "products", or a base path containing one).
  const relativeToBase = path.relative(
    path.resolve(IMAGES_BASE_PATH),
    path.resolve(filePath)
  );
  const isInsideBase =
    relativeToBase !== '' &&
    relativeToBase !== '..' &&
    !relativeToBase.startsWith(`..${path.sep}`) &&
    !path.isAbsolute(relativeToBase);

  if (isInsideBase) {
    // URLs always use forward slashes, whatever the platform separator is.
    return `${IMAGES_PUBLIC_URL}/${relativeToBase.split(path.sep).join('/')}`;
  }

  // Fallback for paths outside the storage root: find /products/ or /brands/
  // in the path and extract from there.
  const productsMatch = filePath.match(/(\/products\/.*)/);
  if (productsMatch) {
    return `${IMAGES_PUBLIC_URL}${productsMatch[1]}`;
  }

  const brandsMatch = filePath.match(/(\/brands\/.*)/);
  if (brandsMatch) {
    return `${IMAGES_PUBLIC_URL}${brandsMatch[1]}`;
  }

  // Last resort: strip the base path textually (works only on an exact match).
  const relativePath = filePath.replace(IMAGES_BASE_PATH, '');
  return `${IMAGES_PUBLIC_URL}${relativePath}`;
}
|
||||
|
||||
/**
|
||||
* Download and store a product image locally
|
||||
* Build the directory path for a product image
|
||||
* Structure: /images/products/<state>/<store>/<brand>/<product>/
|
||||
*/
|
||||
function buildProductImagePath(ctx: ProductImageContext): string {
  // Directory layout: <base>/products/<state>/<store>/<brand>/<product>.
  // Each segment is slugified; a missing value becomes 'unknown'.
  const segments = [ctx.stateCode, ctx.storeSlug, ctx.brandSlug, ctx.productId].map(
    (value) => slugify(value || 'unknown')
  );

  return path.join(IMAGES_BASE_PATH, 'products', ...segments);
}
|
||||
|
||||
/**
|
||||
* Download and store a product image with proper hierarchy
|
||||
*
|
||||
* @param imageUrl - The third-party image URL to download
|
||||
* @param dispensaryId - The dispensary ID (for directory organization)
|
||||
* @param productId - The product ID or external ID (for filename)
|
||||
* @param ctx - Product context (state, store, brand, product)
|
||||
* @param options - Download options
|
||||
* @returns Download result with local URLs
|
||||
*/
|
||||
export async function downloadProductImage(
|
||||
imageUrl: string,
|
||||
dispensaryId: number,
|
||||
productId: string | number
|
||||
ctx: ProductImageContext,
|
||||
options: { skipIfExists?: boolean } = {}
|
||||
): Promise<DownloadResult> {
|
||||
const { skipIfExists = true } = options;
|
||||
|
||||
try {
|
||||
if (!imageUrl) {
|
||||
return { success: false, error: 'No image URL provided' };
|
||||
}
|
||||
|
||||
const outputDir = buildProductImagePath(ctx);
|
||||
const urlHash = hashUrl(imageUrl);
|
||||
const baseFilename = `image-${urlHash}`;
|
||||
|
||||
// Check if image already exists
|
||||
if (skipIfExists) {
|
||||
const existingPath = path.join(outputDir, `${baseFilename}.webp`);
|
||||
try {
|
||||
await fs.access(existingPath);
|
||||
// Image exists, return existing URL
|
||||
const url = pathToUrl(existingPath);
|
||||
return {
|
||||
success: true,
|
||||
skipped: true,
|
||||
urls: {
|
||||
original: url,
|
||||
full: url,
|
||||
medium: url,
|
||||
thumb: url,
|
||||
},
|
||||
localPaths: {
|
||||
original: existingPath,
|
||||
full: existingPath,
|
||||
medium: existingPath,
|
||||
thumb: existingPath,
|
||||
},
|
||||
};
|
||||
} catch {
|
||||
// Image doesn't exist, continue to download
|
||||
}
|
||||
}
|
||||
|
||||
// Download the image
|
||||
const buffer = await downloadImage(imageUrl);
|
||||
|
||||
// Organize by dispensary ID
|
||||
const outputDir = path.join(IMAGES_BASE_PATH, 'products', String(dispensaryId));
|
||||
|
||||
// Use product ID + URL hash for uniqueness
|
||||
const urlHash = hashUrl(imageUrl);
|
||||
const baseFilename = `${productId}-${urlHash}`;
|
||||
|
||||
// Process and save
|
||||
// Process and save (original only)
|
||||
const result = await processAndSaveImage(buffer, outputDir, baseFilename);
|
||||
const url = pathToUrl(result.original);
|
||||
|
||||
return {
|
||||
success: true,
|
||||
urls: {
|
||||
full: pathToUrl(result.full),
|
||||
medium: pathToUrl(result.medium),
|
||||
thumb: pathToUrl(result.thumb),
|
||||
original: url,
|
||||
full: url,
|
||||
medium: url,
|
||||
thumb: url,
|
||||
},
|
||||
localPaths: {
|
||||
original: result.original,
|
||||
full: result.original,
|
||||
medium: result.original,
|
||||
thumb: result.original,
|
||||
},
|
||||
bytesDownloaded: result.totalBytes,
|
||||
};
|
||||
@@ -188,33 +251,70 @@ export async function downloadProductImage(
|
||||
}
|
||||
|
||||
/**
|
||||
* Download and store a brand logo locally
|
||||
* Legacy function - backwards compatible with old signature
|
||||
* Maps to new hierarchy using dispensary_id as store identifier
|
||||
*/
|
||||
export async function downloadProductImageLegacy(
  imageUrl: string,
  dispensaryId: number,
  productId: string | number
): Promise<DownloadResult> {
  // Callers on the old signature only supply dispensary and product IDs, so
  // state and brand are 'unknown' and the store segment comes from the
  // dispensary ID.
  const legacyContext: ProductImageContext = {
    stateCode: 'unknown',
    storeSlug: `store-${dispensaryId}`,
    brandSlug: 'unknown',
    productId: String(productId),
    dispensaryId,
  };

  return downloadProductImage(imageUrl, legacyContext);
}
|
||||
|
||||
/**
|
||||
* Download and store a brand logo
|
||||
*
|
||||
* @param logoUrl - The brand logo URL
|
||||
* @param brandId - The brand ID or slug
|
||||
* @param brandSlug - The brand slug/ID
|
||||
* @returns Download result with local URL
|
||||
*/
|
||||
export async function downloadBrandLogo(
|
||||
logoUrl: string,
|
||||
brandId: string
|
||||
brandSlug: string,
|
||||
options: { skipIfExists?: boolean } = {}
|
||||
): Promise<DownloadResult> {
|
||||
const { skipIfExists = true } = options;
|
||||
|
||||
try {
|
||||
if (!logoUrl) {
|
||||
return { success: false, error: 'No logo URL provided' };
|
||||
}
|
||||
|
||||
const safeBrandSlug = slugify(brandSlug);
|
||||
const outputDir = path.join(IMAGES_BASE_PATH, 'brands', safeBrandSlug);
|
||||
const urlHash = hashUrl(logoUrl);
|
||||
const baseFilename = `logo-${urlHash}`;
|
||||
|
||||
// Check if logo already exists
|
||||
if (skipIfExists) {
|
||||
const existingPath = path.join(outputDir, `${baseFilename}.webp`);
|
||||
try {
|
||||
await fs.access(existingPath);
|
||||
return {
|
||||
success: true,
|
||||
skipped: true,
|
||||
urls: {
|
||||
full: pathToUrl(existingPath),
|
||||
medium: pathToUrl(existingPath),
|
||||
thumb: pathToUrl(existingPath),
|
||||
},
|
||||
};
|
||||
} catch {
|
||||
// Logo doesn't exist, continue
|
||||
}
|
||||
}
|
||||
|
||||
// Download the image
|
||||
const buffer = await downloadImage(logoUrl);
|
||||
|
||||
// Brand logos go in /images/brands/
|
||||
const outputDir = path.join(IMAGES_BASE_PATH, 'brands');
|
||||
|
||||
// Sanitize brand ID for filename
|
||||
const safeBrandId = brandId.replace(/[^a-zA-Z0-9-_]/g, '_');
|
||||
const urlHash = hashUrl(logoUrl);
|
||||
const baseFilename = `${safeBrandId}-${urlHash}`;
|
||||
|
||||
// Process and save (single size for logos)
|
||||
// Brand logos in their own directory
|
||||
await ensureDir(outputDir);
|
||||
const logoPath = path.join(outputDir, `${baseFilename}.webp`);
|
||||
|
||||
@@ -243,20 +343,16 @@ export async function downloadBrandLogo(
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a local image already exists
|
||||
* Check if a product image already exists
|
||||
*/
|
||||
export async function imageExists(
|
||||
dispensaryId: number,
|
||||
productId: string | number,
|
||||
export async function productImageExists(
|
||||
ctx: ProductImageContext,
|
||||
imageUrl: string
|
||||
): Promise<boolean> {
|
||||
const outputDir = buildProductImagePath(ctx);
|
||||
const urlHash = hashUrl(imageUrl);
|
||||
const imagePath = path.join(
|
||||
IMAGES_BASE_PATH,
|
||||
'products',
|
||||
String(dispensaryId),
|
||||
`${productId}-${urlHash}.webp`
|
||||
);
|
||||
const imagePath = path.join(outputDir, `image-${urlHash}.webp`);
|
||||
|
||||
try {
|
||||
await fs.access(imagePath);
|
||||
return true;
|
||||
@@ -266,24 +362,27 @@ export async function imageExists(
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a product's local images
|
||||
* Get the local image URL for a product (if exists)
|
||||
*/
|
||||
export async function deleteProductImages(
|
||||
dispensaryId: number,
|
||||
productId: string | number,
|
||||
imageUrl?: string
|
||||
): Promise<void> {
|
||||
const productDir = path.join(IMAGES_BASE_PATH, 'products', String(dispensaryId));
|
||||
const prefix = imageUrl
|
||||
? `${productId}-${hashUrl(imageUrl)}`
|
||||
: String(productId);
|
||||
export async function getProductImageUrl(
|
||||
ctx: ProductImageContext,
|
||||
imageUrl: string
|
||||
): Promise<LocalImageSizes | null> {
|
||||
const outputDir = buildProductImagePath(ctx);
|
||||
const urlHash = hashUrl(imageUrl);
|
||||
const imagePath = path.join(outputDir, `image-${urlHash}.webp`);
|
||||
|
||||
try {
|
||||
const files = await fs.readdir(productDir);
|
||||
const toDelete = files.filter(f => f.startsWith(prefix));
|
||||
await Promise.all(toDelete.map(f => fs.unlink(path.join(productDir, f))));
|
||||
await fs.access(imagePath);
|
||||
const url = pathToUrl(imagePath);
|
||||
return {
|
||||
original: url,
|
||||
full: url,
|
||||
medium: url,
|
||||
thumb: url,
|
||||
};
|
||||
} catch {
|
||||
// Directory might not exist, that's fine
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -296,19 +395,17 @@ export function isImageStorageReady(): boolean {
|
||||
|
||||
/**
|
||||
* Initialize the image storage directories
|
||||
* Does NOT throw on failure - logs warning and continues
|
||||
*/
|
||||
export async function initializeImageStorage(): Promise<void> {
|
||||
try {
|
||||
await ensureDir(path.join(IMAGES_BASE_PATH, 'products'));
|
||||
await ensureDir(path.join(IMAGES_BASE_PATH, 'brands'));
|
||||
console.log(`✅ Image storage initialized at ${IMAGES_BASE_PATH}`);
|
||||
console.log(`[ImageStorage] Initialized at ${IMAGES_BASE_PATH}`);
|
||||
imageStorageReady = true;
|
||||
} catch (error: any) {
|
||||
console.warn(`⚠️ WARNING: Could not initialize image storage at ${IMAGES_BASE_PATH}: ${error.message}`);
|
||||
console.warn(' Image upload/processing is disabled. Server will continue without image features.');
|
||||
console.warn(`[ImageStorage] WARNING: Could not initialize at ${IMAGES_BASE_PATH}: ${error.message}`);
|
||||
console.warn(' Image features disabled. Server will continue without image downloads.');
|
||||
imageStorageReady = false;
|
||||
// Do NOT throw - server should still start
|
||||
}
|
||||
}
|
||||
|
||||
@@ -316,34 +413,43 @@ export async function initializeImageStorage(): Promise<void> {
|
||||
* Get storage stats
|
||||
*/
|
||||
export async function getStorageStats(): Promise<{
|
||||
productsDir: string;
|
||||
brandsDir: string;
|
||||
basePath: string;
|
||||
productCount: number;
|
||||
brandCount: number;
|
||||
totalSizeBytes: number;
|
||||
}> {
|
||||
const productsDir = path.join(IMAGES_BASE_PATH, 'products');
|
||||
const brandsDir = path.join(IMAGES_BASE_PATH, 'brands');
|
||||
|
||||
let productCount = 0;
|
||||
let brandCount = 0;
|
||||
let totalSizeBytes = 0;
|
||||
|
||||
try {
|
||||
const productDirs = await fs.readdir(productsDir);
|
||||
for (const dir of productDirs) {
|
||||
const files = await fs.readdir(path.join(productsDir, dir));
|
||||
productCount += files.filter(f => f.endsWith('.webp') && !f.includes('-')).length;
|
||||
}
|
||||
} catch { /* ignore */ }
|
||||
/**
 * Recursively count stored images under a directory and sum their sizes.
 *
 * Counts every `.webp` file except `-thumb.webp` / `-medium.webp` resized
 * derivatives. Stored files are named `image-<hash>.webp` and
 * `logo-<hash>.webp`, so a hyphen alone must not exclude a file.
 *
 * @param dirPath - Directory to walk
 * @returns Image count and total size in bytes; zeros if the directory
 *          cannot be read
 */
async function countDir(dirPath: string): Promise<{ count: number; size: number }> {
  const derivativeSuffix = /-(?:thumb|medium)\.webp$/;
  let count = 0;
  let size = 0;

  try {
    const entries = await fs.readdir(dirPath, { withFileTypes: true });

    for (const entry of entries) {
      const fullPath = path.join(dirPath, entry.name);

      if (entry.isDirectory()) {
        const sub = await countDir(fullPath);
        count += sub.count;
        size += sub.size;
        continue;
      }

      if (!entry.name.endsWith('.webp') || derivativeSuffix.test(entry.name)) {
        continue;
      }

      try {
        const stat = await fs.stat(fullPath);
        count++;
        size += stat.size;
      } catch {
        // File vanished or is unreadable: skip it, keep counting the rest.
      }
    }
  } catch {
    // Directory missing or unreadable: stats are best-effort, report what we have.
  }

  return { count, size };
}
|
||||
|
||||
try {
|
||||
const brandFiles = await fs.readdir(brandsDir);
|
||||
brandCount = brandFiles.filter(f => f.endsWith('.webp')).length;
|
||||
} catch { /* ignore */ }
|
||||
const products = await countDir(path.join(IMAGES_BASE_PATH, 'products'));
|
||||
const brands = await countDir(path.join(IMAGES_BASE_PATH, 'brands'));
|
||||
|
||||
return {
|
||||
productsDir,
|
||||
brandsDir,
|
||||
productCount,
|
||||
brandCount,
|
||||
basePath: IMAGES_BASE_PATH,
|
||||
productCount: products.count,
|
||||
brandCount: brands.count,
|
||||
totalSizeBytes: products.size + brands.size,
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user