feat(images): Add local image storage with on-demand resizing

- Store product images locally with hierarchy: /images/products/<state>/<store>/<brand>/<product>/
- Add /img/* proxy endpoint for on-demand resizing via Sharp
- Implement per-product image checking to skip existing downloads
- Fix pathToUrl() to correctly generate /images/... URLs
- Add frontend getImageUrl() helper with preset sizes (thumb, medium, large)
- Update all product pages to use optimized image URLs
- Add stealth session support for Dutchie GraphQL crawls
- Include test scripts for crawl and image verification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-09 11:04:42 -07:00
parent aa776226b0
commit 91efd1d03d
28 changed files with 2027 additions and 205 deletions

View File

@@ -16,6 +16,12 @@ import {
NormalizedBrand,
NormalizationResult,
} from './types';
import {
downloadProductImage,
ProductImageContext,
isImageStorageReady,
LocalImageSizes,
} from '../utils/image-storage';
const BATCH_SIZE = 100;
@@ -23,10 +29,21 @@ const BATCH_SIZE = 100;
// PRODUCT UPSERTS
// ============================================================
export interface NewProductInfo {
id: number; // store_products.id
externalProductId: string; // provider_product_id
name: string;
brandName: string | null;
primaryImageUrl: string | null;
hasLocalImage?: boolean; // True if local_image_path is already set
}
export interface UpsertProductsResult {
upserted: number;
new: number;
updated: number;
newProducts: NewProductInfo[]; // Details of newly created products
productsNeedingImages: NewProductInfo[]; // Products (new or updated) that need image downloads
}
/**
@@ -41,12 +58,14 @@ export async function upsertStoreProducts(
options: { dryRun?: boolean } = {}
): Promise<UpsertProductsResult> {
if (products.length === 0) {
return { upserted: 0, new: 0, updated: 0 };
return { upserted: 0, new: 0, updated: 0, newProducts: [], productsNeedingImages: [] };
}
const { dryRun = false } = options;
let newCount = 0;
let updatedCount = 0;
const newProducts: NewProductInfo[] = [];
const productsNeedingImages: NewProductInfo[] = [];
// Process in batches
for (let i = 0; i < products.length; i += BATCH_SIZE) {
@@ -104,7 +123,7 @@ export async function upsertStoreProducts(
image_url = EXCLUDED.image_url,
last_seen_at = NOW(),
updated_at = NOW()
RETURNING (xmax = 0) as is_new`,
RETURNING id, (xmax = 0) as is_new, (local_image_path IS NOT NULL) as has_local_image`,
[
product.dispensaryId,
product.platform,
@@ -129,10 +148,30 @@ export async function upsertStoreProducts(
]
);
if (result.rows[0]?.is_new) {
const row = result.rows[0];
const productInfo: NewProductInfo = {
id: row.id,
externalProductId: product.externalProductId,
name: product.name,
brandName: product.brandName,
primaryImageUrl: product.primaryImageUrl,
hasLocalImage: row.has_local_image,
};
if (row.is_new) {
newCount++;
// Track new products
newProducts.push(productInfo);
// New products always need images (if they have a source URL)
if (product.primaryImageUrl && !row.has_local_image) {
productsNeedingImages.push(productInfo);
}
} else {
updatedCount++;
// Updated products need images only if they don't have a local image yet
if (product.primaryImageUrl && !row.has_local_image) {
productsNeedingImages.push(productInfo);
}
}
}
@@ -149,6 +188,8 @@ export async function upsertStoreProducts(
upserted: newCount + updatedCount,
new: newCount,
updated: updatedCount,
newProducts,
productsNeedingImages,
};
}
@@ -564,6 +605,19 @@ export async function upsertBrands(
// FULL HYDRATION
// ============================================================
export interface ImageDownloadResult {
downloaded: number;
skipped: number;
failed: number;
bytesTotal: number;
}
export interface DispensaryContext {
stateCode: string;
storeSlug: string;
hasExistingProducts?: boolean; // True if store already has products with local images
}
export interface HydratePayloadResult {
productsUpserted: number;
productsNew: number;
@@ -574,6 +628,154 @@ export interface HydratePayloadResult {
variantsUpserted: number;
variantsNew: number;
variantSnapshotsCreated: number;
imagesDownloaded: number;
imagesSkipped: number;
imagesFailed: number;
imagesBytesTotal: number;
}
/**
* Helper to create slug from string
*/
function slugify(str: string): string {
return str
.toLowerCase()
.replace(/[^a-z0-9]+/g, '-')
.replace(/^-+|-+$/g, '')
.substring(0, 50) || 'unknown';
}
/**
* Download images for new products and update their local paths
*/
export async function downloadProductImages(
pool: Pool,
newProducts: NewProductInfo[],
dispensaryContext: DispensaryContext,
options: { dryRun?: boolean; concurrency?: number } = {}
): Promise<ImageDownloadResult> {
const { dryRun = false, concurrency = 5 } = options;
// Filter products that have images to download
const productsWithImages = newProducts.filter(p => p.primaryImageUrl);
if (productsWithImages.length === 0) {
return { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
}
// Check if image storage is ready
if (!isImageStorageReady()) {
console.warn('[ImageDownload] Image storage not initialized, skipping downloads');
return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
}
if (dryRun) {
console.log(`[DryRun] Would download ${productsWithImages.length} images`);
return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
}
let downloaded = 0;
let skipped = 0;
let failed = 0;
let bytesTotal = 0;
// Process in batches with concurrency limit
for (let i = 0; i < productsWithImages.length; i += concurrency) {
const batch = productsWithImages.slice(i, i + concurrency);
const results = await Promise.allSettled(
batch.map(async (product) => {
const ctx: ProductImageContext = {
stateCode: dispensaryContext.stateCode,
storeSlug: dispensaryContext.storeSlug,
brandSlug: slugify(product.brandName || 'unknown'),
productId: product.externalProductId,
};
const result = await downloadProductImage(product.primaryImageUrl!, ctx, { skipIfExists: true });
if (result.success) {
// Update the database with local image path
const imagesJson = JSON.stringify({
full: result.urls!.full,
medium: result.urls!.medium,
thumb: result.urls!.thumb,
});
await pool.query(
`UPDATE store_products
SET local_image_path = $1, images = $2
WHERE id = $3`,
[result.urls!.full, imagesJson, product.id]
);
}
return result;
})
);
for (const result of results) {
if (result.status === 'fulfilled') {
const downloadResult = result.value;
if (downloadResult.success) {
if (downloadResult.skipped) {
skipped++;
} else {
downloaded++;
bytesTotal += downloadResult.bytesDownloaded || 0;
}
} else {
failed++;
console.warn(`[ImageDownload] Failed: ${downloadResult.error}`);
}
} else {
failed++;
console.error(`[ImageDownload] Error:`, result.reason);
}
}
}
console.log(`[ImageDownload] Downloaded: ${downloaded}, Skipped: ${skipped}, Failed: ${failed}, Bytes: ${bytesTotal}`);
return { downloaded, skipped, failed, bytesTotal };
}
/**
* Get dispensary context for image paths
* Also checks if this dispensary already has products with local images
* to skip unnecessary filesystem checks for existing stores
*/
async function getDispensaryContext(pool: Pool, dispensaryId: number): Promise<DispensaryContext | null> {
try {
const result = await pool.query(
`SELECT
d.state,
d.slug,
d.name,
EXISTS(
SELECT 1 FROM store_products sp
WHERE sp.dispensary_id = d.id
AND sp.local_image_path IS NOT NULL
LIMIT 1
) as has_local_images
FROM dispensaries d
WHERE d.id = $1`,
[dispensaryId]
);
if (result.rows.length === 0) {
return null;
}
const row = result.rows[0];
return {
stateCode: row.state || 'unknown',
storeSlug: row.slug || slugify(row.name || `store-${dispensaryId}`),
hasExistingProducts: row.has_local_images,
};
} catch (error) {
console.error('[getDispensaryContext] Error:', error);
return null;
}
}
/**
@@ -584,9 +786,9 @@ export async function hydrateToCanonical(
dispensaryId: number,
normResult: NormalizationResult,
crawlRunId: number | null,
options: { dryRun?: boolean } = {}
options: { dryRun?: boolean; downloadImages?: boolean } = {}
): Promise<HydratePayloadResult> {
const { dryRun = false } = options;
const { dryRun = false, downloadImages: shouldDownloadImages = true } = options;
// 1. Upsert brands
const brandResult = await upsertBrands(pool, normResult.brands, { dryRun });
@@ -634,6 +836,36 @@ export async function hydrateToCanonical(
{ dryRun }
);
// 6. Download images for products that need them
// This includes:
// - New products (always need images)
// - Updated products that don't have local images yet (backfill)
// This avoids:
// - Filesystem checks for products that already have local images
// - Unnecessary HTTP requests for products with existing images
let imageResult: ImageDownloadResult = { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
if (shouldDownloadImages && productResult.productsNeedingImages.length > 0) {
const dispensaryContext = await getDispensaryContext(pool, dispensaryId);
if (dispensaryContext) {
const newCount = productResult.productsNeedingImages.filter(p => !p.hasLocalImage).length;
const backfillCount = productResult.productsNeedingImages.length - newCount;
console.log(`[Hydration] Downloading images for ${productResult.productsNeedingImages.length} products (${productResult.new} new, ${backfillCount} backfill)...`);
imageResult = await downloadProductImages(
pool,
productResult.productsNeedingImages,
dispensaryContext,
{ dryRun }
);
} else {
console.warn(`[Hydration] Could not get dispensary context for ID ${dispensaryId}, skipping image downloads`);
}
} else if (productResult.productsNeedingImages.length === 0 && productResult.upserted > 0) {
// All products already have local images
console.log(`[Hydration] All ${productResult.upserted} products already have local images, skipping downloads`);
}
return {
productsUpserted: productResult.upserted,
productsNew: productResult.new,
@@ -644,5 +876,9 @@ export async function hydrateToCanonical(
variantsUpserted: variantResult.upserted,
variantsNew: variantResult.new,
variantSnapshotsCreated: variantResult.snapshotsCreated,
imagesDownloaded: imageResult.downloaded,
imagesSkipped: imageResult.skipped,
imagesFailed: imageResult.failed,
imagesBytesTotal: imageResult.bytesTotal,
};
}