feat(images): Add local image storage with on-demand resizing

- Store product images locally with hierarchy: /images/products/<state>/<store>/<brand>/<product>/
- Add /img/* proxy endpoint for on-demand resizing via Sharp
- Implement per-product image checking to skip existing downloads
- Fix pathToUrl() to correctly generate /images/... URLs
- Add frontend getImageUrl() helper with preset sizes (thumb, medium, large)
- Update all product pages to use optimized image URLs
- Add stealth session support for Dutchie GraphQL crawls
- Include test scripts for crawl and image verification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-09 11:04:42 -07:00
parent aa776226b0
commit 91efd1d03d
28 changed files with 2027 additions and 205 deletions

View File

@@ -65,10 +65,10 @@ steps:
platforms: linux/amd64
provenance: false
build_args:
- APP_BUILD_VERSION=${CI_COMMIT_SHA:0:8}
- APP_BUILD_VERSION=${CI_COMMIT_SHA}
- APP_GIT_SHA=${CI_COMMIT_SHA}
- APP_BUILD_TIME=${CI_PIPELINE_CREATED}
- CONTAINER_IMAGE_TAG=${CI_COMMIT_SHA:0:8}
- CONTAINER_IMAGE_TAG=${CI_COMMIT_SHA}
depends_on: []
when:
branch: master

View File

@@ -459,15 +459,66 @@ const result = await pool.query(`
### Local Storage Structure
```
/storage/products/{brand}/{state}/{product_id}/
/storage/images/products/{state}/{store}/{brand}/{product}/
image-{hash}.webp
image-{hash}-medium.webp
image-{hash}-thumb.webp
/storage/brands/{brand}/
/storage/images/brands/{brand}/
logo-{hash}.webp
```
### Image Proxy API (On-Demand Resizing)
Images are stored at full resolution and resized on-demand via the `/img` endpoint.
**Endpoint:** `GET /img/<path>?<params>`
**Parameters:**
| Param | Description | Example |
|-------|-------------|---------|
| `w` | Width in pixels (max 4000) | `?w=200` |
| `h` | Height in pixels (max 4000) | `?h=200` |
| `q` | Quality 1-100 (default 80) | `?q=70` |
| `fit` | Resize mode: cover, contain, fill, inside, outside | `?fit=cover` |
| `blur` | Blur sigma 0.3-1000 | `?blur=5` |
| `gray` | Grayscale (1 = enabled) | `?gray=1` |
| `format` | Output: webp, jpeg, png, avif (default webp) | `?format=jpeg` |
**Examples:**
```bash
# Thumbnail (50px)
GET /img/products/az/store/brand/product/image-abc123.webp?w=50
# Card image (200px, cover fit)
GET /img/products/az/store/brand/product/image-abc123.webp?w=200&h=200&fit=cover
# JPEG at 70% quality
GET /img/products/az/store/brand/product/image-abc123.webp?w=400&format=jpeg&q=70
# Grayscale blur
GET /img/products/az/store/brand/product/image-abc123.webp?w=200&gray=1&blur=3
```
**Frontend Usage:**
```typescript
import { getImageUrl, ImageSizes } from '../lib/images';
// Returns /img/products/.../image.webp?w=50 for local images
// Returns original URL for remote images (CDN, etc.)
const thumbUrl = getImageUrl(product.image_url, ImageSizes.thumb);
const cardUrl = getImageUrl(product.image_url, ImageSizes.medium);
const detailUrl = getImageUrl(product.image_url, ImageSizes.detail);
```
**Size Presets:**
| Preset | Width | Use Case |
|--------|-------|----------|
| `thumb` | 50px | Table thumbnails |
| `small` | 100px | Small cards |
| `medium` | 200px | Grid cards |
| `large` | 400px | Large cards |
| `detail` | 600px | Product detail |
| `full` | - | No resize |
### Storage Adapter
```typescript
@@ -480,8 +531,9 @@ import { saveImage, getImageUrl } from '../utils/storage-adapter';
| File | Purpose |
|------|---------|
| `backend/src/utils/local-storage.ts` | Local filesystem adapter |
| `backend/src/utils/storage-adapter.ts` | Unified storage abstraction |
| `backend/src/utils/image-storage.ts` | Image download and storage |
| `backend/src/routes/image-proxy.ts` | On-demand image resizing endpoint |
| `cannaiq/src/lib/images.ts` | Frontend image URL helper |
| `docker-compose.local.yml` | Local stack without MinIO |
| `start-local.sh` | Convenience startup script |

View File

@@ -0,0 +1,12 @@
-- Add timezone column to proxies table for geo-consistent fingerprinting
-- This allows matching Accept-Language and other headers to proxy location
ALTER TABLE proxies
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);
-- Add timezone to failed_proxies as well
ALTER TABLE failed_proxies
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);
-- Comment explaining usage
COMMENT ON COLUMN proxies.timezone IS 'IANA timezone (e.g., America/Phoenix) for geo-consistent fingerprinting';

View File

@@ -16,6 +16,12 @@ import {
NormalizedBrand,
NormalizationResult,
} from './types';
import {
downloadProductImage,
ProductImageContext,
isImageStorageReady,
LocalImageSizes,
} from '../utils/image-storage';
const BATCH_SIZE = 100;
@@ -23,10 +29,21 @@ const BATCH_SIZE = 100;
// PRODUCT UPSERTS
// ============================================================
export interface NewProductInfo {
id: number; // store_products.id
externalProductId: string; // provider_product_id
name: string;
brandName: string | null;
primaryImageUrl: string | null;
hasLocalImage?: boolean; // True if local_image_path is already set
}
export interface UpsertProductsResult {
upserted: number;
new: number;
updated: number;
newProducts: NewProductInfo[]; // Details of newly created products
productsNeedingImages: NewProductInfo[]; // Products (new or updated) that need image downloads
}
/**
@@ -41,12 +58,14 @@ export async function upsertStoreProducts(
options: { dryRun?: boolean } = {}
): Promise<UpsertProductsResult> {
if (products.length === 0) {
return { upserted: 0, new: 0, updated: 0 };
return { upserted: 0, new: 0, updated: 0, newProducts: [], productsNeedingImages: [] };
}
const { dryRun = false } = options;
let newCount = 0;
let updatedCount = 0;
const newProducts: NewProductInfo[] = [];
const productsNeedingImages: NewProductInfo[] = [];
// Process in batches
for (let i = 0; i < products.length; i += BATCH_SIZE) {
@@ -104,7 +123,7 @@ export async function upsertStoreProducts(
image_url = EXCLUDED.image_url,
last_seen_at = NOW(),
updated_at = NOW()
RETURNING (xmax = 0) as is_new`,
RETURNING id, (xmax = 0) as is_new, (local_image_path IS NOT NULL) as has_local_image`,
[
product.dispensaryId,
product.platform,
@@ -129,10 +148,30 @@ export async function upsertStoreProducts(
]
);
if (result.rows[0]?.is_new) {
const row = result.rows[0];
const productInfo: NewProductInfo = {
id: row.id,
externalProductId: product.externalProductId,
name: product.name,
brandName: product.brandName,
primaryImageUrl: product.primaryImageUrl,
hasLocalImage: row.has_local_image,
};
if (row.is_new) {
newCount++;
// Track new products
newProducts.push(productInfo);
// New products always need images (if they have a source URL)
if (product.primaryImageUrl && !row.has_local_image) {
productsNeedingImages.push(productInfo);
}
} else {
updatedCount++;
// Updated products need images only if they don't have a local image yet
if (product.primaryImageUrl && !row.has_local_image) {
productsNeedingImages.push(productInfo);
}
}
}
@@ -149,6 +188,8 @@ export async function upsertStoreProducts(
upserted: newCount + updatedCount,
new: newCount,
updated: updatedCount,
newProducts,
productsNeedingImages,
};
}
@@ -564,6 +605,19 @@ export async function upsertBrands(
// FULL HYDRATION
// ============================================================
export interface ImageDownloadResult {
downloaded: number;
skipped: number;
failed: number;
bytesTotal: number;
}
export interface DispensaryContext {
stateCode: string;
storeSlug: string;
hasExistingProducts?: boolean; // True if store already has products with local images
}
export interface HydratePayloadResult {
productsUpserted: number;
productsNew: number;
@@ -574,6 +628,154 @@ export interface HydratePayloadResult {
variantsUpserted: number;
variantsNew: number;
variantSnapshotsCreated: number;
imagesDownloaded: number;
imagesSkipped: number;
imagesFailed: number;
imagesBytesTotal: number;
}
/**
* Helper to create slug from string
*/
function slugify(str: string): string {
return str
.toLowerCase()
.replace(/[^a-z0-9]+/g, '-')
.replace(/^-+|-+$/g, '')
.substring(0, 50) || 'unknown';
}
/**
* Download images for new products and update their local paths
*/
export async function downloadProductImages(
pool: Pool,
newProducts: NewProductInfo[],
dispensaryContext: DispensaryContext,
options: { dryRun?: boolean; concurrency?: number } = {}
): Promise<ImageDownloadResult> {
const { dryRun = false, concurrency = 5 } = options;
// Filter products that have images to download
const productsWithImages = newProducts.filter(p => p.primaryImageUrl);
if (productsWithImages.length === 0) {
return { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
}
// Check if image storage is ready
if (!isImageStorageReady()) {
console.warn('[ImageDownload] Image storage not initialized, skipping downloads');
return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
}
if (dryRun) {
console.log(`[DryRun] Would download ${productsWithImages.length} images`);
return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
}
let downloaded = 0;
let skipped = 0;
let failed = 0;
let bytesTotal = 0;
// Process in batches with concurrency limit
for (let i = 0; i < productsWithImages.length; i += concurrency) {
const batch = productsWithImages.slice(i, i + concurrency);
const results = await Promise.allSettled(
batch.map(async (product) => {
const ctx: ProductImageContext = {
stateCode: dispensaryContext.stateCode,
storeSlug: dispensaryContext.storeSlug,
brandSlug: slugify(product.brandName || 'unknown'),
productId: product.externalProductId,
};
const result = await downloadProductImage(product.primaryImageUrl!, ctx, { skipIfExists: true });
if (result.success) {
// Update the database with local image path
const imagesJson = JSON.stringify({
full: result.urls!.full,
medium: result.urls!.medium,
thumb: result.urls!.thumb,
});
await pool.query(
`UPDATE store_products
SET local_image_path = $1, images = $2
WHERE id = $3`,
[result.urls!.full, imagesJson, product.id]
);
}
return result;
})
);
for (const result of results) {
if (result.status === 'fulfilled') {
const downloadResult = result.value;
if (downloadResult.success) {
if (downloadResult.skipped) {
skipped++;
} else {
downloaded++;
bytesTotal += downloadResult.bytesDownloaded || 0;
}
} else {
failed++;
console.warn(`[ImageDownload] Failed: ${downloadResult.error}`);
}
} else {
failed++;
console.error(`[ImageDownload] Error:`, result.reason);
}
}
}
console.log(`[ImageDownload] Downloaded: ${downloaded}, Skipped: ${skipped}, Failed: ${failed}, Bytes: ${bytesTotal}`);
return { downloaded, skipped, failed, bytesTotal };
}
/**
* Get dispensary context for image paths
* Also checks if this dispensary already has products with local images
* to skip unnecessary filesystem checks for existing stores
*/
async function getDispensaryContext(pool: Pool, dispensaryId: number): Promise<DispensaryContext | null> {
try {
const result = await pool.query(
`SELECT
d.state,
d.slug,
d.name,
EXISTS(
SELECT 1 FROM store_products sp
WHERE sp.dispensary_id = d.id
AND sp.local_image_path IS NOT NULL
LIMIT 1
) as has_local_images
FROM dispensaries d
WHERE d.id = $1`,
[dispensaryId]
);
if (result.rows.length === 0) {
return null;
}
const row = result.rows[0];
return {
stateCode: row.state || 'unknown',
storeSlug: row.slug || slugify(row.name || `store-${dispensaryId}`),
hasExistingProducts: row.has_local_images,
};
} catch (error) {
console.error('[getDispensaryContext] Error:', error);
return null;
}
}
/**
@@ -584,9 +786,9 @@ export async function hydrateToCanonical(
dispensaryId: number,
normResult: NormalizationResult,
crawlRunId: number | null,
options: { dryRun?: boolean } = {}
options: { dryRun?: boolean; downloadImages?: boolean } = {}
): Promise<HydratePayloadResult> {
const { dryRun = false } = options;
const { dryRun = false, downloadImages: shouldDownloadImages = true } = options;
// 1. Upsert brands
const brandResult = await upsertBrands(pool, normResult.brands, { dryRun });
@@ -634,6 +836,36 @@ export async function hydrateToCanonical(
{ dryRun }
);
// 6. Download images for products that need them
// This includes:
// - New products (always need images)
// - Updated products that don't have local images yet (backfill)
// This avoids:
// - Filesystem checks for products that already have local images
// - Unnecessary HTTP requests for products with existing images
let imageResult: ImageDownloadResult = { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
if (shouldDownloadImages && productResult.productsNeedingImages.length > 0) {
const dispensaryContext = await getDispensaryContext(pool, dispensaryId);
if (dispensaryContext) {
const newCount = productResult.productsNeedingImages.filter(p => !p.hasLocalImage).length;
const backfillCount = productResult.productsNeedingImages.length - newCount;
console.log(`[Hydration] Downloading images for ${productResult.productsNeedingImages.length} products (${productResult.new} new, ${backfillCount} backfill)...`);
imageResult = await downloadProductImages(
pool,
productResult.productsNeedingImages,
dispensaryContext,
{ dryRun }
);
} else {
console.warn(`[Hydration] Could not get dispensary context for ID ${dispensaryId}, skipping image downloads`);
}
} else if (productResult.productsNeedingImages.length === 0 && productResult.upserted > 0) {
// All products already have local images
console.log(`[Hydration] All ${productResult.upserted} products already have local images, skipping downloads`);
}
return {
productsUpserted: productResult.upserted,
productsNew: productResult.new,
@@ -644,5 +876,9 @@ export async function hydrateToCanonical(
variantsUpserted: variantResult.upserted,
variantsNew: variantResult.new,
variantSnapshotsCreated: variantResult.snapshotsCreated,
imagesDownloaded: imageResult.downloaded,
imagesSkipped: imageResult.skipped,
imagesFailed: imageResult.failed,
imagesBytesTotal: imageResult.bytesTotal,
};
}

View File

@@ -7,6 +7,7 @@ import { initializeImageStorage } from './utils/image-storage';
import { logger } from './services/logger';
import { cleanupOrphanedJobs } from './services/proxyTestQueue';
import healthRoutes from './routes/health';
import imageProxyRoutes from './routes/image-proxy';
dotenv.config();
@@ -29,6 +30,10 @@ app.use(express.json());
const LOCAL_IMAGES_PATH = process.env.LOCAL_IMAGES_PATH || './public/images';
app.use('/images', express.static(LOCAL_IMAGES_PATH));
// Image proxy with on-demand resizing
// Usage: /img/products/az/store/brand/product/image.webp?w=200&h=200
app.use('/img', imageProxyRoutes);
// Serve static downloads (plugin files, etc.)
// Uses ./public/downloads relative to working directory (works for both Docker and local dev)
const LOCAL_DOWNLOADS_PATH = process.env.LOCAL_DOWNLOADS_PATH || './public/downloads';

View File

@@ -213,7 +213,24 @@ const FINGERPRINTS: Fingerprint[] = [
let currentFingerprintIndex = 0;
// Forward declaration for session (actual CrawlSession interface defined later)
let currentSession: {
sessionId: string;
fingerprint: Fingerprint;
proxyUrl: string | null;
stateCode?: string;
timezone?: string;
startedAt: Date;
} | null = null;
/**
* Get current fingerprint - returns session fingerprint if active, otherwise default
*/
export function getFingerprint(): Fingerprint {
// Use session fingerprint if a session is active
if (currentSession) {
return currentSession.fingerprint;
}
return FINGERPRINTS[currentFingerprintIndex];
}
@@ -228,6 +245,103 @@ export function resetFingerprint(): void {
currentFingerprintIndex = 0;
}
/**
* Get a random fingerprint from the pool
*/
export function getRandomFingerprint(): Fingerprint {
const index = Math.floor(Math.random() * FINGERPRINTS.length);
return FINGERPRINTS[index];
}
// ============================================================
// SESSION MANAGEMENT
// Per-session fingerprint rotation for stealth
// ============================================================
export interface CrawlSession {
sessionId: string;
fingerprint: Fingerprint;
proxyUrl: string | null;
stateCode?: string;
timezone?: string;
startedAt: Date;
}
// Note: currentSession variable declared earlier in file for proper scoping
/**
* Timezone to Accept-Language mapping
* US timezones all use en-US but this can be extended for international
*/
const TIMEZONE_TO_LOCALE: Record<string, string> = {
'America/Phoenix': 'en-US,en;q=0.9',
'America/Los_Angeles': 'en-US,en;q=0.9',
'America/Denver': 'en-US,en;q=0.9',
'America/Chicago': 'en-US,en;q=0.9',
'America/New_York': 'en-US,en;q=0.9',
'America/Detroit': 'en-US,en;q=0.9',
'America/Anchorage': 'en-US,en;q=0.9',
'Pacific/Honolulu': 'en-US,en;q=0.9',
};
/**
* Get Accept-Language header for a given timezone
*/
export function getLocaleForTimezone(timezone?: string): string {
if (!timezone) return 'en-US,en;q=0.9';
return TIMEZONE_TO_LOCALE[timezone] || 'en-US,en;q=0.9';
}
/**
* Start a new crawl session with a random fingerprint
* Call this before crawling a store to get a fresh identity
*/
export function startSession(stateCode?: string, timezone?: string): CrawlSession {
const baseFp = getRandomFingerprint();
// Override Accept-Language based on timezone for geographic consistency
const fingerprint: Fingerprint = {
...baseFp,
acceptLanguage: getLocaleForTimezone(timezone),
};
currentSession = {
sessionId: `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
fingerprint,
proxyUrl: currentProxy,
stateCode,
timezone,
startedAt: new Date(),
};
console.log(`[Dutchie Client] Started session ${currentSession.sessionId}`);
console.log(`[Dutchie Client] Fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`);
console.log(`[Dutchie Client] Accept-Language: ${fingerprint.acceptLanguage}`);
if (timezone) {
console.log(`[Dutchie Client] Timezone: ${timezone}`);
}
return currentSession;
}
/**
* End the current crawl session
*/
export function endSession(): void {
if (currentSession) {
const duration = Math.round((Date.now() - currentSession.startedAt.getTime()) / 1000);
console.log(`[Dutchie Client] Ended session ${currentSession.sessionId} (${duration}s)`);
currentSession = null;
}
}
/**
* Get current active session
*/
export function getCurrentSession(): CrawlSession | null {
return currentSession;
}
// ============================================================
// CURL HTTP CLIENT
// ============================================================

View File

@@ -18,6 +18,13 @@ export {
getFingerprint,
rotateFingerprint,
resetFingerprint,
getRandomFingerprint,
getLocaleForTimezone,
// Session Management (per-store fingerprint rotation)
startSession,
endSession,
getCurrentSession,
// Proxy
setProxy,
@@ -32,6 +39,7 @@ export {
// Types
type CurlResponse,
type Fingerprint,
type CrawlSession,
type ExecuteGraphQLOptions,
type FetchPageOptions,
} from './client';

View File

@@ -8,10 +8,12 @@ router.use(authMiddleware);
// Valid menu_type values
const VALID_MENU_TYPES = ['dutchie', 'treez', 'jane', 'weedmaps', 'leafly', 'meadow', 'blaze', 'flowhub', 'dispense', 'cova', 'other', 'unknown'];
// Get all dispensaries
// Get all dispensaries (with pagination)
router.get('/', async (req, res) => {
try {
const { menu_type, city, state, crawl_enabled, dutchie_verified } = req.query;
const { menu_type, city, state, crawl_enabled, dutchie_verified, limit, offset, search } = req.query;
const pageLimit = Math.min(parseInt(limit as string) || 50, 500);
const pageOffset = parseInt(offset as string) || 0;
let query = `
SELECT
@@ -98,15 +100,34 @@ router.get('/', async (req, res) => {
}
}
if (conditions.length > 0) {
query += ` WHERE ${conditions.join(' AND ')}`;
// Search filter (name, dba_name, city, company_name)
if (search) {
conditions.push(`(name ILIKE $${params.length + 1} OR dba_name ILIKE $${params.length + 1} OR city ILIKE $${params.length + 1})`);
params.push(`%${search}%`);
}
// Build WHERE clause
const whereClause = conditions.length > 0 ? ` WHERE ${conditions.join(' AND ')}` : '';
// Get total count first
const countResult = await pool.query(`SELECT COUNT(*) FROM dispensaries${whereClause}`, params);
const total = parseInt(countResult.rows[0].count);
// Add pagination
query += whereClause;
query += ` ORDER BY name`;
query += ` LIMIT $${params.length + 1} OFFSET $${params.length + 2}`;
params.push(pageLimit, pageOffset);
const result = await pool.query(query, params);
res.json({ dispensaries: result.rows, total: result.rowCount });
res.json({
dispensaries: result.rows,
total,
limit: pageLimit,
offset: pageOffset,
hasMore: pageOffset + result.rows.length < total
});
} catch (error) {
console.error('Error fetching dispensaries:', error);
res.status(500).json({ error: 'Failed to fetch dispensaries' });

View File

@@ -0,0 +1,214 @@
/**
* Image Proxy Route
*
* On-demand image resizing service. Serves images with URL-based transforms.
*
* Usage:
* /img/<path>?w=200&h=200&q=80&fit=cover
*
* Parameters:
* w - width (pixels)
* h - height (pixels)
* q - quality (1-100, default 80)
* fit - resize fit: cover, contain, fill, inside, outside (default: inside)
* blur - blur sigma (0.3-1000)
* gray - grayscale (1 = enabled)
* format - output format: webp, jpeg, png, avif (default: webp)
*
* Examples:
* /img/products/az/store/brand/product/image.webp?w=200
* /img/products/az/store/brand/product/image.webp?w=600&h=400&fit=cover
* /img/products/az/store/brand/product/image.webp?w=100&blur=5&gray=1
*/
import { Router, Request, Response } from 'express';
import * as fs from 'fs/promises';
import * as path from 'path';
// @ts-ignore
const sharp = require('sharp');
const router = Router();
// Base path for images
function getImagesBasePath(): string {
if (process.env.IMAGES_PATH) {
return process.env.IMAGES_PATH;
}
if (process.env.STORAGE_BASE_PATH) {
return path.join(process.env.STORAGE_BASE_PATH, 'images');
}
return './storage/images';
}
const IMAGES_BASE_PATH = getImagesBasePath();
// Allowed fit modes
const ALLOWED_FITS = ['cover', 'contain', 'fill', 'inside', 'outside'] as const;
type FitMode = typeof ALLOWED_FITS[number];
// Allowed formats
const ALLOWED_FORMATS = ['webp', 'jpeg', 'jpg', 'png', 'avif'] as const;
type OutputFormat = typeof ALLOWED_FORMATS[number];
// Cache headers (1 year for immutable content-addressed images)
const CACHE_MAX_AGE = 31536000; // 1 year in seconds
interface TransformParams {
width?: number;
height?: number;
quality: number;
fit: FitMode;
blur?: number;
grayscale: boolean;
format: OutputFormat;
}
function parseTransformParams(query: any): TransformParams {
return {
width: query.w ? Math.min(Math.max(parseInt(query.w, 10), 1), 4000) : undefined,
height: query.h ? Math.min(Math.max(parseInt(query.h, 10), 1), 4000) : undefined,
quality: query.q ? Math.min(Math.max(parseInt(query.q, 10), 1), 100) : 80,
fit: ALLOWED_FITS.includes(query.fit) ? query.fit : 'inside',
blur: query.blur ? Math.min(Math.max(parseFloat(query.blur), 0.3), 1000) : undefined,
grayscale: query.gray === '1' || query.grayscale === '1',
format: ALLOWED_FORMATS.includes(query.format) ? query.format : 'webp',
};
}
function getContentType(format: OutputFormat): string {
switch (format) {
case 'jpeg':
case 'jpg':
return 'image/jpeg';
case 'png':
return 'image/png';
case 'avif':
return 'image/avif';
case 'webp':
default:
return 'image/webp';
}
}
/**
* Image proxy endpoint
* GET /img/*
*/
router.get('/*', async (req: Request, res: Response) => {
try {
// Get the image path from URL (everything after /img/)
const imagePath = req.params[0];
if (!imagePath) {
return res.status(400).json({ error: 'Image path required' });
}
// Security: prevent directory traversal
const normalizedPath = path.normalize(imagePath).replace(/^(\.\.(\/|\\|$))+/, '');
const basePath = path.resolve(IMAGES_BASE_PATH);
const fullPath = path.resolve(path.join(IMAGES_BASE_PATH, normalizedPath));
// Ensure path is within base directory
if (!fullPath.startsWith(basePath)) {
console.error(`[ImageProxy] Path traversal attempt: ${fullPath} not in ${basePath}`);
return res.status(403).json({ error: 'Access denied' });
}
// Check if file exists
try {
await fs.access(fullPath);
} catch {
return res.status(404).json({ error: 'Image not found' });
}
// Parse transform parameters
const params = parseTransformParams(req.query);
// Check if any transforms are requested
const hasTransforms = params.width || params.height || params.blur || params.grayscale;
// Read the original image
const imageBuffer = await fs.readFile(fullPath);
let outputBuffer: Buffer;
if (hasTransforms) {
// Apply transforms
let pipeline = sharp(imageBuffer);
// Resize
if (params.width || params.height) {
pipeline = pipeline.resize(params.width, params.height, {
fit: params.fit,
withoutEnlargement: true,
});
}
// Blur
if (params.blur) {
pipeline = pipeline.blur(params.blur);
}
// Grayscale
if (params.grayscale) {
pipeline = pipeline.grayscale();
}
// Output format
switch (params.format) {
case 'jpeg':
case 'jpg':
pipeline = pipeline.jpeg({ quality: params.quality });
break;
case 'png':
pipeline = pipeline.png({ quality: params.quality });
break;
case 'avif':
pipeline = pipeline.avif({ quality: params.quality });
break;
case 'webp':
default:
pipeline = pipeline.webp({ quality: params.quality });
}
outputBuffer = await pipeline.toBuffer();
} else {
// No transforms - serve original (but maybe convert format)
if (params.format !== 'webp' || params.quality !== 80) {
let pipeline = sharp(imageBuffer);
switch (params.format) {
case 'jpeg':
case 'jpg':
pipeline = pipeline.jpeg({ quality: params.quality });
break;
case 'png':
pipeline = pipeline.png({ quality: params.quality });
break;
case 'avif':
pipeline = pipeline.avif({ quality: params.quality });
break;
case 'webp':
default:
pipeline = pipeline.webp({ quality: params.quality });
}
outputBuffer = await pipeline.toBuffer();
} else {
outputBuffer = imageBuffer;
}
}
// Set headers
res.setHeader('Content-Type', getContentType(params.format));
res.setHeader('Cache-Control', `public, max-age=${CACHE_MAX_AGE}, immutable`);
res.setHeader('X-Image-Size', outputBuffer.length);
// Send image
res.send(outputBuffer);
} catch (error: any) {
console.error('[ImageProxy] Error:', error.message);
res.status(500).json({ error: 'Failed to process image' });
}
});
export default router;

View File

@@ -8,11 +8,13 @@ const router = Router();
*/
router.get('/', async (req: Request, res: Response) => {
try {
const gitSha = process.env.APP_GIT_SHA || 'unknown';
const versionInfo = {
build_version: process.env.APP_BUILD_VERSION || 'dev',
git_sha: process.env.APP_GIT_SHA || 'local',
build_version: process.env.APP_BUILD_VERSION?.slice(0, 8) || 'dev',
git_sha: gitSha.slice(0, 8) || 'unknown',
git_sha_full: gitSha,
build_time: process.env.APP_BUILD_TIME || new Date().toISOString(),
image_tag: process.env.CONTAINER_IMAGE_TAG || 'local',
image_tag: process.env.CONTAINER_IMAGE_TAG?.slice(0, 8) || 'local',
};
res.json(versionInfo);

View File

@@ -0,0 +1,250 @@
#!/usr/bin/env npx tsx
/**
* Crawl Single Store - Verbose test showing each step
*
* Usage:
* DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
* npx tsx src/scripts/crawl-single-store.ts <dispensaryId>
*
* Example:
* DATABASE_URL="..." npx tsx src/scripts/crawl-single-store.ts 112
*/
import { Pool } from 'pg';
import dotenv from 'dotenv';
import {
executeGraphQL,
startSession,
endSession,
getFingerprint,
GRAPHQL_HASHES,
DUTCHIE_CONFIG,
} from '../platforms/dutchie';
dotenv.config();
// ============================================================
// DATABASE CONNECTION
// ============================================================
function getConnectionString(): string {
if (process.env.DATABASE_URL) {
return process.env.DATABASE_URL;
}
if (process.env.CANNAIQ_DB_URL) {
return process.env.CANNAIQ_DB_URL;
}
const host = process.env.CANNAIQ_DB_HOST || 'localhost';
const port = process.env.CANNAIQ_DB_PORT || '54320';
const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
const user = process.env.CANNAIQ_DB_USER || 'dutchie';
const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';
return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
const pool = new Pool({ connectionString: getConnectionString() });
// ============================================================
// MAIN
// ============================================================
async function main() {
const dispensaryId = parseInt(process.argv[2], 10);
if (!dispensaryId) {
console.error('Usage: npx tsx src/scripts/crawl-single-store.ts <dispensaryId>');
console.error('Example: npx tsx src/scripts/crawl-single-store.ts 112');
process.exit(1);
}
console.log('');
console.log('╔════════════════════════════════════════════════════════════╗');
console.log('║ SINGLE STORE CRAWL - VERBOSE OUTPUT ║');
console.log('╚════════════════════════════════════════════════════════════╝');
console.log('');
try {
// ============================================================
// STEP 1: Get dispensary info from database
// ============================================================
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 1: Load Dispensary Info from Database │');
console.log('└─────────────────────────────────────────────────────────────┘');
const dispResult = await pool.query(`
SELECT
id,
name,
platform_dispensary_id,
menu_url,
menu_type,
city,
state
FROM dispensaries
WHERE id = $1
`, [dispensaryId]);
if (dispResult.rows.length === 0) {
throw new Error(`Dispensary ${dispensaryId} not found`);
}
const disp = dispResult.rows[0];
console.log(` Dispensary ID: ${disp.id}`);
console.log(` Name: ${disp.name}`);
console.log(` City, State: ${disp.city}, ${disp.state}`);
console.log(` Menu Type: ${disp.menu_type}`);
console.log(` Platform ID: ${disp.platform_dispensary_id}`);
console.log(` Menu URL: ${disp.menu_url}`);
if (!disp.platform_dispensary_id) {
throw new Error('Dispensary does not have a platform_dispensary_id - cannot crawl');
}
// Extract cName from menu_url
const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
console.log(` cName (derived): ${cName}`);
console.log('');
// ============================================================
// STEP 2: Start stealth session
// ============================================================
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 2: Start Stealth Session │');
console.log('└─────────────────────────────────────────────────────────────┘');
// Use Arizona timezone for this store
const session = startSession(disp.state || 'AZ', 'America/Phoenix');
const fp = getFingerprint();
console.log(` Session ID: ${session.sessionId}`);
console.log(` User-Agent: ${fp.userAgent.slice(0, 60)}...`);
console.log(` Accept-Language: ${fp.acceptLanguage}`);
console.log(` Sec-CH-UA: ${fp.secChUa || '(not set)'}`);
console.log('');
// ============================================================
// STEP 3: Execute GraphQL query
// ============================================================
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 3: Execute GraphQL Query (FilteredProducts) │');
console.log('└─────────────────────────────────────────────────────────────┘');
const variables = {
includeEnterpriseSpecials: false,
productsFilter: {
dispensaryId: disp.platform_dispensary_id,
pricingType: 'rec',
Status: 'Active',
types: [],
useCache: true,
isDefaultSort: true,
sortBy: 'popularSortIdx',
sortDirection: 1,
bypassOnlineThresholds: true,
isKioskMenu: false,
removeProductsBelowOptionThresholds: false,
},
page: 0,
perPage: 100,
};
console.log(` Endpoint: ${DUTCHIE_CONFIG.graphqlEndpoint}`);
console.log(` Operation: FilteredProducts`);
console.log(` Hash: ${GRAPHQL_HASHES.FilteredProducts.slice(0, 20)}...`);
console.log(` dispensaryId: ${variables.productsFilter.dispensaryId}`);
console.log(` pricingType: ${variables.productsFilter.pricingType}`);
console.log(` Status: ${variables.productsFilter.Status}`);
console.log(` perPage: ${variables.perPage}`);
console.log('');
console.log(' Sending request...');
const startTime = Date.now();
const result = await executeGraphQL(
'FilteredProducts',
variables,
GRAPHQL_HASHES.FilteredProducts,
{ cName, maxRetries: 3 }
);
const elapsed = Date.now() - startTime;
console.log(` Response time: ${elapsed}ms`);
console.log('');
// ============================================================
// STEP 4: Process response
// ============================================================
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 4: Process Response │');
console.log('└─────────────────────────────────────────────────────────────┘');
const data = result?.data?.filteredProducts;
if (!data) {
console.log(' ERROR: No data returned from GraphQL');
console.log(' Raw result:', JSON.stringify(result, null, 2).slice(0, 500));
endSession();
return;
}
const products = data.products || [];
const totalCount = data.queryInfo?.totalCount || 0;
const totalPages = Math.ceil(totalCount / 100);
console.log(` Total products: ${totalCount}`);
console.log(` Products in page: ${products.length}`);
console.log(` Total pages: ${totalPages}`);
console.log('');
// Show first few products
console.log(' First 5 products:');
console.log(' ─────────────────────────────────────────────────────────');
for (let i = 0; i < Math.min(5, products.length); i++) {
const p = products[i];
const name = (p.name || 'Unknown').slice(0, 40);
const brand = (p.brand?.name || 'Unknown').slice(0, 15);
const price = p.Prices?.[0]?.price || p.medPrice || p.recPrice || 'N/A';
const category = p.type || p.category || 'N/A';
console.log(` ${i + 1}. ${name.padEnd(42)} | ${brand.padEnd(17)} | $${price}`);
}
console.log('');
// ============================================================
// STEP 5: End session
// ============================================================
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 5: End Session │');
console.log('└─────────────────────────────────────────────────────────────┘');
endSession();
console.log('');
// ============================================================
// SUMMARY
// ============================================================
console.log('╔════════════════════════════════════════════════════════════╗');
console.log('║ SUMMARY ║');
console.log('╠════════════════════════════════════════════════════════════╣');
console.log(`║ Store: ${disp.name.slice(0, 38).padEnd(38)}`);
console.log(`║ Products Found: ${String(totalCount).padEnd(38)}`);
console.log(`║ Response Time: ${(elapsed + 'ms').padEnd(38)}`);
console.log(`║ Status: ${'SUCCESS'.padEnd(38)}`);
console.log('╚════════════════════════════════════════════════════════════╝');
} catch (error: any) {
console.error('');
console.error('╔════════════════════════════════════════════════════════════╗');
console.error('║ ERROR ║');
console.error('╚════════════════════════════════════════════════════════════╝');
console.error(` ${error.message}`);
if (error.stack) {
console.error('');
console.error('Stack trace:');
console.error(error.stack.split('\n').slice(0, 5).join('\n'));
}
process.exit(1);
} finally {
await pool.end();
}
}
main();

View File

@@ -23,6 +23,7 @@ import {
DutchieNormalizer,
hydrateToCanonical,
} from '../hydration';
import { initializeImageStorage } from '../utils/image-storage';
dotenv.config();
@@ -137,6 +138,11 @@ async function main() {
console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
console.log('============================================================\n');
// Initialize image storage
console.log('[Init] Initializing image storage...');
await initializeImageStorage();
console.log(' Image storage ready\n');
try {
// Step 1: Get dispensary info
console.log('[Step 1] Getting dispensary info...');

View File

@@ -0,0 +1,268 @@
#!/usr/bin/env npx tsx
/**
* Test Image Download - Tests image downloading with a small batch of products
*
* Usage:
* DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
* STORAGE_DRIVER=local STORAGE_BASE_PATH=./storage \
* npx tsx src/scripts/test-image-download.ts <dispensaryId> [limit]
*
* Example:
* DATABASE_URL="..." npx tsx src/scripts/test-image-download.ts 112 5
*/
import { Pool } from 'pg';
import dotenv from 'dotenv';
import {
executeGraphQL,
startSession,
endSession,
GRAPHQL_HASHES,
} from '../platforms/dutchie';
import { DutchieNormalizer } from '../hydration/normalizers/dutchie';
import { hydrateToCanonical } from '../hydration/canonical-upsert';
import { initializeImageStorage, getStorageStats } from '../utils/image-storage';
dotenv.config();
// ============================================================
// DATABASE CONNECTION
// ============================================================
function getConnectionString(): string {
if (process.env.DATABASE_URL) {
return process.env.DATABASE_URL;
}
const host = process.env.CANNAIQ_DB_HOST || 'localhost';
const port = process.env.CANNAIQ_DB_PORT || '54320';
const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
const user = process.env.CANNAIQ_DB_USER || 'dutchie';
const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';
return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
const pool = new Pool({ connectionString: getConnectionString() });
// ============================================================
// MAIN
// ============================================================
async function main() {
const dispensaryId = parseInt(process.argv[2], 10);
const limit = parseInt(process.argv[3], 10) || 5;
if (!dispensaryId) {
console.error('Usage: npx tsx src/scripts/test-image-download.ts <dispensaryId> [limit]');
console.error('Example: npx tsx src/scripts/test-image-download.ts 112 5');
process.exit(1);
}
console.log('');
console.log('╔════════════════════════════════════════════════════════════╗');
console.log('║ IMAGE DOWNLOAD TEST ║');
console.log('╚════════════════════════════════════════════════════════════╝');
console.log('');
try {
// Initialize image storage
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 1: Initialize Image Storage │');
console.log('└─────────────────────────────────────────────────────────────┘');
await initializeImageStorage();
const statsBefore = await getStorageStats();
console.log(` Base path: ${statsBefore.basePath}`);
console.log(` Products before: ${statsBefore.productCount}`);
console.log(` Brands before: ${statsBefore.brandCount}`);
console.log('');
// Get dispensary info
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 2: Load Dispensary Info │');
console.log('└─────────────────────────────────────────────────────────────┘');
const dispResult = await pool.query(`
SELECT
id, name, platform_dispensary_id, menu_url, state, slug
FROM dispensaries
WHERE id = $1
`, [dispensaryId]);
if (dispResult.rows.length === 0) {
throw new Error(`Dispensary ${dispensaryId} not found`);
}
const disp = dispResult.rows[0];
console.log(` Dispensary: ${disp.name}`);
console.log(` State: ${disp.state}`);
console.log(` Slug: ${disp.slug}`);
console.log(` Platform ID: ${disp.platform_dispensary_id}`);
console.log('');
// Delete some existing store_products to force "new" products
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 3: Clear Store Products (to test new product flow) │');
console.log('└─────────────────────────────────────────────────────────────┘');
const deleteResult = await pool.query(`
DELETE FROM store_products
WHERE dispensary_id = $1
RETURNING id
`, [dispensaryId]);
console.log(` Deleted ${deleteResult.rowCount} existing store_products`);
console.log('');
// Fetch products from Dutchie
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 4: Fetch Products from Dutchie (limited) │');
console.log('└─────────────────────────────────────────────────────────────┘');
const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
const session = startSession(disp.state || 'AZ', 'America/Phoenix');
console.log(` Session ID: ${session.sessionId}`);
console.log(` cName: ${cName}`);
console.log(` Limit: ${limit} products`);
const variables = {
includeEnterpriseSpecials: false,
productsFilter: {
dispensaryId: disp.platform_dispensary_id,
pricingType: 'rec',
Status: 'Active',
types: [],
useCache: true,
isDefaultSort: true,
sortBy: 'popularSortIdx',
sortDirection: 1,
bypassOnlineThresholds: true,
isKioskMenu: false,
removeProductsBelowOptionThresholds: false,
},
page: 0,
perPage: limit, // Only fetch limited products
};
const startTime = Date.now();
const result = await executeGraphQL(
'FilteredProducts',
variables,
GRAPHQL_HASHES.FilteredProducts,
{ cName, maxRetries: 3 }
);
const elapsed = Date.now() - startTime;
endSession();
const products = result?.data?.filteredProducts?.products || [];
console.log(` Fetched: ${products.length} products in ${elapsed}ms`);
// Show products with images
console.log('');
console.log(' Products with images:');
for (let i = 0; i < products.length; i++) {
const p = products[i];
const hasImage = !!p.Image;
const brandName = p.brand?.name || 'Unknown';
console.log(` ${i + 1}. ${p.name?.slice(0, 40).padEnd(42)} | ${brandName.slice(0, 15).padEnd(17)} | ${hasImage ? '✓ has image' : '✗ no image'}`);
}
console.log('');
// Normalize and hydrate
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 5: Normalize and Hydrate (with image download) │');
console.log('└─────────────────────────────────────────────────────────────┘');
const normalizer = new DutchieNormalizer();
// Wrap products in expected payload format
const payload = {
raw_json: products, // DutchieNormalizer.extractProducts handles arrays
dispensary_id: dispensaryId,
};
const normResult = normalizer.normalize(payload);
console.log(` Normalized products: ${normResult.products.length}`);
console.log(` Brands found: ${normResult.brands.length}`);
const hydrateStart = Date.now();
const hydrateResult = await hydrateToCanonical(
pool,
dispensaryId,
normResult,
null, // no crawl run ID for test
{ dryRun: false, downloadImages: true }
);
const hydrateElapsed = Date.now() - hydrateStart;
console.log('');
console.log(` Hydration time: ${hydrateElapsed}ms`);
console.log(` Products new: ${hydrateResult.productsNew}`);
console.log(` Products updated: ${hydrateResult.productsUpdated}`);
console.log(` Images downloaded: ${hydrateResult.imagesDownloaded}`);
console.log(` Images skipped: ${hydrateResult.imagesSkipped}`);
console.log(` Images failed: ${hydrateResult.imagesFailed}`);
console.log(` Image bytes: ${(hydrateResult.imagesBytesTotal / 1024).toFixed(1)} KB`);
console.log('');
// Check storage stats
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 6: Verify Storage │');
console.log('└─────────────────────────────────────────────────────────────┘');
const statsAfter = await getStorageStats();
console.log(` Products after: ${statsAfter.productCount}`);
console.log(` Brands after: ${statsAfter.brandCount}`);
console.log(` Total size: ${(statsAfter.totalSizeBytes / 1024).toFixed(1)} KB`);
console.log('');
// Check database for local_image_path
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 7: Check Database for Local Image Paths │');
console.log('└─────────────────────────────────────────────────────────────┘');
const dbCheck = await pool.query(`
SELECT
id, name_raw, local_image_path, images
FROM store_products
WHERE dispensary_id = $1
LIMIT 10
`, [dispensaryId]);
for (const row of dbCheck.rows) {
const hasLocal = !!row.local_image_path;
const hasImages = !!row.images;
console.log(` ${row.id}: ${row.name_raw?.slice(0, 40).padEnd(42)} | local: ${hasLocal ? '✓' : '✗'} | images: ${hasImages ? '✓' : '✗'}`);
if (row.local_image_path) {
console.log(`${row.local_image_path}`);
}
}
console.log('');
// Summary
console.log('╔════════════════════════════════════════════════════════════╗');
console.log('║ SUMMARY ║');
console.log('╠════════════════════════════════════════════════════════════╣');
console.log(`║ Dispensary: ${disp.name.slice(0, 37).padEnd(37)}`);
console.log(`║ Products crawled: ${String(products.length).padEnd(37)}`);
console.log(`║ Images downloaded: ${String(hydrateResult.imagesDownloaded).padEnd(37)}`);
console.log(`║ Total image bytes: ${((hydrateResult.imagesBytesTotal / 1024).toFixed(1) + ' KB').padEnd(37)}`);
console.log(`║ Status: ${'SUCCESS'.padEnd(37)}`);
console.log('╚════════════════════════════════════════════════════════════╝');
} catch (error: any) {
console.error('');
console.error('╔════════════════════════════════════════════════════════════╗');
console.error('║ ERROR ║');
console.error('╚════════════════════════════════════════════════════════════╝');
console.error(` ${error.message}`);
if (error.stack) {
console.error('');
console.error('Stack trace:');
console.error(error.stack.split('\n').slice(0, 5).join('\n'));
}
process.exit(1);
} finally {
await pool.end();
}
}
main();

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env npx tsx
/**
* Test Image Proxy - Standalone test without backend
*
* Usage:
* npx tsx src/scripts/test-image-proxy.ts
*/
import express from 'express';
import imageProxyRoutes from '../routes/image-proxy';
const app = express();
const PORT = 3099;
// Mount the image proxy
app.use('/img', imageProxyRoutes);
// Start server
app.listen(PORT, async () => {
console.log(`Test image proxy running on http://localhost:${PORT}`);
console.log('');
console.log('Testing image proxy...');
console.log('');
const axios = require('axios');
// Test cases
const tests = [
{
name: 'Original image',
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp',
},
{
name: 'Resize to 200px width',
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200',
},
{
name: 'Resize to 100x100 cover',
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=100&h=100&fit=cover',
},
{
name: 'Grayscale + blur',
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200&gray=1&blur=2',
},
{
name: 'Convert to JPEG',
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200&format=jpeg&q=70',
},
{
name: 'Non-existent image',
url: '/img/products/az/nonexistent/image.webp',
},
];
for (const test of tests) {
try {
const response = await axios.get(`http://localhost:${PORT}${test.url}`, {
responseType: 'arraybuffer',
validateStatus: () => true,
});
const contentType = response.headers['content-type'];
const size = response.data.length;
const status = response.status;
console.log(`${test.name}:`);
console.log(` URL: ${test.url.slice(0, 80)}${test.url.length > 80 ? '...' : ''}`);
console.log(` Status: ${status}`);
console.log(` Content-Type: ${contentType}`);
console.log(` Size: ${(size / 1024).toFixed(1)} KB`);
console.log('');
} catch (error: any) {
console.log(`${test.name}: ERROR - ${error.message}`);
console.log('');
}
}
console.log('Tests complete!');
process.exit(0);
});

View File

@@ -0,0 +1,117 @@
/**
* Test script for stealth session management
*
* Tests:
* 1. Per-session fingerprint rotation
* 2. Geographic consistency (timezone → Accept-Language)
* 3. Proxy location loading from database
*
* Usage:
* npx tsx src/scripts/test-stealth-session.ts
*/
import {
startSession,
endSession,
getCurrentSession,
getFingerprint,
getRandomFingerprint,
getLocaleForTimezone,
buildHeaders,
} from '../platforms/dutchie';
console.log('='.repeat(60));
console.log('STEALTH SESSION TEST');
console.log('='.repeat(60));
// Test 1: Timezone to Locale mapping
console.log('\n[Test 1] Timezone to Locale Mapping:');
const testTimezones = [
'America/Phoenix',
'America/Los_Angeles',
'America/New_York',
'America/Chicago',
undefined,
'Invalid/Timezone',
];
for (const tz of testTimezones) {
const locale = getLocaleForTimezone(tz);
console.log(` ${tz || '(undefined)'}${locale}`);
}
// Test 2: Random fingerprint selection
console.log('\n[Test 2] Random Fingerprint Selection (5 samples):');
for (let i = 0; i < 5; i++) {
const fp = getRandomFingerprint();
console.log(` ${i + 1}. ${fp.userAgent.slice(0, 60)}...`);
}
// Test 3: Session Management
console.log('\n[Test 3] Session Management:');
// Before session - should use default fingerprint
console.log(' Before session:');
const beforeFp = getFingerprint();
console.log(` getFingerprint(): ${beforeFp.userAgent.slice(0, 50)}...`);
console.log(` getCurrentSession(): ${getCurrentSession()}`);
// Start session with Arizona timezone
console.log('\n Starting session (AZ, America/Phoenix):');
const session1 = startSession('AZ', 'America/Phoenix');
console.log(` Session ID: ${session1.sessionId}`);
console.log(` Fingerprint UA: ${session1.fingerprint.userAgent.slice(0, 50)}...`);
console.log(` Accept-Language: ${session1.fingerprint.acceptLanguage}`);
console.log(` Timezone: ${session1.timezone}`);
// During session - should use session fingerprint
console.log('\n During session:');
const duringFp = getFingerprint();
console.log(` getFingerprint(): ${duringFp.userAgent.slice(0, 50)}...`);
console.log(` Same as session? ${duringFp.userAgent === session1.fingerprint.userAgent}`);
// Test buildHeaders with session
console.log('\n buildHeaders() during session:');
const headers = buildHeaders('/embedded-menu/test-store');
console.log(` User-Agent: ${headers['user-agent'].slice(0, 50)}...`);
console.log(` Accept-Language: ${headers['accept-language']}`);
console.log(` Origin: ${headers['origin']}`);
console.log(` Referer: ${headers['referer']}`);
// End session
console.log('\n Ending session:');
endSession();
console.log(` getCurrentSession(): ${getCurrentSession()}`);
// Test 4: Multiple sessions should have different fingerprints
console.log('\n[Test 4] Multiple Sessions (fingerprint variety):');
const fingerprints: string[] = [];
for (let i = 0; i < 10; i++) {
const session = startSession('CA', 'America/Los_Angeles');
fingerprints.push(session.fingerprint.userAgent);
endSession();
}
const uniqueCount = new Set(fingerprints).size;
console.log(` 10 sessions created, ${uniqueCount} unique fingerprints`);
console.log(` Variety: ${uniqueCount >= 3 ? '✅ Good' : '⚠️ Low - may need more fingerprint options'}`);
// Test 5: Geographic consistency check
console.log('\n[Test 5] Geographic Consistency:');
const geoTests = [
{ state: 'AZ', tz: 'America/Phoenix' },
{ state: 'CA', tz: 'America/Los_Angeles' },
{ state: 'NY', tz: 'America/New_York' },
{ state: 'IL', tz: 'America/Chicago' },
];
for (const { state, tz } of geoTests) {
const session = startSession(state, tz);
const consistent = session.fingerprint.acceptLanguage.includes('en-US');
console.log(` ${state} (${tz}): Accept-Language=${session.fingerprint.acceptLanguage} ${consistent ? '✅' : '❌'}`);
endSession();
}
console.log('\n' + '='.repeat(60));
console.log('TEST COMPLETE');
console.log('='.repeat(60));

View File

@@ -0,0 +1,144 @@
/**
* Test script for stealth session with REAL proxy data from database
*
* Tests:
* 1. Load proxies from database (with location data)
* 2. Verify location fields (city, state, timezone) are loaded
* 3. Start session with proxy's timezone
* 4. Verify Accept-Language matches timezone
*
* Usage:
* DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" npx tsx src/scripts/test-stealth-with-db.ts
*/
import { Pool } from 'pg';
import {
CrawlRotator,
ProxyRotator,
} from '../services/crawl-rotator';
import {
startSession,
endSession,
getLocaleForTimezone,
} from '../platforms/dutchie';
const DATABASE_URL = process.env.DATABASE_URL;
if (!DATABASE_URL) {
console.error('ERROR: DATABASE_URL environment variable is required');
process.exit(1);
}
async function main() {
console.log('='.repeat(60));
console.log('STEALTH SESSION TEST WITH DATABASE');
console.log('='.repeat(60));
const pool = new Pool({ connectionString: DATABASE_URL });
try {
// Test 1: Load proxies with location data
console.log('\n[Test 1] Loading proxies from database...');
const rotator = new CrawlRotator(pool);
await rotator.initialize();
const stats = rotator.proxy.getStats();
console.log(` Total proxies: ${stats.totalProxies}`);
console.log(` Active proxies: ${stats.activeProxies}`);
if (stats.activeProxies === 0) {
console.log('\n WARNING: No active proxies in database!');
console.log(' Insert test proxies with:');
console.log(` INSERT INTO proxies (host, port, protocol, city, state, country_code, timezone, active)`);
console.log(` VALUES ('proxy1.example.com', 8080, 'http', 'Phoenix', 'AZ', 'US', 'America/Phoenix', true);`);
return;
}
// Test 2: Check location data on proxies
console.log('\n[Test 2] Checking proxy location data...');
let proxyCount = 0;
let withLocationCount = 0;
// Iterate through proxies
for (let i = 0; i < stats.totalProxies; i++) {
const proxy = rotator.proxy.getNext();
if (!proxy) break;
proxyCount++;
const hasLocation = !!(proxy.stateCode || proxy.timezone);
if (hasLocation) withLocationCount++;
console.log(` Proxy ${proxy.id}: ${proxy.host}:${proxy.port}`);
console.log(` City: ${proxy.city || '(not set)'}`);
console.log(` State: ${proxy.stateCode || '(not set)'}`);
console.log(` Country: ${proxy.countryCode || '(not set)'}`);
console.log(` Timezone: ${proxy.timezone || '(not set)'}`);
console.log(` Has location data: ${hasLocation ? '✅' : '❌'}`);
}
console.log(`\n Summary: ${withLocationCount}/${proxyCount} proxies have location data`);
// Test 3: Start session using proxy's timezone
console.log('\n[Test 3] Starting session with proxy timezone...');
// Get first proxy with timezone
const firstProxy = rotator.proxy.getNext();
if (firstProxy && firstProxy.timezone) {
console.log(` Using proxy: ${firstProxy.host} (${firstProxy.city}, ${firstProxy.stateCode})`);
console.log(` Proxy timezone: ${firstProxy.timezone}`);
const session = startSession(firstProxy.stateCode, firstProxy.timezone);
console.log(` Session ID: ${session.sessionId}`);
console.log(` Session timezone: ${session.timezone}`);
console.log(` Session Accept-Language: ${session.fingerprint.acceptLanguage}`);
// Verify Accept-Language matches expected locale for timezone
const expectedLocale = getLocaleForTimezone(firstProxy.timezone);
const matches = session.fingerprint.acceptLanguage === expectedLocale;
console.log(` Expected locale: ${expectedLocale}`);
console.log(` Locale matches: ${matches ? '✅' : '❌'}`);
endSession();
} else {
console.log(' WARNING: No proxy with timezone data found');
}
// Test 4: Test each timezone in database
console.log('\n[Test 4] Testing all proxy timezones...');
const seenTimezones = new Set<string>();
// Reset to beginning
for (let i = 0; i < stats.totalProxies; i++) {
const proxy = rotator.proxy.getNext();
if (!proxy || !proxy.timezone) continue;
if (seenTimezones.has(proxy.timezone)) continue;
seenTimezones.add(proxy.timezone);
const session = startSession(proxy.stateCode, proxy.timezone);
console.log(` ${proxy.timezone}:`);
console.log(` State: ${proxy.stateCode || 'unknown'}`);
console.log(` Accept-Language: ${session.fingerprint.acceptLanguage}`);
endSession();
}
console.log('\n' + '='.repeat(60));
console.log('TEST COMPLETE');
console.log('='.repeat(60));
if (withLocationCount === 0) {
console.log('\n⚠ No proxies have location data.');
console.log(' Geographic consistency will use default locale (en-US).');
console.log(' To enable geo-consistency, populate city/state/timezone on proxies.');
} else {
console.log('\n✅ Stealth session with geo-consistency is working!');
console.log(' Sessions will use Accept-Language matching proxy timezone.');
}
} catch (error) {
console.error('Error:', error);
} finally {
await pool.end();
}
}
main();

View File

@@ -1,26 +1,29 @@
/**
* Local Image Storage Utility
*
* Downloads and stores product images to local filesystem.
* Replaces MinIO-based storage with simple local file storage.
* Downloads and stores product images to local filesystem with proper hierarchy.
*
* Directory structure:
* /images/products/<dispensary_id>/<product_id>.webp
* /images/products/<dispensary_id>/<product_id>-thumb.webp
* /images/products/<dispensary_id>/<product_id>-medium.webp
* /images/brands/<brand_slug>.webp
* /images/products/<state>/<store_slug>/<brand_slug>/<product_id>/image.webp
* /images/products/<state>/<store_slug>/<brand_slug>/<product_id>/image-medium.webp
* /images/products/<state>/<store_slug>/<brand_slug>/<product_id>/image-thumb.webp
* /images/brands/<brand_slug>/logo.webp
*
* This structure allows:
* - Easy migration to MinIO/S3 (bucket per state)
* - Browsing by state/store/brand
* - Multiple images per product (future: gallery)
*/
import axios from 'axios';
import sharp from 'sharp';
// @ts-ignore - sharp module typing quirk
const sharp = require('sharp');
import * as fs from 'fs/promises';
import * as path from 'path';
import { createHash } from 'crypto';
// Base path for image storage - configurable via env
// Uses project-relative paths by default, NOT /app or other privileged paths
function getImagesBasePath(): string {
// Priority: IMAGES_PATH > STORAGE_BASE_PATH/images > ./storage/images
if (process.env.IMAGES_PATH) {
return process.env.IMAGES_PATH;
}
@@ -35,16 +38,28 @@ const IMAGES_BASE_PATH = getImagesBasePath();
const IMAGES_PUBLIC_URL = process.env.IMAGES_PUBLIC_URL || '/images';
export interface LocalImageSizes {
full: string; // URL path: /images/products/123/456.webp
medium: string; // URL path: /images/products/123/456-medium.webp
thumb: string; // URL path: /images/products/123/456-thumb.webp
original: string; // URL path to original image
// Legacy compatibility - all point to original until we add image proxy
full: string;
medium: string;
thumb: string;
}
export interface DownloadResult {
success: boolean;
urls?: LocalImageSizes;
localPaths?: LocalImageSizes;
error?: string;
bytesDownloaded?: number;
skipped?: boolean; // True if image already exists
}
export interface ProductImageContext {
stateCode: string; // e.g., "AZ", "CA"
storeSlug: string; // e.g., "deeply-rooted"
brandSlug: string; // e.g., "high-west-farms"
productId: string; // External product ID
dispensaryId?: number; // For backwards compat
}
/**
@@ -58,6 +73,17 @@ async function ensureDir(dirPath: string): Promise<void> {
}
}
/**
* Sanitize a string for use in file paths
*/
function slugify(str: string): string {
return str
.toLowerCase()
.replace(/[^a-z0-9]+/g, '-')
.replace(/^-+|-+$/g, '')
.substring(0, 50) || 'unknown';
}
/**
* Generate a short hash from a URL for deduplication
*/
@@ -81,53 +107,30 @@ async function downloadImage(imageUrl: string): Promise<Buffer> {
}
/**
* Process and save image in multiple sizes
* Returns the file paths relative to IMAGES_BASE_PATH
* Process and save original image (convert to webp for consistency)
*
* We store only the original - resizing will be done on-demand via
* an image proxy service (imgproxy, thumbor, or similar) in the future.
*/
async function processAndSaveImage(
buffer: Buffer,
outputDir: string,
baseFilename: string
): Promise<{ full: string; medium: string; thumb: string; totalBytes: number }> {
): Promise<{ original: string; totalBytes: number }> {
await ensureDir(outputDir);
const fullPath = path.join(outputDir, `${baseFilename}.webp`);
const mediumPath = path.join(outputDir, `${baseFilename}-medium.webp`);
const thumbPath = path.join(outputDir, `${baseFilename}-thumb.webp`);
const originalPath = path.join(outputDir, `${baseFilename}.webp`);
// Process images in parallel
const [fullBuffer, mediumBuffer, thumbBuffer] = await Promise.all([
// Full: max 1200x1200, high quality
sharp(buffer)
.resize(1200, 1200, { fit: 'inside', withoutEnlargement: true })
.webp({ quality: 85 })
.toBuffer(),
// Medium: 600x600
sharp(buffer)
.resize(600, 600, { fit: 'inside', withoutEnlargement: true })
.webp({ quality: 80 })
.toBuffer(),
// Thumb: 200x200
sharp(buffer)
.resize(200, 200, { fit: 'inside', withoutEnlargement: true })
.webp({ quality: 75 })
.toBuffer(),
]);
// Convert to webp, preserve original dimensions, high quality
const originalBuffer = await sharp(buffer)
.webp({ quality: 90 })
.toBuffer();
// Save all sizes
await Promise.all([
fs.writeFile(fullPath, fullBuffer),
fs.writeFile(mediumPath, mediumBuffer),
fs.writeFile(thumbPath, thumbBuffer),
]);
const totalBytes = fullBuffer.length + mediumBuffer.length + thumbBuffer.length;
await fs.writeFile(originalPath, originalBuffer);
return {
full: fullPath,
medium: mediumPath,
thumb: thumbPath,
totalBytes,
original: originalPath,
totalBytes: originalBuffer.length,
};
}
@@ -135,47 +138,107 @@ async function processAndSaveImage(
* Convert a file path to a public URL
*/
function pathToUrl(filePath: string): string {
// Find /products/ or /brands/ in the path and extract from there
const productsMatch = filePath.match(/(\/products\/.*)/);
const brandsMatch = filePath.match(/(\/brands\/.*)/);
if (productsMatch) {
return `${IMAGES_PUBLIC_URL}${productsMatch[1]}`;
}
if (brandsMatch) {
return `${IMAGES_PUBLIC_URL}${brandsMatch[1]}`;
}
// Fallback: try to replace base path (works if paths match exactly)
const relativePath = filePath.replace(IMAGES_BASE_PATH, '');
return `${IMAGES_PUBLIC_URL}${relativePath}`;
}
/**
* Download and store a product image locally
* Build the directory path for a product image
* Structure: /images/products/<state>/<store>/<brand>/<product>/
*/
function buildProductImagePath(ctx: ProductImageContext): string {
const state = slugify(ctx.stateCode || 'unknown');
const store = slugify(ctx.storeSlug || 'unknown');
const brand = slugify(ctx.brandSlug || 'unknown');
const product = slugify(ctx.productId || 'unknown');
return path.join(IMAGES_BASE_PATH, 'products', state, store, brand, product);
}
/**
* Download and store a product image with proper hierarchy
*
* @param imageUrl - The third-party image URL to download
* @param dispensaryId - The dispensary ID (for directory organization)
* @param productId - The product ID or external ID (for filename)
* @param ctx - Product context (state, store, brand, product)
* @param options - Download options
* @returns Download result with local URLs
*/
export async function downloadProductImage(
imageUrl: string,
dispensaryId: number,
productId: string | number
ctx: ProductImageContext,
options: { skipIfExists?: boolean } = {}
): Promise<DownloadResult> {
const { skipIfExists = true } = options;
try {
if (!imageUrl) {
return { success: false, error: 'No image URL provided' };
}
const outputDir = buildProductImagePath(ctx);
const urlHash = hashUrl(imageUrl);
const baseFilename = `image-${urlHash}`;
// Check if image already exists
if (skipIfExists) {
const existingPath = path.join(outputDir, `${baseFilename}.webp`);
try {
await fs.access(existingPath);
// Image exists, return existing URL
const url = pathToUrl(existingPath);
return {
success: true,
skipped: true,
urls: {
original: url,
full: url,
medium: url,
thumb: url,
},
localPaths: {
original: existingPath,
full: existingPath,
medium: existingPath,
thumb: existingPath,
},
};
} catch {
// Image doesn't exist, continue to download
}
}
// Download the image
const buffer = await downloadImage(imageUrl);
// Organize by dispensary ID
const outputDir = path.join(IMAGES_BASE_PATH, 'products', String(dispensaryId));
// Use product ID + URL hash for uniqueness
const urlHash = hashUrl(imageUrl);
const baseFilename = `${productId}-${urlHash}`;
// Process and save
// Process and save (original only)
const result = await processAndSaveImage(buffer, outputDir, baseFilename);
const url = pathToUrl(result.original);
return {
success: true,
urls: {
full: pathToUrl(result.full),
medium: pathToUrl(result.medium),
thumb: pathToUrl(result.thumb),
original: url,
full: url,
medium: url,
thumb: url,
},
localPaths: {
original: result.original,
full: result.original,
medium: result.original,
thumb: result.original,
},
bytesDownloaded: result.totalBytes,
};
@@ -188,33 +251,70 @@ export async function downloadProductImage(
}
/**
* Download and store a brand logo locally
* Legacy function - backwards compatible with old signature
* Maps to new hierarchy using dispensary_id as store identifier
*/
export async function downloadProductImageLegacy(
imageUrl: string,
dispensaryId: number,
productId: string | number
): Promise<DownloadResult> {
return downloadProductImage(imageUrl, {
stateCode: 'unknown',
storeSlug: `store-${dispensaryId}`,
brandSlug: 'unknown',
productId: String(productId),
dispensaryId,
});
}
/**
* Download and store a brand logo
*
* @param logoUrl - The brand logo URL
* @param brandId - The brand ID or slug
* @param brandSlug - The brand slug/ID
* @returns Download result with local URL
*/
export async function downloadBrandLogo(
logoUrl: string,
brandId: string
brandSlug: string,
options: { skipIfExists?: boolean } = {}
): Promise<DownloadResult> {
const { skipIfExists = true } = options;
try {
if (!logoUrl) {
return { success: false, error: 'No logo URL provided' };
}
const safeBrandSlug = slugify(brandSlug);
const outputDir = path.join(IMAGES_BASE_PATH, 'brands', safeBrandSlug);
const urlHash = hashUrl(logoUrl);
const baseFilename = `logo-${urlHash}`;
// Check if logo already exists
if (skipIfExists) {
const existingPath = path.join(outputDir, `${baseFilename}.webp`);
try {
await fs.access(existingPath);
return {
success: true,
skipped: true,
urls: {
full: pathToUrl(existingPath),
medium: pathToUrl(existingPath),
thumb: pathToUrl(existingPath),
},
};
} catch {
// Logo doesn't exist, continue
}
}
// Download the image
const buffer = await downloadImage(logoUrl);
// Brand logos go in /images/brands/
const outputDir = path.join(IMAGES_BASE_PATH, 'brands');
// Sanitize brand ID for filename
const safeBrandId = brandId.replace(/[^a-zA-Z0-9-_]/g, '_');
const urlHash = hashUrl(logoUrl);
const baseFilename = `${safeBrandId}-${urlHash}`;
// Process and save (single size for logos)
// Brand logos in their own directory
await ensureDir(outputDir);
const logoPath = path.join(outputDir, `${baseFilename}.webp`);
@@ -243,20 +343,16 @@ export async function downloadBrandLogo(
}
/**
* Check if a local image already exists
* Check if a product image already exists
*/
export async function imageExists(
dispensaryId: number,
productId: string | number,
export async function productImageExists(
ctx: ProductImageContext,
imageUrl: string
): Promise<boolean> {
const outputDir = buildProductImagePath(ctx);
const urlHash = hashUrl(imageUrl);
const imagePath = path.join(
IMAGES_BASE_PATH,
'products',
String(dispensaryId),
`${productId}-${urlHash}.webp`
);
const imagePath = path.join(outputDir, `image-${urlHash}.webp`);
try {
await fs.access(imagePath);
return true;
@@ -266,24 +362,27 @@ export async function imageExists(
}
/**
* Delete a product's local images
* Get the local image URL for a product (if exists)
*/
export async function deleteProductImages(
dispensaryId: number,
productId: string | number,
imageUrl?: string
): Promise<void> {
const productDir = path.join(IMAGES_BASE_PATH, 'products', String(dispensaryId));
const prefix = imageUrl
? `${productId}-${hashUrl(imageUrl)}`
: String(productId);
export async function getProductImageUrl(
ctx: ProductImageContext,
imageUrl: string
): Promise<LocalImageSizes | null> {
const outputDir = buildProductImagePath(ctx);
const urlHash = hashUrl(imageUrl);
const imagePath = path.join(outputDir, `image-${urlHash}.webp`);
try {
const files = await fs.readdir(productDir);
const toDelete = files.filter(f => f.startsWith(prefix));
await Promise.all(toDelete.map(f => fs.unlink(path.join(productDir, f))));
await fs.access(imagePath);
const url = pathToUrl(imagePath);
return {
original: url,
full: url,
medium: url,
thumb: url,
};
} catch {
// Directory might not exist, that's fine
return null;
}
}
@@ -296,19 +395,17 @@ export function isImageStorageReady(): boolean {
/**
* Initialize the image storage directories
* Does NOT throw on failure - logs warning and continues
*/
export async function initializeImageStorage(): Promise<void> {
try {
await ensureDir(path.join(IMAGES_BASE_PATH, 'products'));
await ensureDir(path.join(IMAGES_BASE_PATH, 'brands'));
console.log(`Image storage initialized at ${IMAGES_BASE_PATH}`);
console.log(`[ImageStorage] Initialized at ${IMAGES_BASE_PATH}`);
imageStorageReady = true;
} catch (error: any) {
console.warn(`⚠️ WARNING: Could not initialize image storage at ${IMAGES_BASE_PATH}: ${error.message}`);
console.warn(' Image upload/processing is disabled. Server will continue without image features.');
console.warn(`[ImageStorage] WARNING: Could not initialize at ${IMAGES_BASE_PATH}: ${error.message}`);
console.warn(' Image features disabled. Server will continue without image downloads.');
imageStorageReady = false;
// Do NOT throw - server should still start
}
}
@@ -316,34 +413,43 @@ export async function initializeImageStorage(): Promise<void> {
* Get storage stats
*/
export async function getStorageStats(): Promise<{
productsDir: string;
brandsDir: string;
basePath: string;
productCount: number;
brandCount: number;
totalSizeBytes: number;
}> {
const productsDir = path.join(IMAGES_BASE_PATH, 'products');
const brandsDir = path.join(IMAGES_BASE_PATH, 'brands');
let productCount = 0;
let brandCount = 0;
let totalSizeBytes = 0;
async function countDir(dirPath: string): Promise<{ count: number; size: number }> {
let count = 0;
let size = 0;
try {
const productDirs = await fs.readdir(productsDir);
for (const dir of productDirs) {
const files = await fs.readdir(path.join(productsDir, dir));
productCount += files.filter(f => f.endsWith('.webp') && !f.includes('-')).length;
const entries = await fs.readdir(dirPath, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(dirPath, entry.name);
if (entry.isDirectory()) {
const sub = await countDir(fullPath);
count += sub.count;
size += sub.size;
} else if (entry.name.endsWith('.webp') && !entry.name.includes('-')) {
count++;
const stat = await fs.stat(fullPath);
size += stat.size;
}
}
} catch { /* ignore */ }
return { count, size };
}
try {
const brandFiles = await fs.readdir(brandsDir);
brandCount = brandFiles.filter(f => f.endsWith('.webp')).length;
} catch { /* ignore */ }
const products = await countDir(path.join(IMAGES_BASE_PATH, 'products'));
const brands = await countDir(path.join(IMAGES_BASE_PATH, 'brands'));
return {
productsDir,
brandsDir,
productCount,
brandCount,
basePath: IMAGES_BASE_PATH,
productCount: products.count,
brandCount: brands.count,
totalSizeBytes: products.size + brands.size,
};
}

View File

@@ -113,8 +113,16 @@ class ApiClient {
});
}
async getDispensaries() {
return this.request<{ dispensaries: any[] }>('/api/dispensaries');
async getDispensaries(params?: { limit?: number; offset?: number; search?: string; city?: string; state?: string; crawl_enabled?: string }) {
const searchParams = new URLSearchParams();
if (params?.limit) searchParams.append('limit', params.limit.toString());
if (params?.offset) searchParams.append('offset', params.offset.toString());
if (params?.search) searchParams.append('search', params.search);
if (params?.city) searchParams.append('city', params.city);
if (params?.state) searchParams.append('state', params.state);
if (params?.crawl_enabled) searchParams.append('crawl_enabled', params.crawl_enabled);
const queryString = searchParams.toString() ? `?${searchParams.toString()}` : '';
return this.request<{ dispensaries: any[]; total: number; limit: number; offset: number; hasMore: boolean }>(`/api/dispensaries${queryString}`);
}
async getDispensary(slug: string) {

119
cannaiq/src/lib/images.ts Normal file
View File

@@ -0,0 +1,119 @@
/**
* Image URL utilities for on-demand resizing
*
* Uses the backend's /img proxy endpoint for local images.
* Falls back to original URL for remote images.
*/
const API_BASE = import.meta.env.VITE_API_URL || '';
interface ImageOptions {
width?: number;
height?: number;
quality?: number;
fit?: 'cover' | 'contain' | 'fill' | 'inside' | 'outside';
}
/**
* Check if URL is a local image path
*/
function isLocalImage(url: string): boolean {
return url.startsWith('/images/') || url.startsWith('/img/');
}
/**
* Build an image URL with optional resize parameters
*
* @param imageUrl - Original image URL (local or remote)
* @param options - Resize options
* @returns Optimized image URL
*
* @example
* // Thumbnail (50px)
* getImageUrl(product.image_url, { width: 50 })
*
* // Card image (200px)
* getImageUrl(product.image_url, { width: 200 })
*
* // Detail view (600px)
* getImageUrl(product.image_url, { width: 600 })
*
* // Square crop
* getImageUrl(product.image_url, { width: 200, height: 200, fit: 'cover' })
*/
export function getImageUrl(
imageUrl: string | null | undefined,
options: ImageOptions = {}
): string | null {
if (!imageUrl) return null;
// For remote images (AWS, Dutchie CDN, etc.), return as-is
// These can't be resized by our proxy
if (imageUrl.startsWith('http://') || imageUrl.startsWith('https://')) {
return imageUrl;
}
// For local images, use the /img proxy with resize params
if (isLocalImage(imageUrl)) {
// Convert /images/ path to /img/ proxy path
let proxyPath = imageUrl;
if (imageUrl.startsWith('/images/')) {
proxyPath = imageUrl.replace('/images/', '/img/');
}
// Build query params
const params = new URLSearchParams();
if (options.width) params.set('w', String(options.width));
if (options.height) params.set('h', String(options.height));
if (options.quality) params.set('q', String(options.quality));
if (options.fit) params.set('fit', options.fit);
const queryString = params.toString();
const url = queryString ? `${proxyPath}?${queryString}` : proxyPath;
// Prepend API base if needed
return API_BASE ? `${API_BASE}${url}` : url;
}
// Unknown format, return as-is
return imageUrl;
}
/**
* Preset sizes for common use cases
*/
export const ImageSizes = {
/** Tiny thumbnail for lists (50px) */
thumb: { width: 50 },
/** Small card (100px) */
small: { width: 100 },
/** Medium card (200px) */
medium: { width: 200 },
/** Large card (400px) */
large: { width: 400 },
/** Detail view (600px) */
detail: { width: 600 },
/** Full size (no resize) */
full: {},
} as const;
/**
* Convenience function for thumbnail
*/
export function getThumbUrl(imageUrl: string | null | undefined): string | null {
return getImageUrl(imageUrl, ImageSizes.thumb);
}
/**
* Convenience function for card images
*/
export function getCardUrl(imageUrl: string | null | undefined): string | null {
return getImageUrl(imageUrl, ImageSizes.medium);
}
/**
* Convenience function for detail images
*/
export function getDetailUrl(imageUrl: string | null | undefined): string | null {
return getImageUrl(imageUrl, ImageSizes.detail);
}

View File

@@ -1,33 +1,71 @@
import React, { useEffect, useState } from 'react';
import React, { useEffect, useState, useCallback } from 'react';
import { useNavigate } from 'react-router-dom';
import { Layout } from '../components/Layout';
import { api } from '../lib/api';
import { Building2, Phone, Mail, MapPin, ExternalLink, Search, Eye, Pencil, X, Save } from 'lucide-react';
import { Building2, Phone, Mail, MapPin, ExternalLink, Search, Eye, Pencil, X, Save, ChevronLeft, ChevronRight } from 'lucide-react';
const PAGE_SIZE = 50;
export function Dispensaries() {
const navigate = useNavigate();
const [dispensaries, setDispensaries] = useState<any[]>([]);
const [loading, setLoading] = useState(true);
const [searchTerm, setSearchTerm] = useState('');
const [filterCity, setFilterCity] = useState('');
const [debouncedSearch, setDebouncedSearch] = useState('');
const [filterState, setFilterState] = useState('');
const [editingDispensary, setEditingDispensary] = useState<any | null>(null);
const [editForm, setEditForm] = useState<any>({});
const [total, setTotal] = useState(0);
const [offset, setOffset] = useState(0);
const [hasMore, setHasMore] = useState(false);
const [states, setStates] = useState<string[]>([]);
// Debounce search
useEffect(() => {
loadDispensaries();
const timer = setTimeout(() => {
setDebouncedSearch(searchTerm);
setOffset(0); // Reset to first page on search
}, 300);
return () => clearTimeout(timer);
}, [searchTerm]);
// Load states once for filter dropdown
useEffect(() => {
const loadStates = async () => {
try {
const data = await api.getDispensaries({ limit: 500, crawl_enabled: 'all' });
const uniqueStates = Array.from(new Set(data.dispensaries.map((d: any) => d.state).filter(Boolean))).sort() as string[];
setStates(uniqueStates);
} catch (error) {
console.error('Failed to load states:', error);
}
};
loadStates();
}, []);
const loadDispensaries = async () => {
const loadDispensaries = useCallback(async () => {
setLoading(true);
try {
const data = await api.getDispensaries();
const data = await api.getDispensaries({
limit: PAGE_SIZE,
offset,
search: debouncedSearch || undefined,
state: filterState || undefined,
crawl_enabled: 'all'
});
setDispensaries(data.dispensaries);
setTotal(data.total);
setHasMore(data.hasMore);
} catch (error) {
console.error('Failed to load dispensaries:', error);
} finally {
setLoading(false);
}
};
}, [offset, debouncedSearch, filterState]);
useEffect(() => {
loadDispensaries();
}, [loadDispensaries]);
const handleEdit = (dispensary: any) => {
setEditingDispensary(dispensary);
@@ -59,17 +97,18 @@ export function Dispensaries() {
setEditForm({});
};
const filteredDispensaries = dispensaries.filter(disp => {
const searchLower = searchTerm.toLowerCase();
const matchesSearch = !searchTerm ||
disp.name.toLowerCase().includes(searchLower) ||
(disp.company_name && disp.company_name.toLowerCase().includes(searchLower)) ||
(disp.dba_name && disp.dba_name.toLowerCase().includes(searchLower));
const matchesCity = !filterCity || disp.city === filterCity;
return matchesSearch && matchesCity;
});
const currentPage = Math.floor(offset / PAGE_SIZE) + 1;
const totalPages = Math.ceil(total / PAGE_SIZE);
const cities = Array.from(new Set(dispensaries.map(d => d.city).filter(Boolean))).sort();
const goToPage = (page: number) => {
const newOffset = (page - 1) * PAGE_SIZE;
setOffset(newOffset);
};
const handleStateFilter = (state: string) => {
setFilterState(state);
setOffset(0); // Reset to first page
};
return (
<Layout>
@@ -78,7 +117,7 @@ export function Dispensaries() {
<div>
<h1 className="text-2xl font-bold text-gray-900">Dispensaries</h1>
<p className="text-sm text-gray-600 mt-1">
AZDHS official dispensary directory ({dispensaries.length} total)
USA and Canada Dispensary Directory ({total} total)
</p>
</div>
@@ -102,16 +141,16 @@ export function Dispensaries() {
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-2">
Filter by City
Filter by State
</label>
<select
value={filterCity}
onChange={(e) => setFilterCity(e.target.value)}
value={filterState}
onChange={(e) => handleStateFilter(e.target.value)}
className="w-full px-3 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500 focus:border-blue-500"
>
<option value="">All Cities</option>
{cities.map(city => (
<option key={city} value={city}>{city}</option>
<option value="">All States</option>
{states.map(state => (
<option key={state} value={state}>{state}</option>
))}
</select>
</div>
@@ -133,9 +172,6 @@ export function Dispensaries() {
<th className="px-4 py-3 text-left text-xs font-medium text-gray-700 uppercase tracking-wider">
Name
</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-700 uppercase tracking-wider">
Company
</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-700 uppercase tracking-wider">
Address
</th>
@@ -157,14 +193,14 @@ export function Dispensaries() {
</tr>
</thead>
<tbody className="divide-y divide-gray-200">
{filteredDispensaries.length === 0 ? (
{dispensaries.length === 0 ? (
<tr>
<td colSpan={8} className="px-4 py-8 text-center text-sm text-gray-500">
<td colSpan={7} className="px-4 py-8 text-center text-sm text-gray-500">
No dispensaries found
</td>
</tr>
) : (
filteredDispensaries.map((disp) => (
dispensaries.map((disp) => (
<tr key={disp.id} className="hover:bg-gray-50">
<td className="px-4 py-3">
<div className="flex items-center gap-2">
@@ -181,13 +217,10 @@ export function Dispensaries() {
</div>
</div>
</td>
<td className="px-4 py-3">
<span className="text-sm text-gray-600">{disp.company_name || '-'}</span>
</td>
<td className="px-4 py-3">
<div className="flex items-start gap-1">
<MapPin className="w-3 h-3 text-gray-400 flex-shrink-0 mt-0.5" />
<span className="text-sm text-gray-600">{disp.address || '-'}</span>
<span className="text-sm text-gray-600">{disp.address1 || '-'}</span>
</div>
</td>
<td className="px-4 py-3">
@@ -266,10 +299,33 @@ export function Dispensaries() {
</table>
</div>
{/* Footer */}
{/* Footer with Pagination */}
<div className="bg-gray-50 px-4 py-3 border-t border-gray-200">
<div className="flex items-center justify-between">
<div className="text-sm text-gray-600">
Showing {filteredDispensaries.length} of {dispensaries.length} dispensaries
Showing {offset + 1}-{Math.min(offset + dispensaries.length, total)} of {total} dispensaries
</div>
<div className="flex items-center gap-2">
<button
onClick={() => goToPage(currentPage - 1)}
disabled={currentPage === 1}
className="inline-flex items-center gap-1 px-3 py-1.5 text-sm font-medium text-gray-700 bg-white border border-gray-300 rounded-lg hover:bg-gray-50 disabled:opacity-50 disabled:cursor-not-allowed"
>
<ChevronLeft className="w-4 h-4" />
Prev
</button>
<span className="text-sm text-gray-600">
Page {currentPage} of {totalPages}
</span>
<button
onClick={() => goToPage(currentPage + 1)}
disabled={!hasMore}
className="inline-flex items-center gap-1 px-3 py-1.5 text-sm font-medium text-gray-700 bg-white border border-gray-300 rounded-lg hover:bg-gray-50 disabled:opacity-50 disabled:cursor-not-allowed"
>
Next
<ChevronRight className="w-4 h-4" />
</button>
</div>
</div>
</div>
</div>

View File

@@ -2,6 +2,7 @@ import { useEffect, useState } from 'react';
import { useParams, useNavigate, Link } from 'react-router-dom';
import { Layout } from '../components/Layout';
import { api } from '../lib/api';
import { getImageUrl, ImageSizes } from '../lib/images';
import {
Building2,
Phone,
@@ -497,7 +498,7 @@ export function DispensaryDetail() {
<td className="whitespace-nowrap">
{product.image_url ? (
<img
src={product.image_url}
src={getImageUrl(product.image_url, ImageSizes.thumb) || product.image_url}
alt={product.name}
className="w-12 h-12 object-cover rounded"
onError={(e) => e.currentTarget.style.display = 'none'}
@@ -686,7 +687,7 @@ export function DispensaryDetail() {
<div className="flex items-start gap-3">
{special.image_url && (
<img
src={special.image_url}
src={getImageUrl(special.image_url, ImageSizes.small) || special.image_url}
alt={special.name}
className="w-16 h-16 object-cover rounded"
onError={(e) => e.currentTarget.style.display = 'none'}

View File

@@ -3,6 +3,7 @@ import { Layout } from '../components/Layout';
import { Package, ArrowLeft, TrendingUp, TrendingDown, DollarSign, Search, Filter, ChevronDown, X, LineChart } from 'lucide-react';
import { useNavigate, useSearchParams } from 'react-router-dom';
import { api } from '../lib/api';
import { getImageUrl, ImageSizes } from '../lib/images';
interface Product {
id: number;
@@ -324,7 +325,7 @@ export function OrchestratorProducts() {
<div className="flex items-center gap-3">
{product.image_url ? (
<img
src={product.image_url}
src={getImageUrl(product.image_url, ImageSizes.thumb) || product.image_url}
alt={product.name}
className="w-10 h-10 rounded object-cover"
/>
@@ -395,7 +396,7 @@ export function OrchestratorProducts() {
<div className="flex items-center gap-4">
{selectedProduct.image_url ? (
<img
src={selectedProduct.image_url}
src={getImageUrl(selectedProduct.image_url, ImageSizes.small) || selectedProduct.image_url}
alt={selectedProduct.name}
className="w-16 h-16 rounded object-cover"
/>

View File

@@ -3,6 +3,7 @@ import { Layout } from '../components/Layout';
import { Scale, Search, Package, Store, Trophy, TrendingDown, TrendingUp, MapPin } from 'lucide-react';
import { useNavigate, useSearchParams } from 'react-router-dom';
import { api } from '../lib/api';
import { getImageUrl, ImageSizes } from '../lib/images';
interface CompareResult {
product_id: number;
@@ -311,7 +312,7 @@ export function PriceCompare() {
<div className="flex items-center gap-3">
{item.image_url ? (
<img
src={item.image_url}
src={getImageUrl(item.image_url, ImageSizes.thumb) || item.image_url}
alt={item.product_name}
className="w-10 h-10 rounded object-cover"
/>

View File

@@ -2,6 +2,7 @@ import { useEffect, useState } from 'react';
import { useParams, useNavigate } from 'react-router-dom';
import { Layout } from '../components/Layout';
import { api } from '../lib/api';
import { getImageUrl, ImageSizes } from '../lib/images';
import { ArrowLeft, ExternalLink, Package, Code, Copy, CheckCircle, FileJson, TrendingUp, TrendingDown, Minus, BarChart3 } from 'lucide-react';
export function ProductDetail() {
@@ -114,14 +115,9 @@ export function ProductDetail() {
const metadata = product.metadata || {};
const getImageUrl = () => {
if (product.image_url_full) return product.image_url_full;
if (product.medium_path) return `/api/images/dutchie/${product.medium_path}`;
if (product.thumbnail_path) return `/api/images/dutchie/${product.thumbnail_path}`;
return null;
};
const imageUrl = getImageUrl();
// Use the centralized image URL helper for on-demand resizing
const productImageUrl = product.image_url_full || product.image_url || product.medium_path || product.thumbnail_path;
const imageUrl = getImageUrl(productImageUrl, ImageSizes.detail);
return (
<Layout>

View File

@@ -2,6 +2,7 @@ import { useEffect, useState } from 'react';
import { useSearchParams, useNavigate } from 'react-router-dom';
import { Layout } from '../components/Layout';
import { api } from '../lib/api';
import { getImageUrl, ImageSizes } from '../lib/images';
export function Products() {
const [searchParams, setSearchParams] = useSearchParams();
@@ -417,9 +418,9 @@ function ProductCard({ product, onViewDetails }: { product: any; onViewDetails:
onMouseEnter={(e) => e.currentTarget.style.transform = 'translateY(-4px)'}
onMouseLeave={(e) => e.currentTarget.style.transform = 'translateY(0)'}
>
{product.image_url_full ? (
{(product.image_url_full || product.image_url) ? (
<img
src={product.image_url_full}
src={getImageUrl(product.image_url_full || product.image_url, ImageSizes.medium) || product.image_url_full || product.image_url}
alt={product.name}
style={{
width: '100%',

View File

@@ -3,6 +3,7 @@ import { Layout } from '../components/Layout';
import { Tag, Package, Store, Percent, Search, Filter, ArrowUpDown, ExternalLink } from 'lucide-react';
import { useNavigate, useSearchParams } from 'react-router-dom';
import { api } from '../lib/api';
import { getImageUrl, ImageSizes } from '../lib/images';
interface Special {
variant_id: number;
@@ -284,7 +285,7 @@ export function Specials() {
<div className="relative">
{special.image_url ? (
<img
src={special.image_url}
src={getImageUrl(special.image_url, ImageSizes.medium) || special.image_url}
alt={special.product_name}
className="w-full h-32 object-cover"
/>

View File

@@ -2,6 +2,7 @@ import { useEffect, useState } from 'react';
import { useParams, useNavigate } from 'react-router-dom';
import { Layout } from '../components/Layout';
import { api } from '../lib/api';
import { getImageUrl as getResizedImageUrl, ImageSizes } from '../lib/images';
import {
Package, Tag, Zap, Clock, ExternalLink, CheckCircle, XCircle,
AlertCircle, Building, MapPin, RefreshCw, Calendar, Activity
@@ -101,9 +102,10 @@ export function StoreDetail() {
};
const getImageUrl = (product: any) => {
if (product.image_url_full) return product.image_url_full;
if (product.medium_path) return `/api/images/dutchie/${product.medium_path}`;
if (product.thumbnail_path) return `/api/images/dutchie/${product.thumbnail_path}`;
const rawUrl = product.image_url_full || product.image_url || product.medium_path || product.thumbnail_path;
if (rawUrl) {
return getResizedImageUrl(rawUrl, ImageSizes.medium) || rawUrl;
}
return 'https://via.placeholder.com/300x300?text=No+Image';
};

View File

@@ -3,6 +3,7 @@ import { useParams, useNavigate } from 'react-router-dom';
import { Layout } from '../components/Layout';
import { api } from '../lib/api';
import { trackProductView } from '../lib/analytics';
import { getImageUrl, ImageSizes } from '../lib/images';
import {
Building2,
Phone,
@@ -470,7 +471,7 @@ export function StoreDetailPage() {
<td className="whitespace-nowrap">
{product.image_url ? (
<img
src={product.image_url}
src={getImageUrl(product.image_url, ImageSizes.thumb) || product.image_url}
alt={product.name}
className="w-12 h-12 object-cover rounded"
onError={(e) => e.currentTarget.style.display = 'none'}