diff --git a/.woodpecker/.ci.yml b/.woodpecker/.ci.yml index 7aca4add..cb9e23dc 100644 --- a/.woodpecker/.ci.yml +++ b/.woodpecker/.ci.yml @@ -65,10 +65,10 @@ steps: platforms: linux/amd64 provenance: false build_args: - - APP_BUILD_VERSION=${CI_COMMIT_SHA:0:8} + - APP_BUILD_VERSION=${CI_COMMIT_SHA} - APP_GIT_SHA=${CI_COMMIT_SHA} - APP_BUILD_TIME=${CI_PIPELINE_CREATED} - - CONTAINER_IMAGE_TAG=${CI_COMMIT_SHA:0:8} + - CONTAINER_IMAGE_TAG=${CI_COMMIT_SHA} depends_on: [] when: branch: master diff --git a/CLAUDE.md b/CLAUDE.md index 9a52deb3..dbd7e261 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -459,15 +459,66 @@ const result = await pool.query(` ### Local Storage Structure ``` -/storage/products/{brand}/{state}/{product_id}/ +/storage/images/products/{state}/{store}/{brand}/{product}/ image-{hash}.webp - image-{hash}-medium.webp - image-{hash}-thumb.webp -/storage/brands/{brand}/ +/storage/images/brands/{brand}/ logo-{hash}.webp ``` +### Image Proxy API (On-Demand Resizing) + +Images are stored at full resolution and resized on-demand via the `/img` endpoint. 
+ +**Endpoint:** `GET /img/{path}?{params}` + +**Parameters:** +| Param | Description | Example | +|-------|-------------|---------| +| `w` | Width in pixels (max 4000) | `?w=200` | +| `h` | Height in pixels (max 4000) | `?h=200` | +| `q` | Quality 1-100 (default 80) | `?q=70` | +| `fit` | Resize mode: cover, contain, fill, inside, outside (default inside) | `?fit=cover` | +| `blur` | Blur sigma 0.3-1000 | `?blur=5` | +| `gray` | Grayscale (1 = enabled) | `?gray=1` | +| `format` | Output: webp, jpeg, png, avif (default webp) | `?format=jpeg` | + +**Examples:** +```bash +# Thumbnail (50px) +GET /img/products/az/store/brand/product/image-abc123.webp?w=50 + +# Card image (200px, cover fit) +GET /img/products/az/store/brand/product/image-abc123.webp?w=200&h=200&fit=cover + +# JPEG at 70% quality +GET /img/products/az/store/brand/product/image-abc123.webp?w=400&format=jpeg&q=70 + +# Grayscale blur +GET /img/products/az/store/brand/product/image-abc123.webp?w=200&gray=1&blur=3 +``` + +**Frontend Usage:** +```typescript +import { getImageUrl, ImageSizes } from '../lib/images'; + +// Returns /img/products/.../image.webp?w=50 for local images +// Returns original URL for remote images (CDN, etc.) 
+const thumbUrl = getImageUrl(product.image_url, ImageSizes.thumb); +const cardUrl = getImageUrl(product.image_url, ImageSizes.medium); +const detailUrl = getImageUrl(product.image_url, ImageSizes.detail); +``` + +**Size Presets:** +| Preset | Width | Use Case | +|--------|-------|----------| +| `thumb` | 50px | Table thumbnails | +| `small` | 100px | Small cards | +| `medium` | 200px | Grid cards | +| `large` | 400px | Large cards | +| `detail` | 600px | Product detail | +| `full` | - | No resize | + ### Storage Adapter ```typescript @@ -480,8 +531,9 @@ import { saveImage, getImageUrl } from '../utils/storage-adapter'; | File | Purpose | |------|---------| -| `backend/src/utils/local-storage.ts` | Local filesystem adapter | -| `backend/src/utils/storage-adapter.ts` | Unified storage abstraction | +| `backend/src/utils/image-storage.ts` | Image download and storage | +| `backend/src/routes/image-proxy.ts` | On-demand image resizing endpoint | +| `cannaiq/src/lib/images.ts` | Frontend image URL helper | | `docker-compose.local.yml` | Local stack without MinIO | | `start-local.sh` | Convenience startup script | diff --git a/backend/migrations/073_proxy_timezone.sql b/backend/migrations/073_proxy_timezone.sql new file mode 100644 index 00000000..8e000415 --- /dev/null +++ b/backend/migrations/073_proxy_timezone.sql @@ -0,0 +1,12 @@ +-- Add timezone column to proxies table for geo-consistent fingerprinting +-- This allows matching Accept-Language and other headers to proxy location + +ALTER TABLE proxies +ADD COLUMN IF NOT EXISTS timezone VARCHAR(50); + +-- Add timezone to failed_proxies as well +ALTER TABLE failed_proxies +ADD COLUMN IF NOT EXISTS timezone VARCHAR(50); + +-- Comment explaining usage +COMMENT ON COLUMN proxies.timezone IS 'IANA timezone (e.g., America/Phoenix) for geo-consistent fingerprinting'; diff --git a/backend/src/hydration/canonical-upsert.ts b/backend/src/hydration/canonical-upsert.ts index 393b9f27..ea7a5e6c 100644 --- 
/**
 * Identity and image metadata for a single store product touched by
 * `upsertStoreProducts`. Used to report new rows and to queue image downloads.
 */
export interface NewProductInfo {
  /** store_products.id */
  id: number;
  /** provider_product_id */
  externalProductId: string;
  name: string;
  brandName: string | null;
  primaryImageUrl: string | null;
  /** True if local_image_path is already set */
  hasLocalImage?: boolean;
}

/**
 * Counters and per-product details returned by `upsertStoreProducts`.
 */
export interface UpsertProductsResult {
  upserted: number;
  new: number;
  updated: number;
  /** Details of newly created products */
  newProducts: NewProductInfo[];
  /** Products (new or updated) that need image downloads */
  productsNeedingImages: NewProductInfo[];
}
/** Aggregate outcome of one image-download pass. */
export interface ImageDownloadResult {
  downloaded: number;
  skipped: number;
  failed: number;
  bytesTotal: number;
}

/** Per-dispensary values used to build local image storage paths. */
export interface DispensaryContext {
  stateCode: string;
  storeSlug: string;
  /** True if store already has products with local images */
  hasExistingProducts?: boolean;
}

/**
 * Helper to create slug from string.
 *
 * Lower-cases the input, collapses every run of non `[a-z0-9]` characters into
 * a single hyphen, trims leading/trailing hyphens and caps the result at 50
 * characters. Returns `'unknown'` when nothing usable remains.
 */
function slugify(str: string): string {
  const slug = str
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .substring(0, 50)
    // The 50-char cut can land directly after a separator; trim again so the
    // slug never ends with a dangling hyphen.
    .replace(/-+$/, '');

  return slug || 'unknown';
}

/**
 * Download images for the given products and record their local paths.
 *
 * Products without a `primaryImageUrl` are ignored. Downloads run in batches
 * of `options.concurrency` (default 5); each successful download updates
 * `store_products.local_image_path` and `store_products.images`.
 *
 * @param pool - Database pool used for the follow-up UPDATE statements.
 * @param newProducts - Candidate products (new or backfill).
 * @param dispensaryContext - State code and store slug used in the image path.
 * @param options - `dryRun` skips all downloads; `concurrency` is the batch
 *   size and is clamped to at least 1.
 * @returns Counts of downloaded, skipped and failed images plus total bytes.
 */
export async function downloadProductImages(
  pool: Pool,
  newProducts: NewProductInfo[],
  dispensaryContext: DispensaryContext,
  options: { dryRun?: boolean; concurrency?: number } = {}
): Promise<ImageDownloadResult> {
  const { dryRun = false, concurrency = 5 } = options;

  // Filter products that have images to download (narrowing removes the need
  // for a non-null assertion on primaryImageUrl below).
  const productsWithImages = newProducts.filter(
    (p): p is NewProductInfo & { primaryImageUrl: string } => Boolean(p.primaryImageUrl)
  );

  if (productsWithImages.length === 0) {
    return { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
  }

  // Check if image storage is ready
  if (!isImageStorageReady()) {
    console.warn('[ImageDownload] Image storage not initialized, skipping downloads');
    return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
  }

  if (dryRun) {
    console.log(`[DryRun] Would download ${productsWithImages.length} images`);
    return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
  }

  // A step of 0, a negative number or NaN would keep the batching loop from
  // ever advancing, so fall back to one download at a time.
  const batchSize = concurrency >= 1 ? Math.floor(concurrency) : 1;

  let downloaded = 0;
  let skipped = 0;
  let failed = 0;
  let bytesTotal = 0;

  // Process in batches with concurrency limit
  for (let i = 0; i < productsWithImages.length; i += batchSize) {
    const batch = productsWithImages.slice(i, i + batchSize);

    const results = await Promise.allSettled(
      batch.map(async (product) => {
        const ctx: ProductImageContext = {
          stateCode: dispensaryContext.stateCode,
          storeSlug: dispensaryContext.storeSlug,
          brandSlug: slugify(product.brandName || 'unknown'),
          productId: product.externalProductId,
        };

        const result = await downloadProductImage(product.primaryImageUrl, ctx, { skipIfExists: true });

        if (result.success) {
          const urls = result.urls;
          if (!urls) {
            // Rejecting here is counted as a failure by the settle loop below.
            throw new Error(`Image download for product ${product.id} succeeded but returned no URLs`);
          }

          // Update the database with local image path
          const imagesJson = JSON.stringify({
            full: urls.full,
            medium: urls.medium,
            thumb: urls.thumb,
          });

          await pool.query(
            `UPDATE store_products
             SET local_image_path = $1, images = $2
             WHERE id = $3`,
            [urls.full, imagesJson, product.id]
          );
        }

        return result;
      })
    );

    for (const result of results) {
      if (result.status === 'fulfilled') {
        const downloadResult = result.value;
        if (downloadResult.success) {
          if (downloadResult.skipped) {
            skipped++;
          } else {
            downloaded++;
            bytesTotal += downloadResult.bytesDownloaded || 0;
          }
        } else {
          failed++;
          console.warn(`[ImageDownload] Failed: ${downloadResult.error}`);
        }
      } else {
        failed++;
        console.error(`[ImageDownload] Error:`, result.reason);
      }
    }
  }

  console.log(`[ImageDownload] Downloaded: ${downloaded}, Skipped: ${skipped}, Failed: ${failed}, Bytes: ${bytesTotal}`);
  return { downloaded, skipped, failed, bytesTotal };
}

/**
 * Get dispensary context for image paths.
 *
 * Reads the dispensary's state, slug and name, and reports whether any of its
 * store products already has a `local_image_path`. The store slug falls back
 * to a slugified name (or `store-{id}`) when the `slug` column is empty.
 *
 * @returns The context, or `null` when the dispensary does not exist or the
 *   query fails (the error is logged, not rethrown).
 */
async function getDispensaryContext(pool: Pool, dispensaryId: number): Promise<DispensaryContext | null> {
  try {
    const result = await pool.query(
      `SELECT
         d.state,
         d.slug,
         d.name,
         EXISTS(
           SELECT 1 FROM store_products sp
           WHERE sp.dispensary_id = d.id
             AND sp.local_image_path IS NOT NULL
           LIMIT 1
         ) as has_local_images
       FROM dispensaries d
       WHERE d.id = $1`,
      [dispensaryId]
    );

    if (result.rows.length === 0) {
      return null;
    }

    const row = result.rows[0];
    return {
      stateCode: row.state || 'unknown',
      storeSlug: row.slug || slugify(row.name || `store-${dispensaryId}`),
      hasExistingProducts: row.has_local_images,
    };
  } catch (error) {
    console.error('[getDispensaryContext] Error:', error);
    return null;
  }
}
Upsert brands const brandResult = await upsertBrands(pool, normResult.brands, { dryRun }); @@ -634,6 +836,36 @@ export async function hydrateToCanonical( { dryRun } ); + // 6. Download images for products that need them + // This includes: + // - New products (always need images) + // - Updated products that don't have local images yet (backfill) + // This avoids: + // - Filesystem checks for products that already have local images + // - Unnecessary HTTP requests for products with existing images + let imageResult: ImageDownloadResult = { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 }; + + if (shouldDownloadImages && productResult.productsNeedingImages.length > 0) { + const dispensaryContext = await getDispensaryContext(pool, dispensaryId); + + if (dispensaryContext) { + const newCount = productResult.productsNeedingImages.filter(p => !p.hasLocalImage).length; + const backfillCount = productResult.productsNeedingImages.length - newCount; + console.log(`[Hydration] Downloading images for ${productResult.productsNeedingImages.length} products (${productResult.new} new, ${backfillCount} backfill)...`); + imageResult = await downloadProductImages( + pool, + productResult.productsNeedingImages, + dispensaryContext, + { dryRun } + ); + } else { + console.warn(`[Hydration] Could not get dispensary context for ID ${dispensaryId}, skipping image downloads`); + } + } else if (productResult.productsNeedingImages.length === 0 && productResult.upserted > 0) { + // All products already have local images + console.log(`[Hydration] All ${productResult.upserted} products already have local images, skipping downloads`); + } + return { productsUpserted: productResult.upserted, productsNew: productResult.new, @@ -644,5 +876,9 @@ export async function hydrateToCanonical( variantsUpserted: variantResult.upserted, variantsNew: variantResult.new, variantSnapshotsCreated: variantResult.snapshotsCreated, + imagesDownloaded: imageResult.downloaded, + imagesSkipped: 
/** Accept-Language sent when a timezone is missing or has no explicit mapping. */
const DEFAULT_ACCEPT_LANGUAGE = 'en-US,en;q=0.9';

// ============================================================
// SESSION MANAGEMENT
// Per-session fingerprint rotation for stealth
// ============================================================

/** State of one crawl session: identity, proxy and geographic hints. */
export interface CrawlSession {
  sessionId: string;
  fingerprint: Fingerprint;
  proxyUrl: string | null;
  stateCode?: string;
  timezone?: string;
  startedAt: Date;
}

// Declared ahead of getFingerprint() so the binding is initialised before any
// function that reads it can run. Interfaces are hoisted, so CrawlSession can
// be used here directly instead of a duplicated structural type.
let currentSession: CrawlSession | null = null;

/**
 * Get current fingerprint - returns session fingerprint if active, otherwise default
 */
export function getFingerprint(): Fingerprint {
  // Use session fingerprint if a session is active
  if (currentSession) {
    return currentSession.fingerprint;
  }
  return FINGERPRINTS[currentFingerprintIndex];
}

/**
 * Get a random fingerprint from the pool
 */
export function getRandomFingerprint(): Fingerprint {
  const index = Math.floor(Math.random() * FINGERPRINTS.length);
  return FINGERPRINTS[index];
}

/**
 * Timezone to Accept-Language mapping
 * US timezones all use en-US but this can be extended for international
 */
const TIMEZONE_TO_LOCALE: Record<string, string> = {
  'America/Phoenix': 'en-US,en;q=0.9',
  'America/Los_Angeles': 'en-US,en;q=0.9',
  'America/Denver': 'en-US,en;q=0.9',
  'America/Chicago': 'en-US,en;q=0.9',
  'America/New_York': 'en-US,en;q=0.9',
  'America/Detroit': 'en-US,en;q=0.9',
  'America/Anchorage': 'en-US,en;q=0.9',
  'Pacific/Honolulu': 'en-US,en;q=0.9',
};

/**
 * Get Accept-Language header for a given timezone.
 *
 * @param timezone - IANA timezone name; optional.
 * @returns The mapped Accept-Language value, or the en-US default when the
 *   timezone is absent or unmapped.
 */
export function getLocaleForTimezone(timezone?: string): string {
  if (!timezone) return DEFAULT_ACCEPT_LANGUAGE;

  // Own-property check: a plain lookup would hand back inherited members of
  // Object.prototype (e.g. a function for the key "constructor").
  const isMapped = Object.prototype.hasOwnProperty.call(TIMEZONE_TO_LOCALE, timezone);
  const locale = isMapped ? TIMEZONE_TO_LOCALE[timezone] : undefined;
  return locale || DEFAULT_ACCEPT_LANGUAGE;
}

/**
 * Start a new crawl session with a random fingerprint
 * Call this before crawling a store to get a fresh identity
 *
 * Replaces any session that is currently active.
 */
export function startSession(stateCode?: string, timezone?: string): CrawlSession {
  const baseFp = getRandomFingerprint();

  // Override Accept-Language based on timezone for geographic consistency
  const fingerprint: Fingerprint = {
    ...baseFp,
    acceptLanguage: getLocaleForTimezone(timezone),
  };

  const session: CrawlSession = {
    sessionId: `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
    fingerprint,
    proxyUrl: currentProxy,
    stateCode,
    timezone,
    startedAt: new Date(),
  };
  currentSession = session;

  console.log(`[Dutchie Client] Started session ${session.sessionId}`);
  console.log(`[Dutchie Client] Fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`);
  console.log(`[Dutchie Client] Accept-Language: ${fingerprint.acceptLanguage}`);
  if (timezone) {
    console.log(`[Dutchie Client] Timezone: ${timezone}`);
  }

  return session;
}

/**
 * End the current crawl session (no-op when none is active)
 */
export function endSession(): void {
  if (currentSession) {
    const duration = Math.round((Date.now() - currentSession.startedAt.getTime()) / 1000);
    console.log(`[Dutchie Client] Ended session ${currentSession.sessionId} (${duration}s)`);
    currentSession = null;
  }
}

/**
 * Get current active session
 */
export function getCurrentSession(): CrawlSession | null {
  return currentSession;
}
router.use(authMiddleware); // Valid menu_type values const VALID_MENU_TYPES = ['dutchie', 'treez', 'jane', 'weedmaps', 'leafly', 'meadow', 'blaze', 'flowhub', 'dispense', 'cova', 'other', 'unknown']; -// Get all dispensaries +// Get all dispensaries (with pagination) router.get('/', async (req, res) => { try { - const { menu_type, city, state, crawl_enabled, dutchie_verified } = req.query; + const { menu_type, city, state, crawl_enabled, dutchie_verified, limit, offset, search } = req.query; + const pageLimit = Math.min(parseInt(limit as string) || 50, 500); + const pageOffset = parseInt(offset as string) || 0; let query = ` SELECT @@ -98,15 +100,34 @@ router.get('/', async (req, res) => { } } - if (conditions.length > 0) { - query += ` WHERE ${conditions.join(' AND ')}`; + // Search filter (name, dba_name, city, company_name) + if (search) { + conditions.push(`(name ILIKE $${params.length + 1} OR dba_name ILIKE $${params.length + 1} OR city ILIKE $${params.length + 1})`); + params.push(`%${search}%`); } + // Build WHERE clause + const whereClause = conditions.length > 0 ? 
/**
 * Image Proxy Route
 *
 * On-demand image resizing service. Serves images with URL-based transforms.
 *
 * Usage:
 *   /img/{path}?w=200&h=200&q=80&fit=cover
 *
 * Parameters:
 *   w      - width (pixels, clamped to 1-4000)
 *   h      - height (pixels, clamped to 1-4000)
 *   q      - quality (1-100, default 80)
 *   fit    - resize fit: cover, contain, fill, inside, outside (default: inside)
 *   blur   - blur sigma (0.3-1000)
 *   gray   - grayscale (1 = enabled)
 *   format - output format: webp, jpeg, png, avif (default: webp)
 *
 * Malformed numeric parameters are ignored rather than passed on to sharp.
 *
 * Examples:
 *   /img/products/az/store/brand/product/image.webp?w=200
 *   /img/products/az/store/brand/product/image.webp?w=600&h=400&fit=cover
 *   /img/products/az/store/brand/product/image.webp?w=100&blur=5&gray=1
 */

import { Router, Request, Response } from 'express';
import * as fs from 'fs/promises';
import * as path from 'path';
// @ts-ignore
const sharp = require('sharp');

const router = Router();

/**
 * Resolve the directory images are served from: IMAGES_PATH, else
 * STORAGE_BASE_PATH/images, else ./storage/images.
 */
function getImagesBasePath(): string {
  if (process.env.IMAGES_PATH) {
    return process.env.IMAGES_PATH;
  }
  if (process.env.STORAGE_BASE_PATH) {
    return path.join(process.env.STORAGE_BASE_PATH, 'images');
  }
  return './storage/images';
}

const IMAGES_BASE_PATH = getImagesBasePath();

// Allowed fit modes
const ALLOWED_FITS = ['cover', 'contain', 'fill', 'inside', 'outside'] as const;
type FitMode = typeof ALLOWED_FITS[number];

// Allowed formats
const ALLOWED_FORMATS = ['webp', 'jpeg', 'jpg', 'png', 'avif'] as const;
type OutputFormat = typeof ALLOWED_FORMATS[number];

// Cache headers (1 year for immutable content-addressed images)
const CACHE_MAX_AGE = 31536000; // 1 year in seconds

const DEFAULT_QUALITY = 80;
const DEFAULT_FIT: FitMode = 'inside';
const DEFAULT_FORMAT: OutputFormat = 'webp';
const MAX_DIMENSION = 4000;

// File extension -> encoded format, used to decide whether the stored bytes
// can be served untouched.
const SOURCE_FORMATS = new Map<string, OutputFormat>([
  ['.webp', 'webp'],
  ['.jpg', 'jpeg'],
  ['.jpeg', 'jpeg'],
  ['.png', 'png'],
  ['.avif', 'avif'],
]);

interface TransformParams {
  width?: number;
  height?: number;
  quality: number;
  fit: FitMode;
  blur?: number;
  grayscale: boolean;
  format: OutputFormat;
}

/** Reduce an Express query value (string, array, nested object) to a single string. */
function firstQueryValue(value: unknown): string | undefined {
  const candidate: unknown = Array.isArray(value) ? value[0] : value;
  return typeof candidate === 'string' && candidate !== '' ? candidate : undefined;
}

/**
 * Parse a numeric query value and clamp it to [min, max].
 * Returns undefined when the value is absent or not a finite number, so NaN
 * never reaches sharp.
 */
function clampedNumber(
  value: unknown,
  min: number,
  max: number,
  parse: (raw: string) => number
): number | undefined {
  const raw = firstQueryValue(value);
  if (raw === undefined) return undefined;
  const parsed = parse(raw);
  if (!Number.isFinite(parsed)) return undefined;
  return Math.min(Math.max(parsed, min), max);
}

function isFitMode(value: string | undefined): value is FitMode {
  return value !== undefined && (ALLOWED_FITS as readonly string[]).includes(value);
}

function isOutputFormat(value: string | undefined): value is OutputFormat {
  return value !== undefined && (ALLOWED_FORMATS as readonly string[]).includes(value);
}

/**
 * Translate the request query string into validated transform parameters.
 * Unknown or malformed values fall back to their defaults (or are dropped).
 */
function parseTransformParams(query: Record<string, unknown>): TransformParams {
  const parseInteger = (raw: string): number => parseInt(raw, 10);
  const fit = firstQueryValue(query.fit);
  const format = firstQueryValue(query.format);

  return {
    width: clampedNumber(query.w, 1, MAX_DIMENSION, parseInteger),
    height: clampedNumber(query.h, 1, MAX_DIMENSION, parseInteger),
    quality: clampedNumber(query.q, 1, 100, parseInteger) ?? DEFAULT_QUALITY,
    fit: isFitMode(fit) ? fit : DEFAULT_FIT,
    blur: clampedNumber(query.blur, 0.3, 1000, parseFloat),
    grayscale: query.gray === '1' || query.grayscale === '1',
    format: isOutputFormat(format) ? format : DEFAULT_FORMAT,
  };
}

/** Map an output format to its MIME type. */
function getContentType(format: OutputFormat): string {
  switch (format) {
    case 'jpeg':
    case 'jpg':
      return 'image/jpeg';
    case 'png':
      return 'image/png';
    case 'avif':
      return 'image/avif';
    case 'webp':
    default:
      return 'image/webp';
  }
}

/** Collapse the `jpg` alias so formats can be compared directly. */
function canonicalFormat(format: OutputFormat): OutputFormat {
  return format === 'jpg' ? 'jpeg' : format;
}

/**
 * Attach the encoder for `format` to a sharp pipeline.
 * sharp is loaded untyped via require(), hence the `any` pipeline.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function applyOutputFormat(pipeline: any, format: OutputFormat, quality: number): any {
  switch (format) {
    case 'jpeg':
    case 'jpg':
      return pipeline.jpeg({ quality });
    case 'png':
      return pipeline.png({ quality });
    case 'avif':
      return pipeline.avif({ quality });
    case 'webp':
    default:
      return pipeline.webp({ quality });
  }
}

/**
 * True when `target` is `baseDir` itself or located beneath it.
 * Uses path.relative so that sibling directories sharing a name prefix
 * (e.g. `images-private` next to `images`) are not mistaken for children.
 */
function isInsideDirectory(baseDir: string, target: string): boolean {
  const relative = path.relative(baseDir, target);
  const escapes = relative === '..' || relative.startsWith(`..${path.sep}`);
  return !escapes && !path.isAbsolute(relative);
}

/**
 * Read the image from disk. Returns null when the path does not name a
 * readable file (missing, a directory, or a non-directory path component).
 */
async function readImageFile(fullPath: string): Promise<Buffer | null> {
  try {
    return await fs.readFile(fullPath);
  } catch (error: unknown) {
    const code =
      typeof error === 'object' && error !== null && 'code' in error
        ? (error as { code?: unknown }).code
        : undefined;
    if (code === 'ENOENT' || code === 'EISDIR' || code === 'ENOTDIR') {
      return null;
    }
    throw error;
  }
}

/**
 * Image proxy endpoint
 * GET /img/*
 */
router.get('/*', async (req: Request, res: Response) => {
  try {
    // Get the image path from URL (everything after /img/)
    const imagePath = req.params[0];

    if (!imagePath) {
      return res.status(400).json({ error: 'Image path required' });
    }

    // NUL bytes make fs calls throw; reject them as a client error.
    if (imagePath.includes('\0')) {
      return res.status(400).json({ error: 'Invalid image path' });
    }

    // Security: prevent directory traversal
    const normalizedPath = path.normalize(imagePath).replace(/^(\.\.(\/|\\|$))+/, '');
    const basePath = path.resolve(IMAGES_BASE_PATH);
    const fullPath = path.resolve(path.join(IMAGES_BASE_PATH, normalizedPath));

    // Ensure path is within base directory
    if (!isInsideDirectory(basePath, fullPath)) {
      console.error(`[ImageProxy] Path traversal attempt: ${fullPath} not in ${basePath}`);
      return res.status(403).json({ error: 'Access denied' });
    }

    // Read the original image (a single read replaces the access + read pair)
    const imageBuffer = await readImageFile(fullPath);
    if (imageBuffer === null) {
      return res.status(404).json({ error: 'Image not found' });
    }

    // Parse transform parameters
    const params = parseTransformParams(req.query);

    // Check if any transforms are requested
    const hasTransforms =
      params.width !== undefined ||
      params.height !== undefined ||
      params.blur !== undefined ||
      params.grayscale;

    // The stored bytes may be served untouched only when they are already in
    // the format announced by Content-Type and no re-encode was requested.
    const sourceFormat = SOURCE_FORMATS.get(path.extname(fullPath).toLowerCase());
    const canPassThrough =
      !hasTransforms &&
      params.quality === DEFAULT_QUALITY &&
      sourceFormat === canonicalFormat(params.format);

    let outputBuffer: Buffer;

    if (canPassThrough) {
      outputBuffer = imageBuffer;
    } else {
      let pipeline = sharp(imageBuffer);

      // Resize
      if (params.width !== undefined || params.height !== undefined) {
        pipeline = pipeline.resize(params.width, params.height, {
          fit: params.fit,
          withoutEnlargement: true,
        });
      }

      // Blur
      if (params.blur !== undefined) {
        pipeline = pipeline.blur(params.blur);
      }

      // Grayscale
      if (params.grayscale) {
        pipeline = pipeline.grayscale();
      }

      // Output format
      outputBuffer = await applyOutputFormat(pipeline, params.format, params.quality).toBuffer();
    }

    // Set headers
    res.setHeader('Content-Type', getContentType(params.format));
    res.setHeader('Cache-Control', `public, max-age=${CACHE_MAX_AGE}, immutable`);
    res.setHeader('X-Image-Size', outputBuffer.length);

    // Send image
    res.send(outputBuffer);

  } catch (error: unknown) {
    console.error('[ImageProxy] Error:', error instanceof Error ? error.message : error);
    res.status(500).json({ error: 'Failed to process image' });
  }
});

export default router;
@@ const router = Router(); */ router.get('/', async (req: Request, res: Response) => { try { + const gitSha = process.env.APP_GIT_SHA || 'unknown'; const versionInfo = { - build_version: process.env.APP_BUILD_VERSION || 'dev', - git_sha: process.env.APP_GIT_SHA || 'local', + build_version: process.env.APP_BUILD_VERSION?.slice(0, 8) || 'dev', + git_sha: gitSha.slice(0, 8) || 'unknown', + git_sha_full: gitSha, build_time: process.env.APP_BUILD_TIME || new Date().toISOString(), - image_tag: process.env.CONTAINER_IMAGE_TAG || 'local', + image_tag: process.env.CONTAINER_IMAGE_TAG?.slice(0, 8) || 'local', }; res.json(versionInfo); diff --git a/backend/src/scripts/crawl-single-store.ts b/backend/src/scripts/crawl-single-store.ts new file mode 100644 index 00000000..5d5ae14c --- /dev/null +++ b/backend/src/scripts/crawl-single-store.ts @@ -0,0 +1,250 @@ +#!/usr/bin/env npx tsx +/** + * Crawl Single Store - Verbose test showing each step + * + * Usage: + * DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \ + * npx tsx src/scripts/crawl-single-store.ts + * + * Example: + * DATABASE_URL="..." 
npx tsx src/scripts/crawl-single-store.ts 112 + */ + +import { Pool } from 'pg'; +import dotenv from 'dotenv'; +import { + executeGraphQL, + startSession, + endSession, + getFingerprint, + GRAPHQL_HASHES, + DUTCHIE_CONFIG, +} from '../platforms/dutchie'; + +dotenv.config(); + +// ============================================================ +// DATABASE CONNECTION +// ============================================================ + +function getConnectionString(): string { + if (process.env.DATABASE_URL) { + return process.env.DATABASE_URL; + } + if (process.env.CANNAIQ_DB_URL) { + return process.env.CANNAIQ_DB_URL; + } + const host = process.env.CANNAIQ_DB_HOST || 'localhost'; + const port = process.env.CANNAIQ_DB_PORT || '54320'; + const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus'; + const user = process.env.CANNAIQ_DB_USER || 'dutchie'; + const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass'; + return `postgresql://${user}:${pass}@${host}:${port}/${name}`; +} + +const pool = new Pool({ connectionString: getConnectionString() }); + +// ============================================================ +// MAIN +// ============================================================ + +async function main() { + const dispensaryId = parseInt(process.argv[2], 10); + + if (!dispensaryId) { + console.error('Usage: npx tsx src/scripts/crawl-single-store.ts '); + console.error('Example: npx tsx src/scripts/crawl-single-store.ts 112'); + process.exit(1); + } + + console.log(''); + console.log('╔════════════════════════════════════════════════════════════╗'); + console.log('║ SINGLE STORE CRAWL - VERBOSE OUTPUT ║'); + console.log('╚════════════════════════════════════════════════════════════╝'); + console.log(''); + + try { + // ============================================================ + // STEP 1: Get dispensary info from database + // ============================================================ + 
console.log('┌─────────────────────────────────────────────────────────────┐'); + console.log('│ STEP 1: Load Dispensary Info from Database │'); + console.log('└─────────────────────────────────────────────────────────────┘'); + + const dispResult = await pool.query(` + SELECT + id, + name, + platform_dispensary_id, + menu_url, + menu_type, + city, + state + FROM dispensaries + WHERE id = $1 + `, [dispensaryId]); + + if (dispResult.rows.length === 0) { + throw new Error(`Dispensary ${dispensaryId} not found`); + } + + const disp = dispResult.rows[0]; + console.log(` Dispensary ID: ${disp.id}`); + console.log(` Name: ${disp.name}`); + console.log(` City, State: ${disp.city}, ${disp.state}`); + console.log(` Menu Type: ${disp.menu_type}`); + console.log(` Platform ID: ${disp.platform_dispensary_id}`); + console.log(` Menu URL: ${disp.menu_url}`); + + if (!disp.platform_dispensary_id) { + throw new Error('Dispensary does not have a platform_dispensary_id - cannot crawl'); + } + + // Extract cName from menu_url + const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/); + const cName = cNameMatch ? 
cNameMatch[1] : 'dispensary'; + console.log(` cName (derived): ${cName}`); + console.log(''); + + // ============================================================ + // STEP 2: Start stealth session + // ============================================================ + console.log('┌─────────────────────────────────────────────────────────────┐'); + console.log('│ STEP 2: Start Stealth Session │'); + console.log('└─────────────────────────────────────────────────────────────┘'); + + // Use Arizona timezone for this store + const session = startSession(disp.state || 'AZ', 'America/Phoenix'); + + const fp = getFingerprint(); + console.log(` Session ID: ${session.sessionId}`); + console.log(` User-Agent: ${fp.userAgent.slice(0, 60)}...`); + console.log(` Accept-Language: ${fp.acceptLanguage}`); + console.log(` Sec-CH-UA: ${fp.secChUa || '(not set)'}`); + console.log(''); + + // ============================================================ + // STEP 3: Execute GraphQL query + // ============================================================ + console.log('┌─────────────────────────────────────────────────────────────┐'); + console.log('│ STEP 3: Execute GraphQL Query (FilteredProducts) │'); + console.log('└─────────────────────────────────────────────────────────────┘'); + + const variables = { + includeEnterpriseSpecials: false, + productsFilter: { + dispensaryId: disp.platform_dispensary_id, + pricingType: 'rec', + Status: 'Active', + types: [], + useCache: true, + isDefaultSort: true, + sortBy: 'popularSortIdx', + sortDirection: 1, + bypassOnlineThresholds: true, + isKioskMenu: false, + removeProductsBelowOptionThresholds: false, + }, + page: 0, + perPage: 100, + }; + + console.log(` Endpoint: ${DUTCHIE_CONFIG.graphqlEndpoint}`); + console.log(` Operation: FilteredProducts`); + console.log(` Hash: ${GRAPHQL_HASHES.FilteredProducts.slice(0, 20)}...`); + console.log(` dispensaryId: ${variables.productsFilter.dispensaryId}`); + console.log(` pricingType: 
${variables.productsFilter.pricingType}`); + console.log(` Status: ${variables.productsFilter.Status}`); + console.log(` perPage: ${variables.perPage}`); + console.log(''); + console.log(' Sending request...'); + + const startTime = Date.now(); + const result = await executeGraphQL( + 'FilteredProducts', + variables, + GRAPHQL_HASHES.FilteredProducts, + { cName, maxRetries: 3 } + ); + const elapsed = Date.now() - startTime; + + console.log(` Response time: ${elapsed}ms`); + console.log(''); + + // ============================================================ + // STEP 4: Process response + // ============================================================ + console.log('┌─────────────────────────────────────────────────────────────┐'); + console.log('│ STEP 4: Process Response │'); + console.log('└─────────────────────────────────────────────────────────────┘'); + + const data = result?.data?.filteredProducts; + if (!data) { + console.log(' ERROR: No data returned from GraphQL'); + console.log(' Raw result:', JSON.stringify(result, null, 2).slice(0, 500)); + endSession(); + return; + } + + const products = data.products || []; + const totalCount = data.queryInfo?.totalCount || 0; + const totalPages = Math.ceil(totalCount / 100); + + console.log(` Total products: ${totalCount}`); + console.log(` Products in page: ${products.length}`); + console.log(` Total pages: ${totalPages}`); + console.log(''); + + // Show first few products + console.log(' First 5 products:'); + console.log(' ─────────────────────────────────────────────────────────'); + for (let i = 0; i < Math.min(5, products.length); i++) { + const p = products[i]; + const name = (p.name || 'Unknown').slice(0, 40); + const brand = (p.brand?.name || 'Unknown').slice(0, 15); + const price = p.Prices?.[0]?.price || p.medPrice || p.recPrice || 'N/A'; + const category = p.type || p.category || 'N/A'; + console.log(` ${i + 1}. 
${name.padEnd(42)} | ${brand.padEnd(17)} | $${price}`); + } + console.log(''); + + // ============================================================ + // STEP 5: End session + // ============================================================ + console.log('┌─────────────────────────────────────────────────────────────┐'); + console.log('│ STEP 5: End Session │'); + console.log('└─────────────────────────────────────────────────────────────┘'); + + endSession(); + console.log(''); + + // ============================================================ + // SUMMARY + // ============================================================ + console.log('╔════════════════════════════════════════════════════════════╗'); + console.log('║ SUMMARY ║'); + console.log('╠════════════════════════════════════════════════════════════╣'); + console.log(`║ Store: ${disp.name.slice(0, 38).padEnd(38)} ║`); + console.log(`║ Products Found: ${String(totalCount).padEnd(38)} ║`); + console.log(`║ Response Time: ${(elapsed + 'ms').padEnd(38)} ║`); + console.log(`║ Status: ${'SUCCESS'.padEnd(38)} ║`); + console.log('╚════════════════════════════════════════════════════════════╝'); + + } catch (error: any) { + console.error(''); + console.error('╔════════════════════════════════════════════════════════════╗'); + console.error('║ ERROR ║'); + console.error('╚════════════════════════════════════════════════════════════╝'); + console.error(` ${error.message}`); + if (error.stack) { + console.error(''); + console.error('Stack trace:'); + console.error(error.stack.split('\n').slice(0, 5).join('\n')); + } + process.exit(1); + } finally { + await pool.end(); + } +} + +main(); diff --git a/backend/src/scripts/test-crawl-to-canonical.ts b/backend/src/scripts/test-crawl-to-canonical.ts index 752b9e4b..79590a39 100644 --- a/backend/src/scripts/test-crawl-to-canonical.ts +++ b/backend/src/scripts/test-crawl-to-canonical.ts @@ -23,6 +23,7 @@ import { DutchieNormalizer, hydrateToCanonical, } from '../hydration'; 
+import { initializeImageStorage } from '../utils/image-storage'; dotenv.config(); @@ -137,6 +138,11 @@ async function main() { console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`); console.log('============================================================\n'); + // Initialize image storage + console.log('[Init] Initializing image storage...'); + await initializeImageStorage(); + console.log(' Image storage ready\n'); + try { // Step 1: Get dispensary info console.log('[Step 1] Getting dispensary info...'); diff --git a/backend/src/scripts/test-image-download.ts b/backend/src/scripts/test-image-download.ts new file mode 100644 index 00000000..8ef1f143 --- /dev/null +++ b/backend/src/scripts/test-image-download.ts @@ -0,0 +1,268 @@ +#!/usr/bin/env npx tsx +/** + * Test Image Download - Tests image downloading with a small batch of products + * + * Usage: + * DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \ + * STORAGE_DRIVER=local STORAGE_BASE_PATH=./storage \ + * npx tsx src/scripts/test-image-download.ts [limit] + * + * Example: + * DATABASE_URL="..." 
npx tsx src/scripts/test-image-download.ts 112 5 + */ + +import { Pool } from 'pg'; +import dotenv from 'dotenv'; +import { + executeGraphQL, + startSession, + endSession, + GRAPHQL_HASHES, +} from '../platforms/dutchie'; +import { DutchieNormalizer } from '../hydration/normalizers/dutchie'; +import { hydrateToCanonical } from '../hydration/canonical-upsert'; +import { initializeImageStorage, getStorageStats } from '../utils/image-storage'; + +dotenv.config(); + +// ============================================================ +// DATABASE CONNECTION +// ============================================================ + +function getConnectionString(): string { + if (process.env.DATABASE_URL) { + return process.env.DATABASE_URL; + } + const host = process.env.CANNAIQ_DB_HOST || 'localhost'; + const port = process.env.CANNAIQ_DB_PORT || '54320'; + const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus'; + const user = process.env.CANNAIQ_DB_USER || 'dutchie'; + const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass'; + return `postgresql://${user}:${pass}@${host}:${port}/${name}`; +} + +const pool = new Pool({ connectionString: getConnectionString() }); + +// ============================================================ +// MAIN +// ============================================================ + +async function main() { + const dispensaryId = parseInt(process.argv[2], 10); + const limit = parseInt(process.argv[3], 10) || 5; + + if (!dispensaryId) { + console.error('Usage: npx tsx src/scripts/test-image-download.ts [limit]'); + console.error('Example: npx tsx src/scripts/test-image-download.ts 112 5'); + process.exit(1); + } + + console.log(''); + console.log('╔════════════════════════════════════════════════════════════╗'); + console.log('║ IMAGE DOWNLOAD TEST ║'); + console.log('╚════════════════════════════════════════════════════════════╝'); + console.log(''); + + try { + // Initialize image storage + 
console.log('┌─────────────────────────────────────────────────────────────┐'); + console.log('│ STEP 1: Initialize Image Storage │'); + console.log('└─────────────────────────────────────────────────────────────┘'); + await initializeImageStorage(); + const statsBefore = await getStorageStats(); + console.log(` Base path: ${statsBefore.basePath}`); + console.log(` Products before: ${statsBefore.productCount}`); + console.log(` Brands before: ${statsBefore.brandCount}`); + console.log(''); + + // Get dispensary info + console.log('┌─────────────────────────────────────────────────────────────┐'); + console.log('│ STEP 2: Load Dispensary Info │'); + console.log('└─────────────────────────────────────────────────────────────┘'); + + const dispResult = await pool.query(` + SELECT + id, name, platform_dispensary_id, menu_url, state, slug + FROM dispensaries + WHERE id = $1 + `, [dispensaryId]); + + if (dispResult.rows.length === 0) { + throw new Error(`Dispensary ${dispensaryId} not found`); + } + + const disp = dispResult.rows[0]; + console.log(` Dispensary: ${disp.name}`); + console.log(` State: ${disp.state}`); + console.log(` Slug: ${disp.slug}`); + console.log(` Platform ID: ${disp.platform_dispensary_id}`); + console.log(''); + + // Delete some existing store_products to force "new" products + console.log('┌─────────────────────────────────────────────────────────────┐'); + console.log('│ STEP 3: Clear Store Products (to test new product flow) │'); + console.log('└─────────────────────────────────────────────────────────────┘'); + + const deleteResult = await pool.query(` + DELETE FROM store_products + WHERE dispensary_id = $1 + RETURNING id + `, [dispensaryId]); + console.log(` Deleted ${deleteResult.rowCount} existing store_products`); + console.log(''); + + // Fetch products from Dutchie + console.log('┌─────────────────────────────────────────────────────────────┐'); + console.log('│ STEP 4: Fetch Products from Dutchie (limited) │'); + 
console.log('└─────────────────────────────────────────────────────────────┘'); + + const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/); + const cName = cNameMatch ? cNameMatch[1] : 'dispensary'; + + const session = startSession(disp.state || 'AZ', 'America/Phoenix'); + console.log(` Session ID: ${session.sessionId}`); + console.log(` cName: ${cName}`); + console.log(` Limit: ${limit} products`); + + const variables = { + includeEnterpriseSpecials: false, + productsFilter: { + dispensaryId: disp.platform_dispensary_id, + pricingType: 'rec', + Status: 'Active', + types: [], + useCache: true, + isDefaultSort: true, + sortBy: 'popularSortIdx', + sortDirection: 1, + bypassOnlineThresholds: true, + isKioskMenu: false, + removeProductsBelowOptionThresholds: false, + }, + page: 0, + perPage: limit, // Only fetch limited products + }; + + const startTime = Date.now(); + const result = await executeGraphQL( + 'FilteredProducts', + variables, + GRAPHQL_HASHES.FilteredProducts, + { cName, maxRetries: 3 } + ); + const elapsed = Date.now() - startTime; + + endSession(); + + const products = result?.data?.filteredProducts?.products || []; + console.log(` Fetched: ${products.length} products in ${elapsed}ms`); + + // Show products with images + console.log(''); + console.log(' Products with images:'); + for (let i = 0; i < products.length; i++) { + const p = products[i]; + const hasImage = !!p.Image; + const brandName = p.brand?.name || 'Unknown'; + console.log(` ${i + 1}. ${p.name?.slice(0, 40).padEnd(42)} | ${brandName.slice(0, 15).padEnd(17)} | ${hasImage ? 
'✓ has image' : '✗ no image'}`); + } + console.log(''); + + // Normalize and hydrate + console.log('┌─────────────────────────────────────────────────────────────┐'); + console.log('│ STEP 5: Normalize and Hydrate (with image download) │'); + console.log('└─────────────────────────────────────────────────────────────┘'); + + const normalizer = new DutchieNormalizer(); + // Wrap products in expected payload format + const payload = { + raw_json: products, // DutchieNormalizer.extractProducts handles arrays + dispensary_id: dispensaryId, + }; + const normResult = normalizer.normalize(payload); + console.log(` Normalized products: ${normResult.products.length}`); + console.log(` Brands found: ${normResult.brands.length}`); + + const hydrateStart = Date.now(); + const hydrateResult = await hydrateToCanonical( + pool, + dispensaryId, + normResult, + null, // no crawl run ID for test + { dryRun: false, downloadImages: true } + ); + const hydrateElapsed = Date.now() - hydrateStart; + + console.log(''); + console.log(` Hydration time: ${hydrateElapsed}ms`); + console.log(` Products new: ${hydrateResult.productsNew}`); + console.log(` Products updated: ${hydrateResult.productsUpdated}`); + console.log(` Images downloaded: ${hydrateResult.imagesDownloaded}`); + console.log(` Images skipped: ${hydrateResult.imagesSkipped}`); + console.log(` Images failed: ${hydrateResult.imagesFailed}`); + console.log(` Image bytes: ${(hydrateResult.imagesBytesTotal / 1024).toFixed(1)} KB`); + console.log(''); + + // Check storage stats + console.log('┌─────────────────────────────────────────────────────────────┐'); + console.log('│ STEP 6: Verify Storage │'); + console.log('└─────────────────────────────────────────────────────────────┘'); + + const statsAfter = await getStorageStats(); + console.log(` Products after: ${statsAfter.productCount}`); + console.log(` Brands after: ${statsAfter.brandCount}`); + console.log(` Total size: ${(statsAfter.totalSizeBytes / 1024).toFixed(1)} KB`); + 
console.log(''); + + // Check database for local_image_path + console.log('┌─────────────────────────────────────────────────────────────┐'); + console.log('│ STEP 7: Check Database for Local Image Paths │'); + console.log('└─────────────────────────────────────────────────────────────┘'); + + const dbCheck = await pool.query(` + SELECT + id, name_raw, local_image_path, images + FROM store_products + WHERE dispensary_id = $1 + LIMIT 10 + `, [dispensaryId]); + + for (const row of dbCheck.rows) { + const hasLocal = !!row.local_image_path; + const hasImages = !!row.images; + console.log(` ${row.id}: ${row.name_raw?.slice(0, 40).padEnd(42)} | local: ${hasLocal ? '✓' : '✗'} | images: ${hasImages ? '✓' : '✗'}`); + if (row.local_image_path) { + console.log(` → ${row.local_image_path}`); + } + } + console.log(''); + + // Summary + console.log('╔════════════════════════════════════════════════════════════╗'); + console.log('║ SUMMARY ║'); + console.log('╠════════════════════════════════════════════════════════════╣'); + console.log(`║ Dispensary: ${disp.name.slice(0, 37).padEnd(37)} ║`); + console.log(`║ Products crawled: ${String(products.length).padEnd(37)} ║`); + console.log(`║ Images downloaded: ${String(hydrateResult.imagesDownloaded).padEnd(37)} ║`); + console.log(`║ Total image bytes: ${((hydrateResult.imagesBytesTotal / 1024).toFixed(1) + ' KB').padEnd(37)} ║`); + console.log(`║ Status: ${'SUCCESS'.padEnd(37)} ║`); + console.log('╚════════════════════════════════════════════════════════════╝'); + + } catch (error: any) { + console.error(''); + console.error('╔════════════════════════════════════════════════════════════╗'); + console.error('║ ERROR ║'); + console.error('╚════════════════════════════════════════════════════════════╝'); + console.error(` ${error.message}`); + if (error.stack) { + console.error(''); + console.error('Stack trace:'); + console.error(error.stack.split('\n').slice(0, 5).join('\n')); + } + process.exit(1); + } finally { + await pool.end(); + 
} +} + +main(); diff --git a/backend/src/scripts/test-image-proxy.ts b/backend/src/scripts/test-image-proxy.ts new file mode 100644 index 00000000..fd463139 --- /dev/null +++ b/backend/src/scripts/test-image-proxy.ts @@ -0,0 +1,80 @@ +#!/usr/bin/env npx tsx +/** + * Test Image Proxy - Standalone test without backend + * + * Usage: + * npx tsx src/scripts/test-image-proxy.ts + */ + +import express from 'express'; +import imageProxyRoutes from '../routes/image-proxy'; + +const app = express(); +const PORT = 3099; + +// Mount the image proxy +app.use('/img', imageProxyRoutes); + +// Start server +app.listen(PORT, async () => { + console.log(`Test image proxy running on http://localhost:${PORT}`); + console.log(''); + console.log('Testing image proxy...'); + console.log(''); + + const axios = require('axios'); + + // Test cases + const tests = [ + { + name: 'Original image', + url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp', + }, + { + name: 'Resize to 200px width', + url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200', + }, + { + name: 'Resize to 100x100 cover', + url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=100&h=100&fit=cover', + }, + { + name: 'Grayscale + blur', + url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200&gray=1&blur=2', + }, + { + name: 'Convert to JPEG', + url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200&format=jpeg&q=70', + }, + { + name: 'Non-existent image', + url: '/img/products/az/nonexistent/image.webp', + }, + ]; + + for (const test of tests) { + try { + const response = await axios.get(`http://localhost:${PORT}${test.url}`, { + responseType: 'arraybuffer', + validateStatus: () => true, + }); + + const contentType = response.headers['content-type']; + const size = response.data.length; + const 
status = response.status; + + console.log(`${test.name}:`); + console.log(` URL: ${test.url.slice(0, 80)}${test.url.length > 80 ? '...' : ''}`); + console.log(` Status: ${status}`); + console.log(` Content-Type: ${contentType}`); + console.log(` Size: ${(size / 1024).toFixed(1)} KB`); + console.log(''); + } catch (error: any) { + console.log(`${test.name}: ERROR - ${error.message}`); + console.log(''); + } + } + + console.log('Tests complete!'); + process.exit(0); +}); diff --git a/backend/src/scripts/test-stealth-session.ts b/backend/src/scripts/test-stealth-session.ts new file mode 100644 index 00000000..289f5a96 --- /dev/null +++ b/backend/src/scripts/test-stealth-session.ts @@ -0,0 +1,117 @@ +/** + * Test script for stealth session management + * + * Tests: + * 1. Per-session fingerprint rotation + * 2. Geographic consistency (timezone → Accept-Language) + * 3. Proxy location loading from database + * + * Usage: + * npx tsx src/scripts/test-stealth-session.ts + */ + +import { + startSession, + endSession, + getCurrentSession, + getFingerprint, + getRandomFingerprint, + getLocaleForTimezone, + buildHeaders, +} from '../platforms/dutchie'; + +console.log('='.repeat(60)); +console.log('STEALTH SESSION TEST'); +console.log('='.repeat(60)); + +// Test 1: Timezone to Locale mapping +console.log('\n[Test 1] Timezone to Locale Mapping:'); +const testTimezones = [ + 'America/Phoenix', + 'America/Los_Angeles', + 'America/New_York', + 'America/Chicago', + undefined, + 'Invalid/Timezone', +]; + +for (const tz of testTimezones) { + const locale = getLocaleForTimezone(tz); + console.log(` ${tz || '(undefined)'} → ${locale}`); +} + +// Test 2: Random fingerprint selection +console.log('\n[Test 2] Random Fingerprint Selection (5 samples):'); +for (let i = 0; i < 5; i++) { + const fp = getRandomFingerprint(); + console.log(` ${i + 1}. 
${fp.userAgent.slice(0, 60)}...`); +} + +// Test 3: Session Management +console.log('\n[Test 3] Session Management:'); + +// Before session - should use default fingerprint +console.log(' Before session:'); +const beforeFp = getFingerprint(); +console.log(` getFingerprint(): ${beforeFp.userAgent.slice(0, 50)}...`); +console.log(` getCurrentSession(): ${getCurrentSession()}`); + +// Start session with Arizona timezone +console.log('\n Starting session (AZ, America/Phoenix):'); +const session1 = startSession('AZ', 'America/Phoenix'); +console.log(` Session ID: ${session1.sessionId}`); +console.log(` Fingerprint UA: ${session1.fingerprint.userAgent.slice(0, 50)}...`); +console.log(` Accept-Language: ${session1.fingerprint.acceptLanguage}`); +console.log(` Timezone: ${session1.timezone}`); + +// During session - should use session fingerprint +console.log('\n During session:'); +const duringFp = getFingerprint(); +console.log(` getFingerprint(): ${duringFp.userAgent.slice(0, 50)}...`); +console.log(` Same as session? 
${duringFp.userAgent === session1.fingerprint.userAgent}`); + +// Test buildHeaders with session +console.log('\n buildHeaders() during session:'); +const headers = buildHeaders('/embedded-menu/test-store'); +console.log(` User-Agent: ${headers['user-agent'].slice(0, 50)}...`); +console.log(` Accept-Language: ${headers['accept-language']}`); +console.log(` Origin: ${headers['origin']}`); +console.log(` Referer: ${headers['referer']}`); + +// End session +console.log('\n Ending session:'); +endSession(); +console.log(` getCurrentSession(): ${getCurrentSession()}`); + +// Test 4: Multiple sessions should have different fingerprints +console.log('\n[Test 4] Multiple Sessions (fingerprint variety):'); +const fingerprints: string[] = []; +for (let i = 0; i < 10; i++) { + const session = startSession('CA', 'America/Los_Angeles'); + fingerprints.push(session.fingerprint.userAgent); + endSession(); +} + +const uniqueCount = new Set(fingerprints).size; +console.log(` 10 sessions created, ${uniqueCount} unique fingerprints`); +console.log(` Variety: ${uniqueCount >= 3 ? '✅ Good' : '⚠️ Low - may need more fingerprint options'}`); + +// Test 5: Geographic consistency check +console.log('\n[Test 5] Geographic Consistency:'); +const geoTests = [ + { state: 'AZ', tz: 'America/Phoenix' }, + { state: 'CA', tz: 'America/Los_Angeles' }, + { state: 'NY', tz: 'America/New_York' }, + { state: 'IL', tz: 'America/Chicago' }, +]; + +for (const { state, tz } of geoTests) { + const session = startSession(state, tz); + const consistent = session.fingerprint.acceptLanguage.includes('en-US'); + console.log(` ${state} (${tz}): Accept-Language=${session.fingerprint.acceptLanguage} ${consistent ? 
'✅' : '❌'}`); + endSession(); +} + +console.log('\n' + '='.repeat(60)); +console.log('TEST COMPLETE'); +console.log('='.repeat(60)); diff --git a/backend/src/scripts/test-stealth-with-db.ts b/backend/src/scripts/test-stealth-with-db.ts new file mode 100644 index 00000000..1f5094f6 --- /dev/null +++ b/backend/src/scripts/test-stealth-with-db.ts @@ -0,0 +1,144 @@ +/** + * Test script for stealth session with REAL proxy data from database + * + * Tests: + * 1. Load proxies from database (with location data) + * 2. Verify location fields (city, state, timezone) are loaded + * 3. Start session with proxy's timezone + * 4. Verify Accept-Language matches timezone + * + * Usage: + * DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" npx tsx src/scripts/test-stealth-with-db.ts + */ + +import { Pool } from 'pg'; +import { + CrawlRotator, + ProxyRotator, +} from '../services/crawl-rotator'; +import { + startSession, + endSession, + getLocaleForTimezone, +} from '../platforms/dutchie'; + +const DATABASE_URL = process.env.DATABASE_URL; + +if (!DATABASE_URL) { + console.error('ERROR: DATABASE_URL environment variable is required'); + process.exit(1); +} + +async function main() { + console.log('='.repeat(60)); + console.log('STEALTH SESSION TEST WITH DATABASE'); + console.log('='.repeat(60)); + + const pool = new Pool({ connectionString: DATABASE_URL }); + + try { + // Test 1: Load proxies with location data + console.log('\n[Test 1] Loading proxies from database...'); + const rotator = new CrawlRotator(pool); + await rotator.initialize(); + + const stats = rotator.proxy.getStats(); + console.log(` Total proxies: ${stats.totalProxies}`); + console.log(` Active proxies: ${stats.activeProxies}`); + + if (stats.activeProxies === 0) { + console.log('\n WARNING: No active proxies in database!'); + console.log(' Insert test proxies with:'); + console.log(` INSERT INTO proxies (host, port, protocol, city, state, country_code, timezone, active)`); + 
console.log(` VALUES ('proxy1.example.com', 8080, 'http', 'Phoenix', 'AZ', 'US', 'America/Phoenix', true);`); + return; + } + + // Test 2: Check location data on proxies + console.log('\n[Test 2] Checking proxy location data...'); + let proxyCount = 0; + let withLocationCount = 0; + + // Iterate through proxies + for (let i = 0; i < stats.totalProxies; i++) { + const proxy = rotator.proxy.getNext(); + if (!proxy) break; + + proxyCount++; + const hasLocation = !!(proxy.stateCode || proxy.timezone); + if (hasLocation) withLocationCount++; + + console.log(` Proxy ${proxy.id}: ${proxy.host}:${proxy.port}`); + console.log(` City: ${proxy.city || '(not set)'}`); + console.log(` State: ${proxy.stateCode || '(not set)'}`); + console.log(` Country: ${proxy.countryCode || '(not set)'}`); + console.log(` Timezone: ${proxy.timezone || '(not set)'}`); + console.log(` Has location data: ${hasLocation ? '✅' : '❌'}`); + } + + console.log(`\n Summary: ${withLocationCount}/${proxyCount} proxies have location data`); + + // Test 3: Start session using proxy's timezone + console.log('\n[Test 3] Starting session with proxy timezone...'); + + // Get first proxy with timezone + const firstProxy = rotator.proxy.getNext(); + if (firstProxy && firstProxy.timezone) { + console.log(` Using proxy: ${firstProxy.host} (${firstProxy.city}, ${firstProxy.stateCode})`); + console.log(` Proxy timezone: ${firstProxy.timezone}`); + + const session = startSession(firstProxy.stateCode, firstProxy.timezone); + console.log(` Session ID: ${session.sessionId}`); + console.log(` Session timezone: ${session.timezone}`); + console.log(` Session Accept-Language: ${session.fingerprint.acceptLanguage}`); + + // Verify Accept-Language matches expected locale for timezone + const expectedLocale = getLocaleForTimezone(firstProxy.timezone); + const matches = session.fingerprint.acceptLanguage === expectedLocale; + console.log(` Expected locale: ${expectedLocale}`); + console.log(` Locale matches: ${matches ? 
'✅' : '❌'}`); + + endSession(); + } else { + console.log(' WARNING: No proxy with timezone data found'); + } + + // Test 4: Test each timezone in database + console.log('\n[Test 4] Testing all proxy timezones...'); + const seenTimezones = new Set(); + + // Reset to beginning + for (let i = 0; i < stats.totalProxies; i++) { + const proxy = rotator.proxy.getNext(); + if (!proxy || !proxy.timezone) continue; + if (seenTimezones.has(proxy.timezone)) continue; + + seenTimezones.add(proxy.timezone); + const session = startSession(proxy.stateCode, proxy.timezone); + console.log(` ${proxy.timezone}:`); + console.log(` State: ${proxy.stateCode || 'unknown'}`); + console.log(` Accept-Language: ${session.fingerprint.acceptLanguage}`); + endSession(); + } + + console.log('\n' + '='.repeat(60)); + console.log('TEST COMPLETE'); + console.log('='.repeat(60)); + + if (withLocationCount === 0) { + console.log('\n⚠️ No proxies have location data.'); + console.log(' Geographic consistency will use default locale (en-US).'); + console.log(' To enable geo-consistency, populate city/state/timezone on proxies.'); + } else { + console.log('\n✅ Stealth session with geo-consistency is working!'); + console.log(' Sessions will use Accept-Language matching proxy timezone.'); + } + + } catch (error) { + console.error('Error:', error); + } finally { + await pool.end(); + } +} + +main(); diff --git a/backend/src/utils/image-storage.ts b/backend/src/utils/image-storage.ts index f99ac6d2..6818a6f1 100644 --- a/backend/src/utils/image-storage.ts +++ b/backend/src/utils/image-storage.ts @@ -1,26 +1,29 @@ /** * Local Image Storage Utility * - * Downloads and stores product images to local filesystem. - * Replaces MinIO-based storage with simple local file storage. + * Downloads and stores product images to local filesystem with proper hierarchy. 
* * Directory structure: - * /images/products//.webp - * /images/products//-thumb.webp - * /images/products//-medium.webp - * /images/brands/.webp + * /images/products/////image.webp + * /images/products/////image-medium.webp + * /images/products/////image-thumb.webp + * /images/brands//logo.webp + * + * This structure allows: + * - Easy migration to MinIO/S3 (bucket per state) + * - Browsing by state/store/brand + * - Multiple images per product (future: gallery) */ import axios from 'axios'; -import sharp from 'sharp'; +// @ts-ignore - sharp module typing quirk +const sharp = require('sharp'); import * as fs from 'fs/promises'; import * as path from 'path'; import { createHash } from 'crypto'; // Base path for image storage - configurable via env -// Uses project-relative paths by default, NOT /app or other privileged paths function getImagesBasePath(): string { - // Priority: IMAGES_PATH > STORAGE_BASE_PATH/images > ./storage/images if (process.env.IMAGES_PATH) { return process.env.IMAGES_PATH; } @@ -35,16 +38,28 @@ const IMAGES_BASE_PATH = getImagesBasePath(); const IMAGES_PUBLIC_URL = process.env.IMAGES_PUBLIC_URL || '/images'; export interface LocalImageSizes { - full: string; // URL path: /images/products/123/456.webp - medium: string; // URL path: /images/products/123/456-medium.webp - thumb: string; // URL path: /images/products/123/456-thumb.webp + original: string; // URL path to original image + // Legacy compatibility - all point to original until we add image proxy + full: string; + medium: string; + thumb: string; } export interface DownloadResult { success: boolean; urls?: LocalImageSizes; + localPaths?: LocalImageSizes; error?: string; bytesDownloaded?: number; + skipped?: boolean; // True if image already exists +} + +export interface ProductImageContext { + stateCode: string; // e.g., "AZ", "CA" + storeSlug: string; // e.g., "deeply-rooted" + brandSlug: string; // e.g., "high-west-farms" + productId: string; // External product ID + 
dispensaryId?: number; // For backwards compat } /** @@ -58,6 +73,17 @@ async function ensureDir(dirPath: string): Promise { } } +/** + * Sanitize a string for use in file paths + */ +function slugify(str: string): string { + return str + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, '') + .substring(0, 50) || 'unknown'; +} + /** * Generate a short hash from a URL for deduplication */ @@ -81,53 +107,30 @@ async function downloadImage(imageUrl: string): Promise { } /** - * Process and save image in multiple sizes - * Returns the file paths relative to IMAGES_BASE_PATH + * Process and save original image (convert to webp for consistency) + * + * We store only the original - resizing will be done on-demand via + * an image proxy service (imgproxy, thumbor, or similar) in the future. */ async function processAndSaveImage( buffer: Buffer, outputDir: string, baseFilename: string -): Promise<{ full: string; medium: string; thumb: string; totalBytes: number }> { +): Promise<{ original: string; totalBytes: number }> { await ensureDir(outputDir); - const fullPath = path.join(outputDir, `${baseFilename}.webp`); - const mediumPath = path.join(outputDir, `${baseFilename}-medium.webp`); - const thumbPath = path.join(outputDir, `${baseFilename}-thumb.webp`); + const originalPath = path.join(outputDir, `${baseFilename}.webp`); - // Process images in parallel - const [fullBuffer, mediumBuffer, thumbBuffer] = await Promise.all([ - // Full: max 1200x1200, high quality - sharp(buffer) - .resize(1200, 1200, { fit: 'inside', withoutEnlargement: true }) - .webp({ quality: 85 }) - .toBuffer(), - // Medium: 600x600 - sharp(buffer) - .resize(600, 600, { fit: 'inside', withoutEnlargement: true }) - .webp({ quality: 80 }) - .toBuffer(), - // Thumb: 200x200 - sharp(buffer) - .resize(200, 200, { fit: 'inside', withoutEnlargement: true }) - .webp({ quality: 75 }) - .toBuffer(), - ]); + // Convert to webp, preserve original dimensions, high quality + const originalBuffer = 
await sharp(buffer) + .webp({ quality: 90 }) + .toBuffer(); - // Save all sizes - await Promise.all([ - fs.writeFile(fullPath, fullBuffer), - fs.writeFile(mediumPath, mediumBuffer), - fs.writeFile(thumbPath, thumbBuffer), - ]); - - const totalBytes = fullBuffer.length + mediumBuffer.length + thumbBuffer.length; + await fs.writeFile(originalPath, originalBuffer); return { - full: fullPath, - medium: mediumPath, - thumb: thumbPath, - totalBytes, + original: originalPath, + totalBytes: originalBuffer.length, }; } @@ -135,47 +138,107 @@ async function processAndSaveImage( * Convert a file path to a public URL */ function pathToUrl(filePath: string): string { + // Find /products/ or /brands/ in the path and extract from there + const productsMatch = filePath.match(/(\/products\/.*)/); + const brandsMatch = filePath.match(/(\/brands\/.*)/); + + if (productsMatch) { + return `${IMAGES_PUBLIC_URL}${productsMatch[1]}`; + } + if (brandsMatch) { + return `${IMAGES_PUBLIC_URL}${brandsMatch[1]}`; + } + + // Fallback: try to replace base path (works if paths match exactly) const relativePath = filePath.replace(IMAGES_BASE_PATH, ''); return `${IMAGES_PUBLIC_URL}${relativePath}`; } /** - * Download and store a product image locally + * Build the directory path for a product image + * Structure: /images/products///// + */ +function buildProductImagePath(ctx: ProductImageContext): string { + const state = slugify(ctx.stateCode || 'unknown'); + const store = slugify(ctx.storeSlug || 'unknown'); + const brand = slugify(ctx.brandSlug || 'unknown'); + const product = slugify(ctx.productId || 'unknown'); + + return path.join(IMAGES_BASE_PATH, 'products', state, store, brand, product); +} + +/** + * Download and store a product image with proper hierarchy * * @param imageUrl - The third-party image URL to download - * @param dispensaryId - The dispensary ID (for directory organization) - * @param productId - The product ID or external ID (for filename) + * @param ctx - Product context 
(state, store, brand, product) + * @param options - Download options * @returns Download result with local URLs */ export async function downloadProductImage( imageUrl: string, - dispensaryId: number, - productId: string | number + ctx: ProductImageContext, + options: { skipIfExists?: boolean } = {} ): Promise { + const { skipIfExists = true } = options; + try { if (!imageUrl) { return { success: false, error: 'No image URL provided' }; } + const outputDir = buildProductImagePath(ctx); + const urlHash = hashUrl(imageUrl); + const baseFilename = `image-${urlHash}`; + + // Check if image already exists + if (skipIfExists) { + const existingPath = path.join(outputDir, `${baseFilename}.webp`); + try { + await fs.access(existingPath); + // Image exists, return existing URL + const url = pathToUrl(existingPath); + return { + success: true, + skipped: true, + urls: { + original: url, + full: url, + medium: url, + thumb: url, + }, + localPaths: { + original: existingPath, + full: existingPath, + medium: existingPath, + thumb: existingPath, + }, + }; + } catch { + // Image doesn't exist, continue to download + } + } + // Download the image const buffer = await downloadImage(imageUrl); - // Organize by dispensary ID - const outputDir = path.join(IMAGES_BASE_PATH, 'products', String(dispensaryId)); - - // Use product ID + URL hash for uniqueness - const urlHash = hashUrl(imageUrl); - const baseFilename = `${productId}-${urlHash}`; - - // Process and save + // Process and save (original only) const result = await processAndSaveImage(buffer, outputDir, baseFilename); + const url = pathToUrl(result.original); return { success: true, urls: { - full: pathToUrl(result.full), - medium: pathToUrl(result.medium), - thumb: pathToUrl(result.thumb), + original: url, + full: url, + medium: url, + thumb: url, + }, + localPaths: { + original: result.original, + full: result.original, + medium: result.original, + thumb: result.original, }, bytesDownloaded: result.totalBytes, }; @@ -188,33 
+251,70 @@ export async function downloadProductImage( } /** - * Download and store a brand logo locally + * Legacy function - backwards compatible with old signature + * Maps to new hierarchy using dispensary_id as store identifier + */ +export async function downloadProductImageLegacy( + imageUrl: string, + dispensaryId: number, + productId: string | number +): Promise { + return downloadProductImage(imageUrl, { + stateCode: 'unknown', + storeSlug: `store-${dispensaryId}`, + brandSlug: 'unknown', + productId: String(productId), + dispensaryId, + }); +} + +/** + * Download and store a brand logo * * @param logoUrl - The brand logo URL - * @param brandId - The brand ID or slug + * @param brandSlug - The brand slug/ID * @returns Download result with local URL */ export async function downloadBrandLogo( logoUrl: string, - brandId: string + brandSlug: string, + options: { skipIfExists?: boolean } = {} ): Promise { + const { skipIfExists = true } = options; + try { if (!logoUrl) { return { success: false, error: 'No logo URL provided' }; } + const safeBrandSlug = slugify(brandSlug); + const outputDir = path.join(IMAGES_BASE_PATH, 'brands', safeBrandSlug); + const urlHash = hashUrl(logoUrl); + const baseFilename = `logo-${urlHash}`; + + // Check if logo already exists + if (skipIfExists) { + const existingPath = path.join(outputDir, `${baseFilename}.webp`); + try { + await fs.access(existingPath); + return { + success: true, + skipped: true, + urls: { + full: pathToUrl(existingPath), + medium: pathToUrl(existingPath), + thumb: pathToUrl(existingPath), + }, + }; + } catch { + // Logo doesn't exist, continue + } + } + // Download the image const buffer = await downloadImage(logoUrl); - // Brand logos go in /images/brands/ - const outputDir = path.join(IMAGES_BASE_PATH, 'brands'); - - // Sanitize brand ID for filename - const safeBrandId = brandId.replace(/[^a-zA-Z0-9-_]/g, '_'); - const urlHash = hashUrl(logoUrl); - const baseFilename = `${safeBrandId}-${urlHash}`; - - // 
Process and save (single size for logos) + // Brand logos in their own directory await ensureDir(outputDir); const logoPath = path.join(outputDir, `${baseFilename}.webp`); @@ -243,20 +343,16 @@ export async function downloadBrandLogo( } /** - * Check if a local image already exists + * Check if a product image already exists */ -export async function imageExists( - dispensaryId: number, - productId: string | number, +export async function productImageExists( + ctx: ProductImageContext, imageUrl: string ): Promise { + const outputDir = buildProductImagePath(ctx); const urlHash = hashUrl(imageUrl); - const imagePath = path.join( - IMAGES_BASE_PATH, - 'products', - String(dispensaryId), - `${productId}-${urlHash}.webp` - ); + const imagePath = path.join(outputDir, `image-${urlHash}.webp`); + try { await fs.access(imagePath); return true; @@ -266,24 +362,27 @@ export async function imageExists( } /** - * Delete a product's local images + * Get the local image URL for a product (if exists) */ -export async function deleteProductImages( - dispensaryId: number, - productId: string | number, - imageUrl?: string -): Promise { - const productDir = path.join(IMAGES_BASE_PATH, 'products', String(dispensaryId)); - const prefix = imageUrl - ? 
`${productId}-${hashUrl(imageUrl)}` - : String(productId); +export async function getProductImageUrl( + ctx: ProductImageContext, + imageUrl: string +): Promise { + const outputDir = buildProductImagePath(ctx); + const urlHash = hashUrl(imageUrl); + const imagePath = path.join(outputDir, `image-${urlHash}.webp`); try { - const files = await fs.readdir(productDir); - const toDelete = files.filter(f => f.startsWith(prefix)); - await Promise.all(toDelete.map(f => fs.unlink(path.join(productDir, f)))); + await fs.access(imagePath); + const url = pathToUrl(imagePath); + return { + original: url, + full: url, + medium: url, + thumb: url, + }; } catch { - // Directory might not exist, that's fine + return null; } } @@ -296,19 +395,17 @@ export function isImageStorageReady(): boolean { /** * Initialize the image storage directories - * Does NOT throw on failure - logs warning and continues */ export async function initializeImageStorage(): Promise { try { await ensureDir(path.join(IMAGES_BASE_PATH, 'products')); await ensureDir(path.join(IMAGES_BASE_PATH, 'brands')); - console.log(`✅ Image storage initialized at ${IMAGES_BASE_PATH}`); + console.log(`[ImageStorage] Initialized at ${IMAGES_BASE_PATH}`); imageStorageReady = true; } catch (error: any) { - console.warn(`⚠️ WARNING: Could not initialize image storage at ${IMAGES_BASE_PATH}: ${error.message}`); - console.warn(' Image upload/processing is disabled. Server will continue without image features.'); + console.warn(`[ImageStorage] WARNING: Could not initialize at ${IMAGES_BASE_PATH}: ${error.message}`); + console.warn(' Image features disabled. 
Server will continue without image downloads.'); imageStorageReady = false; - // Do NOT throw - server should still start } } @@ -316,34 +413,43 @@ export async function initializeImageStorage(): Promise { * Get storage stats */ export async function getStorageStats(): Promise<{ - productsDir: string; - brandsDir: string; + basePath: string; productCount: number; brandCount: number; + totalSizeBytes: number; }> { - const productsDir = path.join(IMAGES_BASE_PATH, 'products'); - const brandsDir = path.join(IMAGES_BASE_PATH, 'brands'); - let productCount = 0; let brandCount = 0; + let totalSizeBytes = 0; - try { - const productDirs = await fs.readdir(productsDir); - for (const dir of productDirs) { - const files = await fs.readdir(path.join(productsDir, dir)); - productCount += files.filter(f => f.endsWith('.webp') && !f.includes('-')).length; - } - } catch { /* ignore */ } + async function countDir(dirPath: string): Promise<{ count: number; size: number }> { + let count = 0; + let size = 0; + try { + const entries = await fs.readdir(dirPath, { withFileTypes: true }); + for (const entry of entries) { + const fullPath = path.join(dirPath, entry.name); + if (entry.isDirectory()) { + const sub = await countDir(fullPath); + count += sub.count; + size += sub.size; + } else if (entry.name.endsWith('.webp') && !entry.name.includes('-')) { + count++; + const stat = await fs.stat(fullPath); + size += stat.size; + } + } + } catch { /* ignore */ } + return { count, size }; + } - try { - const brandFiles = await fs.readdir(brandsDir); - brandCount = brandFiles.filter(f => f.endsWith('.webp')).length; - } catch { /* ignore */ } + const products = await countDir(path.join(IMAGES_BASE_PATH, 'products')); + const brands = await countDir(path.join(IMAGES_BASE_PATH, 'brands')); return { - productsDir, - brandsDir, - productCount, - brandCount, + basePath: IMAGES_BASE_PATH, + productCount: products.count, + brandCount: brands.count, + totalSizeBytes: products.size + brands.size, }; } 
diff --git a/cannaiq/src/lib/api.ts b/cannaiq/src/lib/api.ts index 8082ae51..ec3e8a02 100755 --- a/cannaiq/src/lib/api.ts +++ b/cannaiq/src/lib/api.ts @@ -113,8 +113,16 @@ class ApiClient { }); } - async getDispensaries() { - return this.request<{ dispensaries: any[] }>('/api/dispensaries'); + async getDispensaries(params?: { limit?: number; offset?: number; search?: string; city?: string; state?: string; crawl_enabled?: string }) { + const searchParams = new URLSearchParams(); + if (params?.limit) searchParams.append('limit', params.limit.toString()); + if (params?.offset) searchParams.append('offset', params.offset.toString()); + if (params?.search) searchParams.append('search', params.search); + if (params?.city) searchParams.append('city', params.city); + if (params?.state) searchParams.append('state', params.state); + if (params?.crawl_enabled) searchParams.append('crawl_enabled', params.crawl_enabled); + const queryString = searchParams.toString() ? `?${searchParams.toString()}` : ''; + return this.request<{ dispensaries: any[]; total: number; limit: number; offset: number; hasMore: boolean }>(`/api/dispensaries${queryString}`); } async getDispensary(slug: string) { diff --git a/cannaiq/src/lib/images.ts b/cannaiq/src/lib/images.ts new file mode 100644 index 00000000..b22651d1 --- /dev/null +++ b/cannaiq/src/lib/images.ts @@ -0,0 +1,119 @@ +/** + * Image URL utilities for on-demand resizing + * + * Uses the backend's /img proxy endpoint for local images. + * Falls back to original URL for remote images. 
+ */ + +const API_BASE = import.meta.env.VITE_API_URL || ''; + +interface ImageOptions { + width?: number; + height?: number; + quality?: number; + fit?: 'cover' | 'contain' | 'fill' | 'inside' | 'outside'; +} + +/** + * Check if URL is a local image path + */ +function isLocalImage(url: string): boolean { + return url.startsWith('/images/') || url.startsWith('/img/'); +} + +/** + * Build an image URL with optional resize parameters + * + * @param imageUrl - Original image URL (local or remote) + * @param options - Resize options + * @returns Optimized image URL + * + * @example + * // Thumbnail (50px) + * getImageUrl(product.image_url, { width: 50 }) + * + * // Card image (200px) + * getImageUrl(product.image_url, { width: 200 }) + * + * // Detail view (600px) + * getImageUrl(product.image_url, { width: 600 }) + * + * // Square crop + * getImageUrl(product.image_url, { width: 200, height: 200, fit: 'cover' }) + */ +export function getImageUrl( + imageUrl: string | null | undefined, + options: ImageOptions = {} +): string | null { + if (!imageUrl) return null; + + // For remote images (AWS, Dutchie CDN, etc.), return as-is + // These can't be resized by our proxy + if (imageUrl.startsWith('http://') || imageUrl.startsWith('https://')) { + return imageUrl; + } + + // For local images, use the /img proxy with resize params + if (isLocalImage(imageUrl)) { + // Convert /images/ path to /img/ proxy path + let proxyPath = imageUrl; + if (imageUrl.startsWith('/images/')) { + proxyPath = imageUrl.replace('/images/', '/img/'); + } + + // Build query params + const params = new URLSearchParams(); + if (options.width) params.set('w', String(options.width)); + if (options.height) params.set('h', String(options.height)); + if (options.quality) params.set('q', String(options.quality)); + if (options.fit) params.set('fit', options.fit); + + const queryString = params.toString(); + const url = queryString ? 
`${proxyPath}?${queryString}` : proxyPath; + + // Prepend API base if needed + return API_BASE ? `${API_BASE}${url}` : url; + } + + // Unknown format, return as-is + return imageUrl; +} + +/** + * Preset sizes for common use cases + */ +export const ImageSizes = { + /** Tiny thumbnail for lists (50px) */ + thumb: { width: 50 }, + /** Small card (100px) */ + small: { width: 100 }, + /** Medium card (200px) */ + medium: { width: 200 }, + /** Large card (400px) */ + large: { width: 400 }, + /** Detail view (600px) */ + detail: { width: 600 }, + /** Full size (no resize) */ + full: {}, +} as const; + +/** + * Convenience function for thumbnail + */ +export function getThumbUrl(imageUrl: string | null | undefined): string | null { + return getImageUrl(imageUrl, ImageSizes.thumb); +} + +/** + * Convenience function for card images + */ +export function getCardUrl(imageUrl: string | null | undefined): string | null { + return getImageUrl(imageUrl, ImageSizes.medium); +} + +/** + * Convenience function for detail images + */ +export function getDetailUrl(imageUrl: string | null | undefined): string | null { + return getImageUrl(imageUrl, ImageSizes.detail); +} diff --git a/cannaiq/src/pages/Dispensaries.tsx b/cannaiq/src/pages/Dispensaries.tsx index 47be67ff..d2fef3d7 100644 --- a/cannaiq/src/pages/Dispensaries.tsx +++ b/cannaiq/src/pages/Dispensaries.tsx @@ -1,33 +1,71 @@ -import React, { useEffect, useState } from 'react'; +import React, { useEffect, useState, useCallback } from 'react'; import { useNavigate } from 'react-router-dom'; import { Layout } from '../components/Layout'; import { api } from '../lib/api'; -import { Building2, Phone, Mail, MapPin, ExternalLink, Search, Eye, Pencil, X, Save } from 'lucide-react'; +import { Building2, Phone, Mail, MapPin, ExternalLink, Search, Eye, Pencil, X, Save, ChevronLeft, ChevronRight } from 'lucide-react'; + +const PAGE_SIZE = 50; export function Dispensaries() { const navigate = useNavigate(); const [dispensaries, 
setDispensaries] = useState([]); const [loading, setLoading] = useState(true); const [searchTerm, setSearchTerm] = useState(''); - const [filterCity, setFilterCity] = useState(''); + const [debouncedSearch, setDebouncedSearch] = useState(''); + const [filterState, setFilterState] = useState(''); const [editingDispensary, setEditingDispensary] = useState(null); const [editForm, setEditForm] = useState({}); + const [total, setTotal] = useState(0); + const [offset, setOffset] = useState(0); + const [hasMore, setHasMore] = useState(false); + const [states, setStates] = useState([]); + // Debounce search useEffect(() => { - loadDispensaries(); + const timer = setTimeout(() => { + setDebouncedSearch(searchTerm); + setOffset(0); // Reset to first page on search + }, 300); + return () => clearTimeout(timer); + }, [searchTerm]); + + // Load states once for filter dropdown + useEffect(() => { + const loadStates = async () => { + try { + const data = await api.getDispensaries({ limit: 500, crawl_enabled: 'all' }); + const uniqueStates = Array.from(new Set(data.dispensaries.map((d: any) => d.state).filter(Boolean))).sort() as string[]; + setStates(uniqueStates); + } catch (error) { + console.error('Failed to load states:', error); + } + }; + loadStates(); }, []); - const loadDispensaries = async () => { + const loadDispensaries = useCallback(async () => { setLoading(true); try { - const data = await api.getDispensaries(); + const data = await api.getDispensaries({ + limit: PAGE_SIZE, + offset, + search: debouncedSearch || undefined, + state: filterState || undefined, + crawl_enabled: 'all' + }); setDispensaries(data.dispensaries); + setTotal(data.total); + setHasMore(data.hasMore); } catch (error) { console.error('Failed to load dispensaries:', error); } finally { setLoading(false); } - }; + }, [offset, debouncedSearch, filterState]); + + useEffect(() => { + loadDispensaries(); + }, [loadDispensaries]); const handleEdit = (dispensary: any) => { 
setEditingDispensary(dispensary); @@ -59,17 +97,18 @@ export function Dispensaries() { setEditForm({}); }; - const filteredDispensaries = dispensaries.filter(disp => { - const searchLower = searchTerm.toLowerCase(); - const matchesSearch = !searchTerm || - disp.name.toLowerCase().includes(searchLower) || - (disp.company_name && disp.company_name.toLowerCase().includes(searchLower)) || - (disp.dba_name && disp.dba_name.toLowerCase().includes(searchLower)); - const matchesCity = !filterCity || disp.city === filterCity; - return matchesSearch && matchesCity; - }); + const currentPage = Math.floor(offset / PAGE_SIZE) + 1; + const totalPages = Math.ceil(total / PAGE_SIZE); - const cities = Array.from(new Set(dispensaries.map(d => d.city).filter(Boolean))).sort(); + const goToPage = (page: number) => { + const newOffset = (page - 1) * PAGE_SIZE; + setOffset(newOffset); + }; + + const handleStateFilter = (state: string) => { + setFilterState(state); + setOffset(0); // Reset to first page + }; return ( @@ -78,7 +117,7 @@ export function Dispensaries() {

Dispensaries

- AZDHS official dispensary directory ({dispensaries.length} total) + USA and Canada Dispensary Directory ({total} total)

@@ -102,16 +141,16 @@ export function Dispensaries() {
@@ -133,9 +172,6 @@ export function Dispensaries() { Name - - Company - Address @@ -157,14 +193,14 @@ export function Dispensaries() { - {filteredDispensaries.length === 0 ? ( + {dispensaries.length === 0 ? ( - + No dispensaries found ) : ( - filteredDispensaries.map((disp) => ( + dispensaries.map((disp) => (
@@ -181,13 +217,10 @@ export function Dispensaries() {
- - {disp.company_name || '-'} -
- {disp.address || '-'} + {disp.address1 || '-'}
@@ -266,10 +299,33 @@ export function Dispensaries() { - {/* Footer */} + {/* Footer with Pagination */}
-
- Showing {filteredDispensaries.length} of {dispensaries.length} dispensaries +
+
+ Showing {offset + 1}-{Math.min(offset + dispensaries.length, total)} of {total} dispensaries +
+
+ + + Page {currentPage} of {totalPages} + + +
diff --git a/cannaiq/src/pages/DispensaryDetail.tsx b/cannaiq/src/pages/DispensaryDetail.tsx index d5c7a2b8..3464a3a4 100644 --- a/cannaiq/src/pages/DispensaryDetail.tsx +++ b/cannaiq/src/pages/DispensaryDetail.tsx @@ -2,6 +2,7 @@ import { useEffect, useState } from 'react'; import { useParams, useNavigate, Link } from 'react-router-dom'; import { Layout } from '../components/Layout'; import { api } from '../lib/api'; +import { getImageUrl, ImageSizes } from '../lib/images'; import { Building2, Phone, @@ -497,7 +498,7 @@ export function DispensaryDetail() { {product.image_url ? ( {product.name} e.currentTarget.style.display = 'none'} @@ -686,7 +687,7 @@ export function DispensaryDetail() {
{special.image_url && ( {special.name} e.currentTarget.style.display = 'none'} diff --git a/cannaiq/src/pages/OrchestratorProducts.tsx b/cannaiq/src/pages/OrchestratorProducts.tsx index a9f6c136..774dacd5 100644 --- a/cannaiq/src/pages/OrchestratorProducts.tsx +++ b/cannaiq/src/pages/OrchestratorProducts.tsx @@ -3,6 +3,7 @@ import { Layout } from '../components/Layout'; import { Package, ArrowLeft, TrendingUp, TrendingDown, DollarSign, Search, Filter, ChevronDown, X, LineChart } from 'lucide-react'; import { useNavigate, useSearchParams } from 'react-router-dom'; import { api } from '../lib/api'; +import { getImageUrl, ImageSizes } from '../lib/images'; interface Product { id: number; @@ -324,7 +325,7 @@ export function OrchestratorProducts() {
{product.image_url ? ( {product.name} @@ -395,7 +396,7 @@ export function OrchestratorProducts() {
{selectedProduct.image_url ? ( {selectedProduct.name} diff --git a/cannaiq/src/pages/PriceCompare.tsx b/cannaiq/src/pages/PriceCompare.tsx index aeb5e440..b181e8f1 100644 --- a/cannaiq/src/pages/PriceCompare.tsx +++ b/cannaiq/src/pages/PriceCompare.tsx @@ -3,6 +3,7 @@ import { Layout } from '../components/Layout'; import { Scale, Search, Package, Store, Trophy, TrendingDown, TrendingUp, MapPin } from 'lucide-react'; import { useNavigate, useSearchParams } from 'react-router-dom'; import { api } from '../lib/api'; +import { getImageUrl, ImageSizes } from '../lib/images'; interface CompareResult { product_id: number; @@ -311,7 +312,7 @@ export function PriceCompare() {
{item.image_url ? ( {item.product_name} diff --git a/cannaiq/src/pages/ProductDetail.tsx b/cannaiq/src/pages/ProductDetail.tsx index 7d74d818..22ce8c65 100644 --- a/cannaiq/src/pages/ProductDetail.tsx +++ b/cannaiq/src/pages/ProductDetail.tsx @@ -2,6 +2,7 @@ import { useEffect, useState } from 'react'; import { useParams, useNavigate } from 'react-router-dom'; import { Layout } from '../components/Layout'; import { api } from '../lib/api'; +import { getImageUrl, ImageSizes } from '../lib/images'; import { ArrowLeft, ExternalLink, Package, Code, Copy, CheckCircle, FileJson, TrendingUp, TrendingDown, Minus, BarChart3 } from 'lucide-react'; export function ProductDetail() { @@ -114,14 +115,9 @@ export function ProductDetail() { const metadata = product.metadata || {}; - const getImageUrl = () => { - if (product.image_url_full) return product.image_url_full; - if (product.medium_path) return `/api/images/dutchie/${product.medium_path}`; - if (product.thumbnail_path) return `/api/images/dutchie/${product.thumbnail_path}`; - return null; - }; - - const imageUrl = getImageUrl(); + // Use the centralized image URL helper for on-demand resizing + const productImageUrl = product.image_url_full || product.image_url || product.medium_path || product.thumbnail_path; + const imageUrl = getImageUrl(productImageUrl, ImageSizes.detail); return ( diff --git a/cannaiq/src/pages/Products.tsx b/cannaiq/src/pages/Products.tsx index f1089e4a..90359727 100755 --- a/cannaiq/src/pages/Products.tsx +++ b/cannaiq/src/pages/Products.tsx @@ -2,6 +2,7 @@ import { useEffect, useState } from 'react'; import { useSearchParams, useNavigate } from 'react-router-dom'; import { Layout } from '../components/Layout'; import { api } from '../lib/api'; +import { getImageUrl, ImageSizes } from '../lib/images'; export function Products() { const [searchParams, setSearchParams] = useSearchParams(); @@ -417,9 +418,9 @@ function ProductCard({ product, onViewDetails }: { product: any; onViewDetails: 
onMouseEnter={(e) => e.currentTarget.style.transform = 'translateY(-4px)'} onMouseLeave={(e) => e.currentTarget.style.transform = 'translateY(0)'} > - {product.image_url_full ? ( + {(product.image_url_full || product.image_url) ? ( {product.name} {special.image_url ? ( {special.product_name} diff --git a/cannaiq/src/pages/StoreDetail.tsx b/cannaiq/src/pages/StoreDetail.tsx index d1734dd1..99ce9628 100644 --- a/cannaiq/src/pages/StoreDetail.tsx +++ b/cannaiq/src/pages/StoreDetail.tsx @@ -2,6 +2,7 @@ import { useEffect, useState } from 'react'; import { useParams, useNavigate } from 'react-router-dom'; import { Layout } from '../components/Layout'; import { api } from '../lib/api'; +import { getImageUrl as getResizedImageUrl, ImageSizes } from '../lib/images'; import { Package, Tag, Zap, Clock, ExternalLink, CheckCircle, XCircle, AlertCircle, Building, MapPin, RefreshCw, Calendar, Activity @@ -101,9 +102,10 @@ export function StoreDetail() { }; const getImageUrl = (product: any) => { - if (product.image_url_full) return product.image_url_full; - if (product.medium_path) return `/api/images/dutchie/${product.medium_path}`; - if (product.thumbnail_path) return `/api/images/dutchie/${product.thumbnail_path}`; + const rawUrl = product.image_url_full || product.image_url || product.medium_path || product.thumbnail_path; + if (rawUrl) { + return getResizedImageUrl(rawUrl, ImageSizes.medium) || rawUrl; + } return 'https://via.placeholder.com/300x300?text=No+Image'; }; diff --git a/cannaiq/src/pages/StoreDetailPage.tsx b/cannaiq/src/pages/StoreDetailPage.tsx index 15d49777..8d968e12 100644 --- a/cannaiq/src/pages/StoreDetailPage.tsx +++ b/cannaiq/src/pages/StoreDetailPage.tsx @@ -3,6 +3,7 @@ import { useParams, useNavigate } from 'react-router-dom'; import { Layout } from '../components/Layout'; import { api } from '../lib/api'; import { trackProductView } from '../lib/analytics'; +import { getImageUrl, ImageSizes } from '../lib/images'; import { Building2, Phone, @@ -470,7 
+471,7 @@ export function StoreDetailPage() { {product.image_url ? ( {product.name} e.currentTarget.style.display = 'none'}