feat(treez): Fetch all products with match_all query (+19% more)
- Update buildProductQuery() to use match_all by default - Captures hidden, below-threshold, and out-of-stock products - Add extractPrimaryImage() and extractImages() to normalizer - Add product_refresh_treez handler for platform-specific refresh - Add product_refresh_treez to TaskRole type Best Dispensary: 228 → 271 products (+43) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -31,7 +31,12 @@ export class TreezNormalizer extends BaseNormalizer {
|
||||
// ============================================================
|
||||
|
||||
extractProducts(rawJson: any): any[] {
|
||||
// Treez payload format: { products: [...] }
|
||||
// Prefer normalized array (already processed by client.ts normalizeProduct())
|
||||
if (rawJson?.normalized && Array.isArray(rawJson.normalized)) {
|
||||
return rawJson.normalized;
|
||||
}
|
||||
|
||||
// Treez payload format: { products: [...] } - raw ES products
|
||||
if (rawJson?.products && Array.isArray(rawJson.products)) {
|
||||
return rawJson.products;
|
||||
}
|
||||
@@ -41,7 +46,7 @@ export class TreezNormalizer extends BaseNormalizer {
|
||||
return rawJson;
|
||||
}
|
||||
|
||||
// Hits array (normalized format)
|
||||
// Hits array (ES response format)
|
||||
if (rawJson?.hits && Array.isArray(rawJson.hits)) {
|
||||
return rawJson.hits;
|
||||
}
|
||||
@@ -71,7 +76,8 @@ export class TreezNormalizer extends BaseNormalizer {
|
||||
// ============================================================
|
||||
|
||||
protected normalizeProduct(rawProduct: any, dispensaryId: number): NormalizedProduct | null {
|
||||
const externalId = rawProduct.productId;
|
||||
// Treez products use 'id' field from Elasticsearch API
|
||||
const externalId = rawProduct.id;
|
||||
if (!externalId) {
|
||||
console.warn('[TreezNormalizer] Product missing ID, skipping');
|
||||
return null;
|
||||
@@ -94,15 +100,15 @@ export class TreezNormalizer extends BaseNormalizer {
|
||||
brandName: rawProduct.brand || null,
|
||||
brandId: null, // Treez doesn't expose brand IDs
|
||||
category: this.normalizeCategory(rawProduct.category) || null,
|
||||
subcategory: rawProduct.subcategory || null,
|
||||
subcategory: rawProduct.subtype || null,
|
||||
type: rawProduct.category || null,
|
||||
strainType: rawProduct.subcategory || null, // indica, sativa, hybrid
|
||||
strainType: rawProduct.strainType || null, // Indica, Sativa, Hybrid
|
||||
|
||||
// Potency
|
||||
thcPercent: rawProduct.thcPercent ?? null,
|
||||
cbdPercent: rawProduct.cbdPercent ?? null,
|
||||
thcContent: rawProduct.thcPercent ?? null,
|
||||
cbdContent: rawProduct.cbdPercent ?? null,
|
||||
// Potency - Treez ES API uses customTHCPercentageMinValue/customCBDPercentageMinValue
|
||||
thcPercent: rawProduct.customTHCPercentageMinValue ?? rawProduct.thcPercent ?? null,
|
||||
cbdPercent: rawProduct.customCBDPercentageMinValue ?? rawProduct.cbdPercent ?? null,
|
||||
thcContent: rawProduct.customTHCPercentageMinValue ?? rawProduct.thcPercent ?? null,
|
||||
cbdContent: rawProduct.customCBDPercentageMinValue ?? rawProduct.cbdPercent ?? null,
|
||||
|
||||
// Status - scraped products are active
|
||||
status: 'Active',
|
||||
@@ -110,11 +116,9 @@ export class TreezNormalizer extends BaseNormalizer {
|
||||
medicalOnly: false,
|
||||
recOnly: false,
|
||||
|
||||
// Images
|
||||
primaryImageUrl: rawProduct.imageUrl || null,
|
||||
images: rawProduct.imageUrl
|
||||
? [{ url: rawProduct.imageUrl, position: 0 }]
|
||||
: [],
|
||||
// Images - Treez ES API has images in productData.images array
|
||||
primaryImageUrl: this.extractPrimaryImage(rawProduct),
|
||||
images: this.extractImages(rawProduct),
|
||||
|
||||
// Raw reference
|
||||
rawProduct,
|
||||
@@ -122,21 +126,23 @@ export class TreezNormalizer extends BaseNormalizer {
|
||||
}
|
||||
|
||||
protected normalizePricing(rawProduct: any): NormalizedPricing | null {
|
||||
const externalId = rawProduct.productId;
|
||||
const externalId = rawProduct.id;
|
||||
if (!externalId) return null;
|
||||
|
||||
const price = rawProduct.price;
|
||||
// Treez ES API uses customMinPrice/customMaxPrice
|
||||
const priceMin = rawProduct.customMinPrice ?? rawProduct.price;
|
||||
const priceMax = rawProduct.customMaxPrice ?? priceMin;
|
||||
|
||||
return {
|
||||
externalProductId: String(externalId),
|
||||
|
||||
// Treez typically shows a single price
|
||||
priceRec: this.toCents(price),
|
||||
priceRecMin: this.toCents(price),
|
||||
priceRecMax: this.toCents(price),
|
||||
// Treez pricing from ES API
|
||||
priceRec: this.toCents(priceMin),
|
||||
priceRecMin: this.toCents(priceMin),
|
||||
priceRecMax: this.toCents(priceMax),
|
||||
priceRecSpecial: null,
|
||||
|
||||
// Treez doesn't distinguish med pricing in DOM
|
||||
// Treez doesn't distinguish med pricing
|
||||
priceMed: null,
|
||||
priceMedMin: null,
|
||||
priceMedMax: null,
|
||||
@@ -149,7 +155,7 @@ export class TreezNormalizer extends BaseNormalizer {
|
||||
}
|
||||
|
||||
protected normalizeAvailability(rawProduct: any): NormalizedAvailability | null {
|
||||
const externalId = rawProduct.productId;
|
||||
const externalId = rawProduct.id;
|
||||
if (!externalId) return null;
|
||||
|
||||
const inStock = rawProduct.inStock !== false;
|
||||
@@ -224,4 +230,47 @@ export class TreezNormalizer extends BaseNormalizer {
|
||||
|
||||
return categoryMap[categoryLower] || category;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract primary image URL from Treez product
|
||||
* Images are in productData.images array
|
||||
*/
|
||||
private extractPrimaryImage(rawProduct: any): string | null {
|
||||
const productData = rawProduct.productData || {};
|
||||
const images = productData.images || [];
|
||||
|
||||
if (images.length > 0) {
|
||||
// First image is primary
|
||||
const firstImage = images[0];
|
||||
return firstImage?.url || firstImage?.imageUrl || null;
|
||||
}
|
||||
|
||||
// Fallback to other image fields
|
||||
return rawProduct.imageUrl || rawProduct.image_url || null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract all image URLs from Treez product
|
||||
*/
|
||||
private extractImages(rawProduct: any): string[] {
|
||||
const productData = rawProduct.productData || {};
|
||||
const images = productData.images || [];
|
||||
|
||||
const urls: string[] = [];
|
||||
|
||||
for (const img of images) {
|
||||
const url = img?.url || img?.imageUrl;
|
||||
if (url) urls.push(url);
|
||||
}
|
||||
|
||||
// Include any direct image field
|
||||
if (rawProduct.imageUrl && !urls.includes(rawProduct.imageUrl)) {
|
||||
urls.push(rawProduct.imageUrl);
|
||||
}
|
||||
if (rawProduct.image_url && !urls.includes(rawProduct.image_url)) {
|
||||
urls.push(rawProduct.image_url);
|
||||
}
|
||||
|
||||
return urls;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,26 +3,30 @@
|
||||
* TREEZ PLATFORM CLIENT
|
||||
* ============================================================
|
||||
*
|
||||
* Treez uses Cloudflare protection + headless detection on their
|
||||
* Elasticsearch API. This client uses:
|
||||
* Treez uses Cloudflare protection on their Elasticsearch API.
|
||||
* This client uses:
|
||||
*
|
||||
* 1. Puppeteer with Stealth plugin to bypass detection
|
||||
* 2. CDP (Chrome DevTools Protocol) to intercept API responses
|
||||
* 3. Scrolling/pagination to trigger all product loads
|
||||
* 1. Puppeteer with Stealth plugin to bypass Cloudflare
|
||||
* 2. Direct API calls from browser context after session established
|
||||
* 3. Captures store-specific headers (org-id, entity-id) from first request
|
||||
*
|
||||
* API Endpoints (intercepted, not called directly):
|
||||
* DISCOVERY SCHEME:
|
||||
* 1. Navigate to store menu page with Puppeteer (establishes CF session)
|
||||
* 2. Capture first ES API request headers:
|
||||
* - x-api-key: Store's API key
|
||||
* - org-id: Organization ID (store-specific)
|
||||
* - entity-id: Entity ID (store-specific)
|
||||
* - ES URL with tenant ID: https://search-{tenant}.gapcommerceapi.com/product/search
|
||||
* 3. Make direct API call from browser context with captured headers
|
||||
* 4. Fetch ALL products in one request (size: 10000)
|
||||
*
|
||||
* API Endpoints:
|
||||
* - Products: POST https://search-{tenant}.gapcommerceapi.com/product/search
|
||||
* - Discounts: GET https://headless.treez.io/v2.0/dispensary/{storeId}/ecommerce/discounts
|
||||
*
|
||||
* Store ID Format: String slug (e.g., "best")
|
||||
* Menu URL: https://{storeId}.treez.io/onlinemenu/ or custom domain
|
||||
*
|
||||
* Data captured includes:
|
||||
* - Full product details (name, brand, category, subtype)
|
||||
* - Inventory levels (availableUnits)
|
||||
* - Pricing with discounts
|
||||
* - Lab results (THC/CBD when available)
|
||||
*
|
||||
* ============================================================
|
||||
*/
|
||||
|
||||
@@ -39,6 +43,25 @@ import type {
|
||||
TreezESResponse,
|
||||
} from './types';
|
||||
|
||||
// ============================================================
|
||||
// STORE CONFIG (captured from first API request)
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* Store-specific API configuration captured from browser requests.
|
||||
* These are required for direct API calls.
|
||||
*/
|
||||
export interface TreezStoreConfig {
|
||||
/** Full ES URL with tenant ID: https://search-{tenant}.gapcommerceapi.com/product/search */
|
||||
esUrl: string;
|
||||
/** API key from x-api-key header */
|
||||
apiKey: string;
|
||||
/** Organization ID from org-id header (store-specific) */
|
||||
orgId: string;
|
||||
/** Entity ID from entity-id header (store-specific) */
|
||||
entityId: string;
|
||||
}
|
||||
|
||||
// Register stealth plugin - REQUIRED for Treez
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
@@ -67,6 +90,7 @@ export const TREEZ_CONFIG: TreezConfig = {
|
||||
// ============================================================
|
||||
|
||||
let currentSession: TreezSession | null = null;
|
||||
let currentStoreConfig: TreezStoreConfig | null = null;
|
||||
let crawlRotator: CrawlRotator | null = null;
|
||||
|
||||
/**
|
||||
@@ -220,6 +244,7 @@ export async function endSession(): Promise<void> {
|
||||
}
|
||||
|
||||
currentSession = null;
|
||||
currentStoreConfig = null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -231,66 +256,62 @@ export function getCurrentSession(): TreezSession | null {
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// CDP RESPONSE INTERCEPTION
|
||||
// STORE CONFIG CAPTURE
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* Setup CDP listener to capture Elasticsearch product responses
|
||||
* Setup request listener to capture store-specific API config from first ES request.
|
||||
* This captures org-id, entity-id, api-key, and ES URL.
|
||||
*/
|
||||
function setupProductCapture(session: TreezSession): void {
|
||||
const { cdpClient } = session;
|
||||
|
||||
cdpClient.on('Network.responseReceived', async (event: any) => {
|
||||
const url = event.response.url;
|
||||
|
||||
// Check if this is an ES product search response
|
||||
if (url.includes('gapcommerceapi.com/product/search') && event.response.status === 200) {
|
||||
try {
|
||||
const response = await cdpClient.send('Network.getResponseBody', {
|
||||
requestId: event.requestId,
|
||||
});
|
||||
|
||||
const body = response.base64Encoded
|
||||
? Buffer.from(response.body, 'base64').toString('utf8')
|
||||
: response.body;
|
||||
|
||||
const json: TreezESResponse = JSON.parse(body);
|
||||
const products = json.hits?.hits?.map((h) => h._source) || [];
|
||||
|
||||
if (products.length > 0) {
|
||||
session.capturedProducts.push(...products);
|
||||
console.log(
|
||||
`[Treez Client] Captured ${products.length} products (total: ${session.capturedProducts.length})`
|
||||
);
|
||||
}
|
||||
} catch {
|
||||
// Response body may not be available, skip silently
|
||||
}
|
||||
function setupStoreConfigCapture(page: Page): void {
|
||||
page.on('request', (req) => {
|
||||
const url = req.url();
|
||||
if (url.includes('gapcommerceapi.com/product/search') && req.method() === 'POST' && !currentStoreConfig) {
|
||||
const headers = req.headers();
|
||||
currentStoreConfig = {
|
||||
esUrl: url,
|
||||
apiKey: headers['x-api-key'] || '',
|
||||
orgId: headers['org-id'] || '',
|
||||
entityId: headers['entity-id'] || '',
|
||||
};
|
||||
console.log('[Treez Client] Captured store config:');
|
||||
console.log(` ES URL: ${currentStoreConfig.esUrl}`);
|
||||
console.log(` Org ID: ${currentStoreConfig.orgId}`);
|
||||
console.log(` Entity ID: ${currentStoreConfig.entityId}`);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current store config (captured from browser requests)
|
||||
*/
|
||||
export function getStoreConfig(): TreezStoreConfig | null {
|
||||
return currentStoreConfig;
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// PRODUCT FETCHING
|
||||
// PRODUCT FETCHING (Direct API Approach)
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* Navigate to store menu and capture all products via CDP interception
|
||||
* This is the main method for fetching products
|
||||
* Navigate to store menu, capture API config, and fetch ALL products via direct API call.
|
||||
*
|
||||
* APPROACH:
|
||||
* 1. Navigate to menu page (establishes Cloudflare session)
|
||||
* 2. Capture store-specific headers from first ES request (org-id, entity-id)
|
||||
* 3. Make direct API call from browser context with captured headers
|
||||
* 4. Fetch ALL products in one request (size: 10000)
|
||||
*
|
||||
* This is MUCH faster and more reliable than CDP scroll interception.
|
||||
*/
|
||||
export async function fetchAllProducts(
|
||||
menuUrl: string,
|
||||
options: {
|
||||
maxScrolls?: number;
|
||||
scrollDelay?: number;
|
||||
bypassAgeGate?: boolean;
|
||||
customerType?: 'ADULT' | 'MEDICAL' | 'BOTH';
|
||||
} = {}
|
||||
): Promise<TreezProductRaw[]> {
|
||||
const {
|
||||
maxScrolls = TREEZ_CONFIG.maxScrollAttempts,
|
||||
scrollDelay = TREEZ_CONFIG.scrollDelay,
|
||||
bypassAgeGate = true,
|
||||
} = options;
|
||||
const { bypassAgeGate = true, customerType = 'ADULT' } = options;
|
||||
|
||||
if (!currentSession) {
|
||||
throw new Error('[Treez Client] No active session - call startSession() first');
|
||||
@@ -298,11 +319,11 @@ export async function fetchAllProducts(
|
||||
|
||||
const { page } = currentSession;
|
||||
|
||||
// Reset captured products
|
||||
currentSession.capturedProducts = [];
|
||||
// Reset store config for new fetch
|
||||
currentStoreConfig = null;
|
||||
|
||||
// Setup CDP listener for product responses
|
||||
setupProductCapture(currentSession);
|
||||
// Setup listener to capture store config from first ES request
|
||||
setupStoreConfigCapture(page);
|
||||
|
||||
console.log(`[Treez Client] Navigating to ${menuUrl}`);
|
||||
|
||||
@@ -319,97 +340,146 @@ export async function fetchAllProducts(
|
||||
await tryBypassAgeGate(page);
|
||||
}
|
||||
|
||||
// Wait for initial products to load
|
||||
// Wait for initial page load to trigger first API request
|
||||
await sleep(3000);
|
||||
console.log(`[Treez Client] Initial capture: ${currentSession.capturedProducts.length} products`);
|
||||
|
||||
// Scroll and click "Load More" to get all products
|
||||
console.log('[Treez Client] Scrolling to load all products...');
|
||||
// Check if we captured the store config
|
||||
if (!currentStoreConfig) {
|
||||
console.error('[Treez Client] Failed to capture store config from browser requests');
|
||||
throw new Error('Failed to capture Treez store config');
|
||||
}
|
||||
|
||||
let previousCount = 0;
|
||||
let noNewDataCount = 0;
|
||||
console.log('[Treez Client] Making direct API call for ALL products...');
|
||||
|
||||
for (let i = 0; i < maxScrolls; i++) {
|
||||
// Scroll to bottom
|
||||
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
|
||||
await sleep(scrollDelay);
|
||||
// Build query for ALL products
|
||||
const query = buildProductQuery(customerType);
|
||||
|
||||
// Try clicking "Load More" button
|
||||
// Make direct API call from browser context (has Cloudflare session)
|
||||
const products = await page.evaluate(
|
||||
async (config: TreezStoreConfig, queryBody: any) => {
|
||||
try {
|
||||
const loadMoreBtn = await page.$('button.collection__load-more');
|
||||
if (loadMoreBtn) {
|
||||
const isVisible = await page.evaluate((btn: Element) => {
|
||||
const rect = btn.getBoundingClientRect();
|
||||
return rect.width > 0 && rect.height > 0;
|
||||
}, loadMoreBtn);
|
||||
|
||||
if (isVisible) {
|
||||
await loadMoreBtn.click();
|
||||
await sleep(scrollDelay);
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// No load more button or click failed
|
||||
}
|
||||
|
||||
const currentCount = currentSession.capturedProducts.length;
|
||||
if (currentCount === previousCount) {
|
||||
noNewDataCount++;
|
||||
if (noNewDataCount >= 5) {
|
||||
console.log(`[Treez Client] No new products for 5 scrolls, stopping`);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
noNewDataCount = 0;
|
||||
if ((i + 1) % 5 === 0) {
|
||||
console.log(`[Treez Client] Scroll ${i + 1}: ${currentCount} products`);
|
||||
}
|
||||
}
|
||||
previousCount = currentCount;
|
||||
}
|
||||
} catch (error: any) {
|
||||
console.error(`[Treez Client] Navigation error: ${error.message}`);
|
||||
throw error;
|
||||
}
|
||||
|
||||
// Deduplicate products by ID
|
||||
const seen = new Set<string>();
|
||||
const uniqueProducts = currentSession.capturedProducts.filter((p) => {
|
||||
if (!p.id || seen.has(p.id)) return false;
|
||||
seen.add(p.id);
|
||||
return true;
|
||||
const response = await fetch(config.esUrl, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'x-api-key': config.apiKey,
|
||||
'org-id': config.orgId,
|
||||
'entity-id': config.entityId,
|
||||
},
|
||||
body: JSON.stringify(queryBody),
|
||||
});
|
||||
|
||||
console.log(`[Treez Client] Total unique products: ${uniqueProducts.length}`);
|
||||
if (!response.ok) {
|
||||
throw new Error(`API returned HTTP ${response.status}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
return {
|
||||
success: true,
|
||||
total: data.hits?.total?.value || 0,
|
||||
products: (data.hits?.hits || []).map((h: any) => h._source),
|
||||
};
|
||||
} catch (e: any) {
|
||||
return { success: false, error: e.message, products: [] };
|
||||
}
|
||||
},
|
||||
currentStoreConfig,
|
||||
query
|
||||
);
|
||||
|
||||
if (!products.success) {
|
||||
throw new Error(`Direct API call failed: ${products.error}`);
|
||||
}
|
||||
|
||||
console.log(`[Treez Client] Fetched ${products.products.length} of ${products.total} products`);
|
||||
|
||||
// Store in session for reference
|
||||
currentSession.capturedProducts = products.products;
|
||||
|
||||
// Record success with rotator
|
||||
if (crawlRotator && uniqueProducts.length > 0) {
|
||||
if (crawlRotator && products.products.length > 0) {
|
||||
await crawlRotator.recordSuccess();
|
||||
}
|
||||
|
||||
return uniqueProducts;
|
||||
return products.products;
|
||||
} catch (error: any) {
|
||||
console.error(`[Treez Client] Error: ${error.message}`);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build Elasticsearch query for fetching all products
|
||||
*
|
||||
* Options:
|
||||
* - includeHidden: Include hidden and below-threshold products (default: true for complete inventory)
|
||||
* - customerType: Filter by customer type (ADULT, MEDICAL, BOTH)
|
||||
*
|
||||
* NOTE: Using match_all (includeHidden=true) returns ~20% more products than filtered query
|
||||
*/
|
||||
function buildProductQuery(
|
||||
customerType: 'ADULT' | 'MEDICAL' | 'BOTH' = 'ADULT',
|
||||
includeHidden: boolean = true
|
||||
): any {
|
||||
// Use match_all to get ALL products including hidden, below threshold, and out of stock
|
||||
// This returns ~20% more products than the filtered query
|
||||
if (includeHidden) {
|
||||
return {
|
||||
from: 0,
|
||||
size: 10000,
|
||||
query: { match_all: {} },
|
||||
};
|
||||
}
|
||||
|
||||
// Filtered query (original behavior) - excludes hidden/below-threshold products
|
||||
const customerTypeFilter =
|
||||
customerType === 'BOTH'
|
||||
? [{ match: { customCustomerType: 'BOTH' } }]
|
||||
: [
|
||||
{ match: { customCustomerType: 'BOTH' } },
|
||||
{ match: { customCustomerType: customerType } },
|
||||
];
|
||||
|
||||
return {
|
||||
from: 0,
|
||||
size: 10000,
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 } } } } },
|
||||
{ bool: { should: [{ match: { isAboveThreshold: true } }] } },
|
||||
{ bool: { should: [{ match: { isHideFromMenu: false } }] } },
|
||||
{ bool: { should: customerTypeFilter } },
|
||||
],
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch products from a specific brand page
|
||||
* Note: With direct API, this fetches ALL products then filters locally would be needed.
|
||||
* For now, it just fetches all products from the brand page URL.
|
||||
*/
|
||||
export async function fetchBrandProducts(
|
||||
storeUrl: string,
|
||||
brandSlug: string
|
||||
): Promise<TreezProductRaw[]> {
|
||||
const brandUrl = `${storeUrl}/brand/${encodeURIComponent(brandSlug)}`;
|
||||
return fetchAllProducts(brandUrl, { maxScrolls: 30 });
|
||||
return fetchAllProducts(brandUrl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch products from a specific category page
|
||||
* Note: With direct API, this fetches ALL products then filters locally would be needed.
|
||||
* For now, it just fetches all products from the category page URL.
|
||||
*/
|
||||
export async function fetchCategoryProducts(
|
||||
storeUrl: string,
|
||||
categorySlug: string
|
||||
): Promise<TreezProductRaw[]> {
|
||||
const categoryUrl = `${storeUrl}/collection/${encodeURIComponent(categorySlug)}`;
|
||||
return fetchAllProducts(categoryUrl, { maxScrolls: 30 });
|
||||
return fetchAllProducts(categoryUrl);
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
|
||||
254
backend/src/tasks/handlers/product-refresh-treez.ts
Normal file
254
backend/src/tasks/handlers/product-refresh-treez.ts
Normal file
@@ -0,0 +1,254 @@
|
||||
/**
|
||||
* ============================================================
|
||||
* TREEZ PRODUCT REFRESH HANDLER
|
||||
* ============================================================
|
||||
*
|
||||
* Normalizes raw Treez payload data and upserts to store_products.
|
||||
*
|
||||
* This is the Treez-specific product refresh handler.
|
||||
* It uses the TreezNormalizer to transform ES API data into
|
||||
* the canonical store_products format.
|
||||
*
|
||||
* ============================================================
|
||||
*/
|
||||
|
||||
import { TaskContext, TaskResult } from '../task-worker';
|
||||
import { loadRawPayloadById } from '../../utils/payload-storage';
|
||||
import { TreezNormalizer } from '../../hydration/normalizers/treez';
|
||||
|
||||
const normalizer = new TreezNormalizer();
|
||||
|
||||
export async function handleProductRefreshTreez(ctx: TaskContext): Promise<TaskResult> {
|
||||
const { pool, task, updateStep } = ctx;
|
||||
const dispensaryId = task.dispensary_id;
|
||||
const payloadId = task.payload?.payload_id as number | undefined;
|
||||
|
||||
if (!dispensaryId) {
|
||||
return { success: false, error: 'Missing dispensary_id in task' };
|
||||
}
|
||||
|
||||
if (!payloadId) {
|
||||
return { success: false, error: 'Missing payload_id in task payload' };
|
||||
}
|
||||
|
||||
console.log(`[TreezProductRefresh] Starting for dispensary ${dispensaryId}, payload ${payloadId}`);
|
||||
|
||||
try {
|
||||
// ============================================================
|
||||
// STEP 1: Load dispensary
|
||||
// ============================================================
|
||||
updateStep('loading', 'Loading dispensary info');
|
||||
|
||||
const dispResult = await pool.query(
|
||||
`SELECT id, name, platform, platform_dispensary_id, menu_url
|
||||
FROM dispensaries WHERE id = $1`,
|
||||
[dispensaryId]
|
||||
);
|
||||
|
||||
if (dispResult.rows.length === 0) {
|
||||
return { success: false, error: `Dispensary ${dispensaryId} not found` };
|
||||
}
|
||||
|
||||
const dispensary = dispResult.rows[0];
|
||||
console.log(`[TreezProductRefresh] Processing ${dispensary.name}`);
|
||||
|
||||
// ============================================================
|
||||
// STEP 2: Load payload
|
||||
// ============================================================
|
||||
updateStep('loading', 'Loading payload from storage');
|
||||
|
||||
const loadResult = await loadRawPayloadById(pool, payloadId);
|
||||
|
||||
if (!loadResult) {
|
||||
return { success: false, error: `Payload ${payloadId} not found` };
|
||||
}
|
||||
|
||||
const payloadData = loadResult.payload;
|
||||
|
||||
// Treez payload structure: { products: [...], normalized: [...], ... }
|
||||
const rawProducts = payloadData.products || payloadData.normalized || [];
|
||||
|
||||
if (rawProducts.length === 0) {
|
||||
return {
|
||||
success: false,
|
||||
error: 'Payload contains no products',
|
||||
payloadId,
|
||||
productsProcessed: 0,
|
||||
};
|
||||
}
|
||||
|
||||
console.log(`[TreezProductRefresh] Loaded ${rawProducts.length} products from payload`);
|
||||
|
||||
// ============================================================
|
||||
// STEP 3: Normalize products using TreezNormalizer methods directly
|
||||
// ============================================================
|
||||
updateStep('normalizing', `Normalizing ${rawProducts.length} products`);
|
||||
|
||||
// Extract and normalize products directly
|
||||
const normalizedProducts = [];
|
||||
const normalizedPricing = new Map();
|
||||
const normalizedAvailability = new Map();
|
||||
|
||||
for (const rawProduct of rawProducts) {
|
||||
// Use normalizer's internal methods via normalize() on single-product payload
|
||||
const singlePayload = {
|
||||
id: `treez-${rawProduct.id}`,
|
||||
dispensary_id: dispensaryId,
|
||||
platform_dispensary_id: dispensary.platform_dispensary_id,
|
||||
raw_json: { products: [rawProduct] },
|
||||
source_type: 'api' as const,
|
||||
pricing_type: 'dual' as const,
|
||||
crawl_mode: 'api' as const,
|
||||
crawl_run_id: null,
|
||||
platform: 'treez',
|
||||
payload_version: 1,
|
||||
product_count: 1,
|
||||
fetched_at: new Date(),
|
||||
processed: false,
|
||||
normalized_at: null,
|
||||
hydration_error: null,
|
||||
hydration_attempts: 0,
|
||||
created_at: new Date(),
|
||||
};
|
||||
|
||||
const result = normalizer.normalize(singlePayload);
|
||||
if (result.products.length > 0) {
|
||||
const product = result.products[0];
|
||||
normalizedProducts.push(product);
|
||||
|
||||
const pricing = result.pricing.get(product.externalProductId);
|
||||
if (pricing) normalizedPricing.set(product.externalProductId, pricing);
|
||||
|
||||
const availability = result.availability.get(product.externalProductId);
|
||||
if (availability) normalizedAvailability.set(product.externalProductId, availability);
|
||||
}
|
||||
}
|
||||
|
||||
if (normalizedProducts.length === 0) {
|
||||
return {
|
||||
success: false,
|
||||
error: 'Normalization produced no products',
|
||||
payloadId,
|
||||
productsProcessed: 0,
|
||||
};
|
||||
}
|
||||
|
||||
console.log(`[TreezProductRefresh] Normalized ${normalizedProducts.length} products`);
|
||||
|
||||
// ============================================================
|
||||
// STEP 4: Upsert to store_products
|
||||
// ============================================================
|
||||
updateStep('upserting', `Saving ${normalizedProducts.length} products to DB`);
|
||||
|
||||
let newCount = 0;
|
||||
let updatedCount = 0;
|
||||
|
||||
for (const product of normalizedProducts) {
|
||||
const pricing = normalizedPricing.get(product.externalProductId);
|
||||
const availability = normalizedAvailability.get(product.externalProductId);
|
||||
|
||||
const upsertResult = await pool.query(
|
||||
`INSERT INTO store_products (
|
||||
dispensary_id,
|
||||
provider,
|
||||
provider_product_id,
|
||||
platform_dispensary_id,
|
||||
name_raw,
|
||||
brand_name_raw,
|
||||
category_raw,
|
||||
subcategory_raw,
|
||||
strain_type,
|
||||
price_rec,
|
||||
price_med,
|
||||
thc_percent,
|
||||
cbd_percent,
|
||||
image_url,
|
||||
is_in_stock,
|
||||
stock_quantity,
|
||||
provider_data,
|
||||
first_seen_at,
|
||||
last_seen_at,
|
||||
created_at,
|
||||
updated_at
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, NOW(), NOW(), NOW(), NOW())
|
||||
ON CONFLICT (dispensary_id, provider, provider_product_id)
|
||||
DO UPDATE SET
|
||||
name_raw = EXCLUDED.name_raw,
|
||||
brand_name_raw = EXCLUDED.brand_name_raw,
|
||||
category_raw = EXCLUDED.category_raw,
|
||||
subcategory_raw = EXCLUDED.subcategory_raw,
|
||||
strain_type = EXCLUDED.strain_type,
|
||||
price_rec = EXCLUDED.price_rec,
|
||||
price_med = EXCLUDED.price_med,
|
||||
thc_percent = EXCLUDED.thc_percent,
|
||||
cbd_percent = EXCLUDED.cbd_percent,
|
||||
image_url = EXCLUDED.image_url,
|
||||
is_in_stock = EXCLUDED.is_in_stock,
|
||||
stock_quantity = EXCLUDED.stock_quantity,
|
||||
provider_data = EXCLUDED.provider_data,
|
||||
last_seen_at = NOW(),
|
||||
updated_at = NOW()
|
||||
RETURNING (xmax = 0) AS is_new`,
|
||||
[
|
||||
dispensaryId,
|
||||
'treez',
|
||||
product.externalProductId,
|
||||
dispensary.platform_dispensary_id,
|
||||
product.name,
|
||||
product.brandName,
|
||||
product.category,
|
||||
product.subcategory,
|
||||
product.strainType,
|
||||
pricing?.priceRec ? pricing.priceRec / 100 : null,
|
||||
pricing?.priceMed ? pricing.priceMed / 100 : null,
|
||||
product.thcPercent,
|
||||
product.cbdPercent,
|
||||
product.primaryImageUrl,
|
||||
availability?.inStock ?? true,
|
||||
availability?.quantity,
|
||||
JSON.stringify(product.rawProduct),
|
||||
]
|
||||
);
|
||||
|
||||
if (upsertResult.rows[0]?.is_new) {
|
||||
newCount++;
|
||||
} else {
|
||||
updatedCount++;
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`[TreezProductRefresh] Upserted: ${normalizedProducts.length} (${newCount} new, ${updatedCount} updated)`);
|
||||
|
||||
// ============================================================
|
||||
// STEP 5: Update dispensary
|
||||
// ============================================================
|
||||
await pool.query(
|
||||
`UPDATE dispensaries
|
||||
SET product_count = $2,
|
||||
stage = 'production',
|
||||
updated_at = NOW()
|
||||
WHERE id = $1`,
|
||||
[dispensaryId, normalizedProducts.length]
|
||||
);
|
||||
|
||||
console.log(`[TreezProductRefresh] Completed ${dispensary.name}`);
|
||||
|
||||
return {
|
||||
success: true,
|
||||
payloadId,
|
||||
productsProcessed: normalizedProducts.length,
|
||||
newProducts: newCount,
|
||||
updatedProducts: updatedCount,
|
||||
};
|
||||
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||
console.error(`[TreezProductRefresh] Error:`, errorMessage);
|
||||
|
||||
return {
|
||||
success: false,
|
||||
error: errorMessage,
|
||||
payloadId,
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -109,6 +109,7 @@ import { handleProductDiscoveryJane } from './handlers/product-discovery-jane';
|
||||
|
||||
// Treez Platform Handlers
|
||||
import { handleProductDiscoveryTreez } from './handlers/product-discovery-treez';
|
||||
import { handleProductRefreshTreez } from './handlers/product-refresh-treez';
|
||||
|
||||
const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '5000');
|
||||
const HEARTBEAT_INTERVAL_MS = parseInt(process.env.HEARTBEAT_INTERVAL_MS || '30000');
|
||||
@@ -242,7 +243,16 @@ function getHandlerForTask(task: WorkerTask): TaskHandler | undefined {
|
||||
console.log(`[TaskWorker] Using Treez handler for product_discovery`);
|
||||
return handleProductDiscoveryTreez;
|
||||
}
|
||||
// Treez uses shared product_refresh handler via normalizer registry
|
||||
if (role === 'product_refresh_treez') {
|
||||
console.log(`[TaskWorker] Using Treez handler for product_refresh_treez`);
|
||||
return handleProductRefreshTreez;
|
||||
}
|
||||
}
|
||||
|
||||
// Also route product_refresh_treez regardless of platform field
|
||||
if (role === 'product_refresh_treez') {
|
||||
console.log(`[TaskWorker] Using Treez handler for product_refresh_treez`);
|
||||
return handleProductRefreshTreez;
|
||||
}
|
||||
|
||||
// ==========================================================================
|
||||
|
||||
Reference in New Issue
Block a user