feat(treez): CDP interception client for Elasticsearch API capture

Rewrites the Treez platform client to use CDP (Chrome DevTools Protocol)
interception instead of DOM scraping. Key changes:

- Uses Puppeteer Stealth plugin to bypass headless detection
- Intercepts Elasticsearch API responses via CDP Network.responseReceived
- Captures full product data including inventory levels (availableUnits)
- Adds comprehensive TypeScript types for all Treez data structures
- Updates queries.ts with automatic session management
- Fixes product-discovery-treez handler for new API shape

Tested with Best Dispensary: 142 products across 10 categories captured
with inventory data, pricing, and lab results.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-13 19:25:49 -07:00
parent 83f629fec4
commit a020e31a46
6 changed files with 1159 additions and 502 deletions

View File

@@ -1,107 +1,172 @@
/**
* Test script for Treez platform client
* Tests the new Treez integration with Best Dispensary
* ============================================================
* TREEZ CLIENT TEST SCRIPT
* ============================================================
*
* Tests the Treez CDP interception client using Best Dispensary.
*
* This verifies:
* - Stealth plugin bypasses headless detection
* - CDP intercepts Elasticsearch API responses
* - Products are captured and normalized correctly
* - Inventory data is available
*
* Usage: npx ts-node scripts/test-treez-client.ts
*
* ============================================================
*/
import {
fetchProductsByStoreId,
} from '../src/platforms/treez';
import { TreezNormalizer } from '../src/hydration/normalizers/treez';
import { fetchProductsFromUrl } from '../src/platforms/treez';
const TEST_STORE_ID = 'best';
const TEST_URL = 'https://shop.bestdispensary.com/shop';
async function main() {
console.log('='.repeat(60));
console.log('Treez Platform Client Test');
console.log('TREEZ CLIENT TEST - CDP INTERCEPTION');
console.log('='.repeat(60));
console.log(`Test Store: ${TEST_STORE_ID}`);
console.log(`URL: ${TEST_URL}`);
console.log('Method: Puppeteer + Stealth + CDP response capture');
console.log('');
try {
// Test 1: Fetch products from store
console.log('[Test 1] Fetching products from Treez store...');
const result = await fetchProductsByStoreId(TEST_STORE_ID);
console.log('[Starting] Launching browser with Stealth plugin...\n');
console.log('');
console.log('[Results]');
console.log(` Store: ${result.store.name}`);
console.log(` Store ID: ${result.store.storeId}`);
console.log(` Products captured: ${result.products.length}`);
console.log(` Scroll count: ${result.scrollCount}`);
const result = await fetchProductsFromUrl(TEST_URL);
if (result.products.length > 0) {
console.log('');
console.log('[Sample Products (first 5)]');
for (const p of result.products.slice(0, 5)) {
console.log(` - ${p.name}`);
console.log(` Brand: ${p.brand || 'N/A'}`);
console.log(` Category: ${p.category || 'N/A'} / ${p.subcategory || 'N/A'}`);
console.log(` Price: ${p.price ? '$' + p.price : 'N/A'}`);
console.log(` THC: ${p.thcPercent !== null ? p.thcPercent + '%' : 'N/A'}`);
}
console.log('\n' + '='.repeat(60));
console.log('RESULTS');
console.log('='.repeat(60));
console.log(`Total products: ${result.totalCaptured}`);
console.log(`Store ID: ${result.storeId || 'N/A (custom domain)'}`);
console.log(`Source URL: ${result.sourceUrl}`);
console.log(`Fetched at: ${result.fetchedAt.toISOString()}`);
// Test 2: Normalize products
console.log('');
console.log('[Test 2] Testing normalizer...');
const normalizer = new TreezNormalizer();
// Build a fake payload structure
const fakePayload = {
id: 'test-payload',
dispensary_id: 9999,
crawl_run_id: null,
platform: 'treez',
payload_version: 1,
raw_json: { products: result.products },
product_count: result.products.length,
pricing_type: null,
crawl_mode: null,
fetched_at: new Date(),
processed: false,
normalized_at: null,
hydration_error: null,
hydration_attempts: 0,
created_at: new Date(),
};
const normalized = normalizer.normalize(fakePayload);
console.log(` Products normalized: ${normalized.products.length}`);
console.log(` Brands extracted: ${normalized.brands.length}`);
console.log(` Categories extracted: ${normalized.categories.length}`);
console.log(` Errors: ${normalized.errors.length}`);
if (normalized.products.length > 0) {
console.log('');
console.log('[Sample Normalized Product]');
const np = normalized.products[0];
console.log(` External ID: ${np.externalProductId}`);
console.log(` Name: ${np.name}`);
console.log(` Brand: ${np.brandName}`);
console.log(` Category: ${np.category}`);
console.log(` Type: ${np.type}`);
console.log(` Strain: ${np.strainType}`);
console.log(` THC: ${np.thcPercent !== null ? np.thcPercent + '%' : 'N/A'}`);
console.log(` CBD: ${np.cbdPercent !== null ? np.cbdPercent + '%' : 'N/A'}`);
console.log(` Image: ${np.primaryImageUrl?.slice(0, 60) || 'N/A'}...`);
const pricing = normalized.pricing.get(np.externalProductId);
if (pricing) {
console.log(` Price (cents): ${pricing.priceRec}`);
}
}
if (result.products.length === 0) {
console.log('\n[WARNING] No products captured!');
console.log('This could mean:');
console.log(' - Stealth plugin is not bypassing detection');
console.log(' - CDP is not intercepting the correct URLs');
console.log(' - Page structure has changed');
process.exit(1);
}
console.log('');
// Show sample raw product
console.log('\n' + '='.repeat(60));
console.log('SAMPLE RAW PRODUCT (from Elasticsearch)');
console.log('='.repeat(60));
const raw = result.products[0];
console.log(JSON.stringify({
id: raw.id,
name: raw.name,
menuTitle: raw.menuTitle,
brand: raw.brand,
category: raw.category,
subtype: raw.subtype,
status: raw.status,
availableUnits: raw.availableUnits,
customMinPrice: raw.customMinPrice,
customMaxPrice: raw.customMaxPrice,
isActive: raw.isActive,
isAboveThreshold: raw.isAboveThreshold,
}, null, 2));
// Show sample normalized product
console.log('\n' + '='.repeat(60));
console.log('SAMPLE NORMALIZED PRODUCT');
console.log('='.repeat(60));
const normalized = result.normalized[0];
console.log(JSON.stringify({
id: normalized.id,
name: normalized.name,
brand: normalized.brand,
category: normalized.category,
subtype: normalized.subtype,
price: normalized.price,
priceMin: normalized.priceMin,
priceMax: normalized.priceMax,
discountedPrice: normalized.discountedPrice,
discountPercent: normalized.discountPercent,
availableUnits: normalized.availableUnits,
inStock: normalized.inStock,
thcPercent: normalized.thcPercent,
cbdPercent: normalized.cbdPercent,
strainType: normalized.strainType,
effects: normalized.effects,
flavors: normalized.flavors,
imageUrl: normalized.imageUrl,
images: normalized.images?.slice(0, 2),
}, null, 2));
// Brand breakdown
console.log('\n' + '='.repeat(60));
console.log('BRANDS (top 15)');
console.log('='.repeat(60));
const brandCounts = new Map<string, number>();
for (const p of result.normalized) {
const brand = p.brand || 'Unknown';
brandCounts.set(brand, (brandCounts.get(brand) || 0) + 1);
}
const sorted = [...brandCounts.entries()].sort((a, b) => b[1] - a[1]);
console.log(`Total unique brands: ${sorted.length}\n`);
sorted.slice(0, 15).forEach(([brand, count]) => {
console.log(` ${brand}: ${count} products`);
});
// Category breakdown
console.log('\n' + '='.repeat(60));
console.log('CATEGORIES');
console.log('='.repeat(60));
const categoryCounts = new Map<string, number>();
for (const p of result.normalized) {
const cat = p.category || 'Unknown';
categoryCounts.set(cat, (categoryCounts.get(cat) || 0) + 1);
}
const catSorted = [...categoryCounts.entries()].sort((a, b) => b[1] - a[1]);
catSorted.forEach(([cat, count]) => {
console.log(` ${cat}: ${count} products`);
});
// Inventory stats
console.log('\n' + '='.repeat(60));
console.log('INVENTORY STATS');
console.log('='.repeat(60));
const inStock = result.normalized.filter(p => p.inStock).length;
const outOfStock = result.normalized.filter(p => !p.inStock).length;
const hasInventoryData = result.normalized.filter(p => p.availableUnits > 0).length;
console.log(`In stock: ${inStock}`);
console.log(`Out of stock: ${outOfStock}`);
console.log(`With inventory levels: ${hasInventoryData}`);
// Show inventory examples
if (hasInventoryData > 0) {
console.log('\nSample inventory levels:');
result.normalized
.filter(p => p.availableUnits > 0)
.slice(0, 5)
.forEach(p => {
console.log(` ${p.name}: ${p.availableUnits} units`);
});
}
// Check for THC/CBD data
const hasThc = result.normalized.filter(p => p.thcPercent !== null).length;
const hasCbd = result.normalized.filter(p => p.cbdPercent !== null).length;
console.log(`\nWith THC data: ${hasThc} (${Math.round(hasThc / result.totalCaptured * 100)}%)`);
console.log(`With CBD data: ${hasCbd} (${Math.round(hasCbd / result.totalCaptured * 100)}%)`);
// Check for images
const hasImages = result.normalized.filter(p => p.imageUrl).length;
console.log(`With images: ${hasImages} (${Math.round(hasImages / result.totalCaptured * 100)}%)`);
console.log('\n' + '='.repeat(60));
console.log('TEST PASSED');
console.log('='.repeat(60));
} catch (error: any) {
console.error('');
console.error('='.repeat(60));
console.error('\n' + '='.repeat(60));
console.error('TEST FAILED');
console.error('='.repeat(60));
console.error(`Error: ${error.message}`);