feat(treez): CDP interception client for Elasticsearch API capture
Rewrites Treez platform client to use CDP (Chrome DevTools Protocol) interception instead of DOM scraping.

Key changes:
- Uses Puppeteer Stealth plugin to bypass headless detection
- Intercepts Elasticsearch API responses via CDP Network.responseReceived
- Captures full product data including inventory levels (availableUnits)
- Adds comprehensive TypeScript types for all Treez data structures
- Updates queries.ts with automatic session management
- Fixes product-discovery-treez handler for new API shape

Tested with Best Dispensary: 142 products across 10 categories captured with inventory data, pricing, and lab results.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,107 +1,172 @@
|
||||
/**
|
||||
* Test script for Treez platform client
|
||||
* Tests the new Treez integration with Best Dispensary
|
||||
* ============================================================
|
||||
* TREEZ CLIENT TEST SCRIPT
|
||||
* ============================================================
|
||||
*
|
||||
* Tests the Treez CDP interception client using Best Dispensary.
|
||||
*
|
||||
* This verifies:
|
||||
* - Stealth plugin bypasses headless detection
|
||||
* - CDP intercepts Elasticsearch API responses
|
||||
* - Products are captured and normalized correctly
|
||||
* - Inventory data is available
|
||||
*
|
||||
* Usage: npx ts-node scripts/test-treez-client.ts
|
||||
*
|
||||
* ============================================================
|
||||
*/
|
||||
|
||||
import {
|
||||
fetchProductsByStoreId,
|
||||
} from '../src/platforms/treez';
|
||||
import { TreezNormalizer } from '../src/hydration/normalizers/treez';
|
||||
import { fetchProductsFromUrl } from '../src/platforms/treez';
|
||||
|
||||
const TEST_STORE_ID = 'best';
|
||||
const TEST_URL = 'https://shop.bestdispensary.com/shop';
|
||||
|
||||
async function main() {
|
||||
console.log('='.repeat(60));
|
||||
console.log('Treez Platform Client Test');
|
||||
console.log('TREEZ CLIENT TEST - CDP INTERCEPTION');
|
||||
console.log('='.repeat(60));
|
||||
console.log(`Test Store: ${TEST_STORE_ID}`);
|
||||
console.log(`URL: ${TEST_URL}`);
|
||||
console.log('Method: Puppeteer + Stealth + CDP response capture');
|
||||
console.log('');
|
||||
|
||||
try {
|
||||
// Test 1: Fetch products from store
|
||||
console.log('[Test 1] Fetching products from Treez store...');
|
||||
const result = await fetchProductsByStoreId(TEST_STORE_ID);
|
||||
console.log('[Starting] Launching browser with Stealth plugin...\n');
|
||||
|
||||
console.log('');
|
||||
console.log('[Results]');
|
||||
console.log(` Store: ${result.store.name}`);
|
||||
console.log(` Store ID: ${result.store.storeId}`);
|
||||
console.log(` Products captured: ${result.products.length}`);
|
||||
console.log(` Scroll count: ${result.scrollCount}`);
|
||||
const result = await fetchProductsFromUrl(TEST_URL);
|
||||
|
||||
if (result.products.length > 0) {
|
||||
console.log('');
|
||||
console.log('[Sample Products (first 5)]');
|
||||
for (const p of result.products.slice(0, 5)) {
|
||||
console.log(` - ${p.name}`);
|
||||
console.log(` Brand: ${p.brand || 'N/A'}`);
|
||||
console.log(` Category: ${p.category || 'N/A'} / ${p.subcategory || 'N/A'}`);
|
||||
console.log(` Price: ${p.price ? '$' + p.price : 'N/A'}`);
|
||||
console.log(` THC: ${p.thcPercent !== null ? p.thcPercent + '%' : 'N/A'}`);
|
||||
}
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('RESULTS');
|
||||
console.log('='.repeat(60));
|
||||
console.log(`Total products: ${result.totalCaptured}`);
|
||||
console.log(`Store ID: ${result.storeId || 'N/A (custom domain)'}`);
|
||||
console.log(`Source URL: ${result.sourceUrl}`);
|
||||
console.log(`Fetched at: ${result.fetchedAt.toISOString()}`);
|
||||
|
||||
// Test 2: Normalize products
|
||||
console.log('');
|
||||
console.log('[Test 2] Testing normalizer...');
|
||||
const normalizer = new TreezNormalizer();
|
||||
|
||||
// Build a fake payload structure
|
||||
const fakePayload = {
|
||||
id: 'test-payload',
|
||||
dispensary_id: 9999,
|
||||
crawl_run_id: null,
|
||||
platform: 'treez',
|
||||
payload_version: 1,
|
||||
raw_json: { products: result.products },
|
||||
product_count: result.products.length,
|
||||
pricing_type: null,
|
||||
crawl_mode: null,
|
||||
fetched_at: new Date(),
|
||||
processed: false,
|
||||
normalized_at: null,
|
||||
hydration_error: null,
|
||||
hydration_attempts: 0,
|
||||
created_at: new Date(),
|
||||
};
|
||||
|
||||
const normalized = normalizer.normalize(fakePayload);
|
||||
|
||||
console.log(` Products normalized: ${normalized.products.length}`);
|
||||
console.log(` Brands extracted: ${normalized.brands.length}`);
|
||||
console.log(` Categories extracted: ${normalized.categories.length}`);
|
||||
console.log(` Errors: ${normalized.errors.length}`);
|
||||
|
||||
if (normalized.products.length > 0) {
|
||||
console.log('');
|
||||
console.log('[Sample Normalized Product]');
|
||||
const np = normalized.products[0];
|
||||
console.log(` External ID: ${np.externalProductId}`);
|
||||
console.log(` Name: ${np.name}`);
|
||||
console.log(` Brand: ${np.brandName}`);
|
||||
console.log(` Category: ${np.category}`);
|
||||
console.log(` Type: ${np.type}`);
|
||||
console.log(` Strain: ${np.strainType}`);
|
||||
console.log(` THC: ${np.thcPercent !== null ? np.thcPercent + '%' : 'N/A'}`);
|
||||
console.log(` CBD: ${np.cbdPercent !== null ? np.cbdPercent + '%' : 'N/A'}`);
|
||||
console.log(` Image: ${np.primaryImageUrl?.slice(0, 60) || 'N/A'}...`);
|
||||
|
||||
const pricing = normalized.pricing.get(np.externalProductId);
|
||||
if (pricing) {
|
||||
console.log(` Price (cents): ${pricing.priceRec}`);
|
||||
}
|
||||
}
|
||||
if (result.products.length === 0) {
|
||||
console.log('\n[WARNING] No products captured!');
|
||||
console.log('This could mean:');
|
||||
console.log(' - Stealth plugin is not bypassing detection');
|
||||
console.log(' - CDP is not intercepting the correct URLs');
|
||||
console.log(' - Page structure has changed');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log('');
|
||||
// Show sample raw product
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('SAMPLE RAW PRODUCT (from Elasticsearch)');
|
||||
console.log('='.repeat(60));
|
||||
const raw = result.products[0];
|
||||
console.log(JSON.stringify({
|
||||
id: raw.id,
|
||||
name: raw.name,
|
||||
menuTitle: raw.menuTitle,
|
||||
brand: raw.brand,
|
||||
category: raw.category,
|
||||
subtype: raw.subtype,
|
||||
status: raw.status,
|
||||
availableUnits: raw.availableUnits,
|
||||
customMinPrice: raw.customMinPrice,
|
||||
customMaxPrice: raw.customMaxPrice,
|
||||
isActive: raw.isActive,
|
||||
isAboveThreshold: raw.isAboveThreshold,
|
||||
}, null, 2));
|
||||
|
||||
// Show sample normalized product
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('SAMPLE NORMALIZED PRODUCT');
|
||||
console.log('='.repeat(60));
|
||||
const normalized = result.normalized[0];
|
||||
console.log(JSON.stringify({
|
||||
id: normalized.id,
|
||||
name: normalized.name,
|
||||
brand: normalized.brand,
|
||||
category: normalized.category,
|
||||
subtype: normalized.subtype,
|
||||
price: normalized.price,
|
||||
priceMin: normalized.priceMin,
|
||||
priceMax: normalized.priceMax,
|
||||
discountedPrice: normalized.discountedPrice,
|
||||
discountPercent: normalized.discountPercent,
|
||||
availableUnits: normalized.availableUnits,
|
||||
inStock: normalized.inStock,
|
||||
thcPercent: normalized.thcPercent,
|
||||
cbdPercent: normalized.cbdPercent,
|
||||
strainType: normalized.strainType,
|
||||
effects: normalized.effects,
|
||||
flavors: normalized.flavors,
|
||||
imageUrl: normalized.imageUrl,
|
||||
images: normalized.images?.slice(0, 2),
|
||||
}, null, 2));
|
||||
|
||||
// Brand breakdown
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('BRANDS (top 15)');
|
||||
console.log('='.repeat(60));
|
||||
const brandCounts = new Map<string, number>();
|
||||
for (const p of result.normalized) {
|
||||
const brand = p.brand || 'Unknown';
|
||||
brandCounts.set(brand, (brandCounts.get(brand) || 0) + 1);
|
||||
}
|
||||
|
||||
const sorted = [...brandCounts.entries()].sort((a, b) => b[1] - a[1]);
|
||||
console.log(`Total unique brands: ${sorted.length}\n`);
|
||||
sorted.slice(0, 15).forEach(([brand, count]) => {
|
||||
console.log(` ${brand}: ${count} products`);
|
||||
});
|
||||
|
||||
// Category breakdown
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('CATEGORIES');
|
||||
console.log('='.repeat(60));
|
||||
const categoryCounts = new Map<string, number>();
|
||||
for (const p of result.normalized) {
|
||||
const cat = p.category || 'Unknown';
|
||||
categoryCounts.set(cat, (categoryCounts.get(cat) || 0) + 1);
|
||||
}
|
||||
|
||||
const catSorted = [...categoryCounts.entries()].sort((a, b) => b[1] - a[1]);
|
||||
catSorted.forEach(([cat, count]) => {
|
||||
console.log(` ${cat}: ${count} products`);
|
||||
});
|
||||
|
||||
// Inventory stats
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('INVENTORY STATS');
|
||||
console.log('='.repeat(60));
|
||||
const inStock = result.normalized.filter(p => p.inStock).length;
|
||||
const outOfStock = result.normalized.filter(p => !p.inStock).length;
|
||||
const hasInventoryData = result.normalized.filter(p => p.availableUnits > 0).length;
|
||||
|
||||
console.log(`In stock: ${inStock}`);
|
||||
console.log(`Out of stock: ${outOfStock}`);
|
||||
console.log(`With inventory levels: ${hasInventoryData}`);
|
||||
|
||||
// Show inventory examples
|
||||
if (hasInventoryData > 0) {
|
||||
console.log('\nSample inventory levels:');
|
||||
result.normalized
|
||||
.filter(p => p.availableUnits > 0)
|
||||
.slice(0, 5)
|
||||
.forEach(p => {
|
||||
console.log(` ${p.name}: ${p.availableUnits} units`);
|
||||
});
|
||||
}
|
||||
|
||||
// Check for THC/CBD data
|
||||
const hasThc = result.normalized.filter(p => p.thcPercent !== null).length;
|
||||
const hasCbd = result.normalized.filter(p => p.cbdPercent !== null).length;
|
||||
console.log(`\nWith THC data: ${hasThc} (${Math.round(hasThc / result.totalCaptured * 100)}%)`);
|
||||
console.log(`With CBD data: ${hasCbd} (${Math.round(hasCbd / result.totalCaptured * 100)}%)`);
|
||||
|
||||
// Check for images
|
||||
const hasImages = result.normalized.filter(p => p.imageUrl).length;
|
||||
console.log(`With images: ${hasImages} (${Math.round(hasImages / result.totalCaptured * 100)}%)`);
|
||||
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('TEST PASSED');
|
||||
console.log('='.repeat(60));
|
||||
|
||||
} catch (error: any) {
|
||||
console.error('');
|
||||
console.error('='.repeat(60));
|
||||
console.error('\n' + '='.repeat(60));
|
||||
console.error('TEST FAILED');
|
||||
console.error('='.repeat(60));
|
||||
console.error(`Error: ${error.message}`);
|
||||
|
||||
Reference in New Issue
Block a user