- Add WorkerFingerprint interface with timezone, city, state, ip, locale - Store fingerprint in TaskWorker after preflight passes - Pass fingerprint through TaskContext to handlers - Apply timezone via CDP and locale via Accept-Language header - Ensures browser fingerprint matches proxy IP location This fixes anti-detect detection where timezone/locale mismatch with proxy IP was getting blocked by Cloudflare. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
560 lines
18 KiB
TypeScript
560 lines
18 KiB
TypeScript
/**
 * Treez Platform Smoke Test
 *
 * Discovers DOM structure and extracts products from Treez menu pages.
 * Used to determine actual CSS selectors for the platform client.
 *
 * Usage: npx ts-node scripts/test-treez-discovery.ts
 */
|
|
|
|
import puppeteer, { Page } from 'puppeteer';
|
|
import puppeteerExtra from 'puppeteer-extra';
|
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
|
|
// Register the stealth plugin before any browser is launched. Treez doesn't use
// Cloudflare, so this is precautionary rather than required.
puppeteerExtra.use(StealthPlugin());
|
|
|
|
/** Identifier of the store under test; it is also the subdomain of the menu URL. */
const STORE_ID = 'best';

/** Adult-customer online menu page that the smoke test loads. */
const TEST_URL = `https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`;
|
|
|
|
/**
 * One product as scraped from a Treez menu card, before any normalization.
 * String fields use `''` and nullable fields use `null` when the card did not
 * expose the value.
 */
interface TreezProductRaw {
  /** Identifier taken from the product link, or a name-derived fallback. */
  productId: string;
  /** Display name shown on the card. */
  name: string;
  /** Brand text, or `''` when the card has no brand element. */
  brand: string;
  /** Keyword-inferred category (e.g. `flower`, `vape`), or `''`. */
  category: string;
  /** Strain type found in the card text (`indica`, `sativa`, `hybrid`), or `''`. */
  subcategory: string;
  /** THC figure parsed from the card text, or `null` when none was found. */
  thcPercent: number | null;
  /** CBD figure parsed from the card text, or `null` when none was found. */
  cbdPercent: number | null;
  /** Numeric amount following the `$` sign in the price text, or `null`. */
  price: number | null;
  /** Unit the price applies to; mirrors `weight` when one was parsed, else `''`. */
  priceUnit: string;
  /** Image source URL, or `null` when the card has no usable image. */
  imageUrl: string | null;
  /** `false` when the card text mentions "out of stock" or "sold out". */
  inStock: boolean;
  /** Weight with a lower-cased unit (e.g. `3.5g`), or `null`. */
  weight: string | null;
}
|
|
|
|
/**
 * Pause for a fixed amount of time.
 *
 * @param ms Delay in milliseconds, handed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  // The promise is built explicitly, so an `async` wrapper would add nothing.
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
|
|
/**
 * Scroll to the bottom of the page repeatedly so an infinite-scroll menu loads
 * its remaining items.
 *
 * Before each scroll the document height is compared with the height seen on
 * the previous iteration. The loop ends when the height has been unchanged for
 * `maxStalledChecks` consecutive checks or after `maxScrolls` scrolls,
 * whichever comes first.
 *
 * @param page Puppeteer page to scroll.
 * @param maxScrolls Upper bound on the number of scrolls performed.
 * @param scrollDelayMs Pause after each scroll to let new items load.
 * @param maxStalledChecks Consecutive unchanged-height checks that end the loop.
 * @returns The number of scrolls actually performed.
 */
async function scrollToLoadAll(
  page: Page,
  maxScrolls = 30,
  scrollDelayMs = 1500,
  maxStalledChecks = 3
): Promise<number> {
  // Broad selectors used only to print a rough item count while scrolling.
  const countSelectors = [
    '[class*="product"]',
    '[class*="Product"]',
    '[data-product]',
    '.menu-item',
    '[class*="card"]',
    '[class*="Card"]',
  ];

  let lastHeight = 0;
  let stalledChecks = 0;
  let scrolls = 0;

  console.log('[Scroll] Starting infinite scroll...');

  while (scrolls < maxScrolls) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    if (height === lastHeight) {
      stalledChecks++;
      if (stalledChecks >= maxStalledChecks) {
        console.log(`[Scroll] No new content after ${maxStalledChecks} attempts, stopping`);
        break;
      }
    } else {
      stalledChecks = 0;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(scrollDelayMs); // Wait for products to load

    lastHeight = height;
    scrolls++;

    // Progress estimate: the first selector matching more than 10 elements wins.
    const productCount = await page.evaluate((selectors: string[]) => {
      for (const selector of selectors) {
        const matches = document.querySelectorAll(selector).length;
        if (matches > 10) return matches;
      }
      return 0;
    }, countSelectors);

    console.log(`[Scroll] Scroll ${scrolls}: height=${height}, products~${productCount}`);
  }

  return scrolls;
}
|
|
|
|
/** Result of probing the page for a representative product container. */
interface DomSample {
  /** Selector that located the sample element. */
  selector: string;
  /** Outer HTML of the sample, truncated to 2000 characters. */
  html: string;
  /** Number of direct children of the sample element. */
  childCount: number;
  /** How many elements share the sample's class list (fallback path only). */
  count?: number;
}

/**
 * Collect up to 20 distinct class tokens on the page that contain `needle`
 * (case-insensitive).
 */
async function collectClassTokens(page: Page, needle: string): Promise<string[]> {
  return page.evaluate((fragment: string) => {
    const tokens = new Set<string>();
    document.querySelectorAll('*').forEach((el) => {
      // classList splits on any whitespace and also works for SVG elements,
      // whose className is not a plain string.
      el.classList.forEach((token) => {
        if (token.toLowerCase().includes(fragment)) {
          tokens.add(token);
        }
      });
    });
    return Array.from(tokens).slice(0, 20);
  }, needle);
}

/**
 * Analyze DOM structure to find product selectors.
 *
 * Prints class tokens containing "product" and "card", the `data-*` attribute
 * names in use, and a truncated HTML sample of a likely product container.
 * Nothing is returned; the function only logs.
 *
 * @param page Puppeteer page that has already loaded the menu.
 */
async function analyzeDOM(page: Page): Promise<void> {
  console.log('\n' + '='.repeat(60));
  console.log('DOM STRUCTURE ANALYSIS');
  console.log('='.repeat(60));

  const productClasses = await collectClassTokens(page, 'product');
  console.log('\n[Classes containing "product"]:');
  productClasses.forEach((token) => console.log(` .${token}`));

  const cardClasses = await collectClassTokens(page, 'card');
  console.log('\n[Classes containing "card"]:');
  cardClasses.forEach((token) => console.log(` .${token}`));

  // data-* attribute names, ignoring React's legacy data-reactid markers.
  const dataAttrs = await page.evaluate(() => {
    const names = new Set<string>();
    document.querySelectorAll('*').forEach((el) => {
      for (const attr of Array.from(el.attributes)) {
        if (attr.name.startsWith('data-') && !attr.name.includes('reactid')) {
          names.add(attr.name);
        }
      }
    });
    return Array.from(names).slice(0, 30);
  });
  console.log('\n[Data attributes found]:');
  dataAttrs.forEach((name) => console.log(` ${name}`));

  const sample = await page.evaluate((): DomSample | null => {
    // First try selectors that commonly name product containers.
    const knownSelectors = [
      '[class*="ProductCard"]',
      '[class*="product-card"]',
      '[class*="menuItem"]',
      '[class*="menu-item"]',
      '[data-testid*="product"]',
    ];
    for (const selector of knownSelectors) {
      const el = document.querySelector(selector);
      if (el) {
        return {
          selector,
          html: el.outerHTML.slice(0, 2000),
          childCount: el.children.length,
        };
      }
    }

    // Fallback: the class list repeated most often among divs with more than
    // two children is likely the product card. The first element seen is kept
    // so it can be sampled without building a selector and querying again.
    const tally = new Map<string, { count: number; first: Element }>();
    document.querySelectorAll('div[class]').forEach((el) => {
      if (el.children.length > 2 && el.className) {
        const entry = tally.get(el.className);
        if (entry) {
          entry.count++;
        } else {
          tally.set(el.className, { count: 1, first: el });
        }
      }
    });

    let best: { count: number; first: Element } | null = null;
    for (const entry of tally.values()) {
      if (entry.count > 5 && (best === null || entry.count > best.count)) {
        best = entry;
      }
    }
    if (best === null) return null;

    // classList[0] is undefined for a whitespace-only class attribute, and the
    // token is escaped so unusual characters still yield a valid selector.
    const firstClass = best.first.classList[0];
    if (!firstClass) return null;

    return {
      selector: `.${CSS.escape(firstClass)}`,
      html: best.first.outerHTML.slice(0, 2000),
      childCount: best.first.children.length,
      count: best.count,
    };
  });

  if (sample) {
    console.log('\n[Sample Product Container]:');
    console.log(` Selector: ${sample.selector}`);
    console.log(` Children: ${sample.childCount}`);
    if (sample.count) {
      console.log(` Occurrences: ${sample.count}`);
    }
    console.log('\n[Sample HTML (first 1000 chars)]:');
    console.log(sample.html.slice(0, 1000));
  }
}
|
|
|
|
/** Raw strings lifted from one product card in the browser; parsed in Node. */
interface TreezCardSnapshot {
  name: string;
  href: string;
  brand: string;
  priceText: string;
  imageSrc: string | null;
  text: string;
}

/**
 * Keyword table for category inference; the first matching row wins.
 * NOTE(review): these match anywhere in the card text, so short keywords such
 * as "cart" or "pen" may also hit button labels or longer words — confirm
 * against real cards.
 */
const TREEZ_CATEGORY_PATTERNS: ReadonlyArray<{ pattern: RegExp; category: string }> = [
  { pattern: /flower|bud/i, category: 'flower' },
  { pattern: /vape|cart|pen/i, category: 'vape' },
  { pattern: /edible|gummy|chocolate/i, category: 'edible' },
  { pattern: /concentrate|dab|wax|shatter/i, category: 'concentrate' },
  { pattern: /pre.?roll|joint/i, category: 'pre-roll' },
  { pattern: /topical|balm|cream/i, category: 'topical' },
  { pattern: /tincture/i, category: 'tincture' },
];

/**
 * Parse THC/CBD figures from card text written either label-first
 * ("THC 25.5%") or value-first ("25.5% THC").
 *
 * Both layouts are scanned over the whole text and the one producing more
 * label/number pairs is used (label-first on a tie). Checking each label in
 * isolation misreads "THC 25% CBD 1%" because "25% CBD" also looks like a
 * value-first CBD reading.
 */
function parseTreezPotency(text: string): { thcPercent: number | null; cbdPercent: number | null } {
  const labelFirst = Array.from(text.matchAll(/(THC|CBD)[:\s]*(\d+(?:\.\d+)?)\s*%?/gi), (m) => ({
    label: m[1].toUpperCase(),
    value: parseFloat(m[2]),
  }));
  const valueFirst = Array.from(text.matchAll(/(\d+(?:\.\d+)?)\s*%?\s*(THC|CBD)/gi), (m) => ({
    label: m[2].toUpperCase(),
    value: parseFloat(m[1]),
  }));
  const readings = valueFirst.length > labelFirst.length ? valueFirst : labelFirst;
  const pick = (label: string): number | null =>
    readings.find((reading) => reading.label === label)?.value ?? null;
  return { thcPercent: pick('THC'), cbdPercent: pick('CBD') };
}

/**
 * Recover the original image URL from a Next.js `/_next/image?url=...` source.
 * Other sources are returned unchanged; an empty or missing source gives `null`.
 */
function unwrapTreezImageUrl(src: string | null): string | null {
  if (!src) return null;
  if (!src.includes('/_next/image')) return src;
  const encoded = src.match(/url=([^&]+)/);
  if (!encoded) return src;
  try {
    return decodeURIComponent(encoded[1]);
  } catch {
    // Malformed escape sequence: keep the optimizer URL rather than drop the product.
    return src;
  }
}

/** Turn one card snapshot into a product record. `linkId` may be `''`. */
function parseTreezCard(card: TreezCardSnapshot, linkId: string): TreezProductRaw {
  const { name, text } = card;
  const textLower = text.toLowerCase();

  // Fall back to a name-derived id when the card has no product link.
  const productId = linkId || `treez_${name.replace(/\s+/g, '_').toLowerCase().slice(0, 30)}`;

  const priceMatch = card.priceText.match(/\$(\d+(?:\.\d{2})?)/);
  const price = priceMatch ? parseFloat(priceMatch[1]) : null;

  // Weight such as "3.5G" or "100mg"; the name is preferred over the full card text.
  const weightPattern = /(\d+(?:\.\d+)?)\s*(mg|g|oz)/i;
  const weightMatch = name.match(weightPattern) || text.match(weightPattern);
  const weight = weightMatch ? `${weightMatch[1]}${weightMatch[2].toLowerCase()}` : null;

  const subcategory = ['indica', 'sativa', 'hybrid'].find((strain) => textLower.includes(strain)) ?? '';
  const category = TREEZ_CATEGORY_PATTERNS.find(({ pattern }) => pattern.test(text))?.category ?? '';

  return {
    productId,
    name,
    brand: card.brand,
    category,
    subcategory,
    ...parseTreezPotency(text),
    price,
    priceUnit: weight ?? '',
    imageUrl: unwrapTreezImageUrl(card.imageSrc),
    inStock: !textLower.includes('out of stock') && !textLower.includes('sold out'),
    weight,
  };
}

/**
 * Extract products using discovered selectors.
 * Based on DOM analysis of the Treez/GapCommerce React app.
 *
 * The browser side only snapshots raw strings from each card; all parsing
 * happens in Node so progress messages reach this process's stdout.
 *
 * @param page Puppeteer page with the menu loaded and scrolled.
 * @returns One record per distinct product card; cards without a name are skipped.
 */
async function extractProducts(page: Page): Promise<TreezProductRaw[]> {
  console.log('\n' + '='.repeat(60));
  console.log('PRODUCT EXTRACTION');
  console.log('='.repeat(60));

  const { selector, cards } = await page.evaluate(() => {
    // Treez uses hashed class names like: product_product__ERWtJ
    const productSelectors = [
      '[class*="product_product__"]', // Main product container
      '[class*="ProductCard"]', // Alternative pattern
    ];
    const nameSelector = '[class*="product__name"], [class*="name__"]';

    for (const candidate of productSelectors) {
      // Keep elements that look like cards: they contain a name or a price
      // element. Nameless ones are discarded later, during parsing.
      const elements = Array.from(document.querySelectorAll(candidate)).filter(
        (el) => el.querySelector(nameSelector) !== null || el.querySelector('[class*="price"]') !== null
      );
      if (elements.length === 0) continue;

      const snapshots = elements.map((el) => {
        // A combined selector list returns the first match in document order,
        // so the preferred price element has to be queried on its own first.
        const priceEl = el.querySelector('[class*="price__ins"]') ?? el.querySelector('[class*="price"]');
        return {
          name: el.querySelector(nameSelector)?.textContent?.trim() ?? '',
          href: el.querySelector('a[href*="/product/"]')?.getAttribute('href') ?? '',
          brand: el.querySelector('[class*="brand"], [class*="Brand"]')?.textContent?.trim() ?? '',
          priceText: priceEl?.textContent ?? '',
          imageSrc: el.querySelector('img')?.getAttribute('src') ?? null,
          text: el.textContent ?? '',
        };
      });
      return { selector: candidate as string | null, cards: snapshots };
    }

    return { selector: null as string | null, cards: [] as TreezCardSnapshot[] };
  });

  if (selector !== null) {
    console.log(`Found ${cards.length} products with selector: ${selector}`);
  }

  // Dedupe: the same card can be captured more than once (e.g. nested
  // matches). Cards with a product link are deduped by link id so products
  // that merely share a name are kept; link-less cards are deduped by name.
  const seenIds = new Set<string>();
  const seenNames = new Set<string>();
  const products: TreezProductRaw[] = [];

  for (const card of cards) {
    if (!card.name) continue;

    const linkId = card.href.match(/\/product\/([^\/\?]+)/)?.[1] ?? '';
    const isDuplicate = linkId ? seenIds.has(linkId) : seenNames.has(card.name);
    if (isDuplicate) continue;

    if (linkId) seenIds.add(linkId);
    seenNames.add(card.name);

    products.push(parseTreezCard(card, linkId));
  }

  return products;
}
|
|
|
|
/**
 * Bypass the age gate if one is present.
 *
 * Makes a single check for the gate (it does not wait for it to appear),
 * clicks the confirm button, then waits up to 10 seconds for the gate to leave
 * the DOM. Errors are logged and reported as `false`, never thrown.
 *
 * @param page Puppeteer page that has finished its initial navigation.
 * @returns `true` when a confirm button was clicked (even if the gate could
 *   not be confirmed gone afterwards); `false` when there was no gate, no
 *   button, or an error occurred.
 */
async function bypassAgeGate(page: Page): Promise<boolean> {
  const gateSelector = '[data-testid="age-gate-modal"], [class*="AgeGate"]';
  const confirmSelector = '[data-testid="age-gate-submit-button"], button[type="submit"]';

  console.log('[Age Gate] Checking for age gate...');

  try {
    const gate = await page.$(gateSelector);
    if (!gate) {
      console.log('[Age Gate] No age gate detected');
      return false;
    }

    console.log('[Age Gate] Age gate detected, clicking confirm button...');

    // Prefer a button inside the gate so an unrelated submit button elsewhere
    // on the page is not clicked; fall back to a page-wide search.
    const confirmButton = (await gate.$(confirmSelector)) ?? (await page.$(confirmSelector));
    if (!confirmButton) {
      console.log('[Age Gate] No submit button found');
      return false;
    }

    await confirmButton.click();
    console.log('[Age Gate] Clicked confirm button');

    // Give the click time to take effect before polling the DOM.
    await sleep(2000);

    try {
      // Wait on the same selector that detected the gate, so a gate matched
      // only by its class name is not reported gone without being checked.
      await page.waitForFunction(
        (selector: string) => !document.querySelector(selector),
        { timeout: 10000 },
        gateSelector
      );
      console.log('[Age Gate] Age gate bypassed');
    } catch {
      console.log('[Age Gate] Gate may still be visible, continuing anyway');
    }

    return true;
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    console.log(`[Age Gate] Error: ${message}`);
    return false;
  }
}
|
|
|
|
/**
 * Run the smoke test end to end: launch a headless browser, open the menu,
 * dismiss the age gate, analyze the DOM, scroll, extract products and print a
 * report.
 *
 * Debug artifacts are written under /tmp: a screenshot, then either the full
 * product list as JSON or, when nothing was extracted, the page HTML.
 *
 * On failure the error is printed and `process.exitCode` is set to 1. The
 * process is not terminated with `process.exit()` here because that would end
 * it before the `finally` block closes the browser.
 */
async function main(): Promise<void> {
  const banner = '='.repeat(60);

  console.log(banner);
  console.log('TREEZ PLATFORM SMOKE TEST');
  console.log(banner);
  console.log(`Store ID: ${STORE_ID}`);
  console.log(`URL: ${TEST_URL}`);
  console.log('');

  const browser = await puppeteerExtra.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });

  try {
    const { writeFileSync } = await import('fs');
    const page = await browser.newPage();

    await page.setViewport({ width: 1920, height: 1080 });
    await page.setUserAgent(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
    );

    console.log('[Navigation] Going to Treez menu page...');
    await page.goto(TEST_URL, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    console.log('[Navigation] Page loaded, waiting for React app...');
    await sleep(2000);

    await bypassAgeGate(page);

    console.log('[Navigation] Waiting for menu content...');
    await sleep(3000);

    const pageTitle = await page.title();
    console.log(`[Navigation] Page title: ${pageTitle}`);

    // Screenshot of the current viewport, for debugging the page state.
    await page.screenshot({ path: '/tmp/treez-smoke-test.png', fullPage: false });
    console.log('[Debug] Screenshot saved to /tmp/treez-smoke-test.png');

    await analyzeDOM(page);
    await scrollToLoadAll(page);
    const products = await extractProducts(page);
    const total = products.length;

    console.log('\n' + banner);
    console.log('RESULTS');
    console.log(banner);
    console.log(`Total products extracted: ${total}`);

    if (total > 0) {
      // Share of products for which each field was populated.
      const reportCoverage = (label: string, count: number): void => {
        console.log(` ${label}: ${count}/${total} (${Math.round((count / total) * 100)}%)`);
      };

      console.log(`\n[Data Quality]`);
      reportCoverage('With price', products.filter((p) => p.price !== null).length);
      reportCoverage('With THC%', products.filter((p) => p.thcPercent !== null).length);
      reportCoverage('With brand', products.filter((p) => p.brand).length);
      reportCoverage('With image', products.filter((p) => p.imageUrl).length);

      console.log('\n[Sample Products (first 10)]:');
      for (const p of products.slice(0, 10)) {
        console.log(`\n ${p.name}`);
        console.log(` ID: ${p.productId}`);
        console.log(` Brand: ${p.brand || 'N/A'}`);
        console.log(` Category: ${p.category || 'N/A'} / ${p.subcategory || 'N/A'}`);
        console.log(` THC: ${p.thcPercent !== null ? p.thcPercent + '%' : 'N/A'}`);
        console.log(` CBD: ${p.cbdPercent !== null ? p.cbdPercent + '%' : 'N/A'}`);
        console.log(` Price: ${p.price !== null ? '$' + p.price : 'N/A'} ${p.priceUnit}`);
        console.log(` Weight: ${p.weight || 'N/A'}`);
        console.log(` Image: ${p.imageUrl?.slice(0, 60) || 'N/A'}...`);
        console.log(` In Stock: ${p.inStock}`);
      }

      writeFileSync('/tmp/treez-products.json', JSON.stringify(products, null, 2));
      console.log('\n[Debug] Full product list saved to /tmp/treez-products.json');
    } else {
      console.log('\n[WARNING] No products extracted!');
      console.log('Check /tmp/treez-smoke-test.png for page state');

      // Dump the page HTML so the selectors can be adjusted offline.
      writeFileSync('/tmp/treez-page.html', await page.content());
      console.log('[Debug] Page HTML saved to /tmp/treez-page.html');
    }

    console.log('\n' + banner);
    console.log(total > 0 ? 'SMOKE TEST PASSED' : 'SMOKE TEST NEEDS ADJUSTMENT');
    console.log(banner);
  } catch (error: unknown) {
    console.error('\n' + banner);
    console.error('SMOKE TEST FAILED');
    console.error(banner);
    if (error instanceof Error) {
      console.error(`Error: ${error.message}`);
      console.error(error.stack);
    } else {
      console.error(`Error: ${String(error)}`);
    }
    // Record the failure without exiting, so the finally block still runs.
    process.exitCode = 1;
  } finally {
    await browser.close();
  }
}
|
|
|
|
// Entry point. A rejection escaping main() is logged and turned into a
// non-zero exit status so callers can detect the failure.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|