- Document priority order (Evomi API first, DB fallback) - List environment variables and defaults - Show K8s secret location - Explain proxy URL format with geo targeting 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
258 lines
7.9 KiB
TypeScript
258 lines
7.9 KiB
TypeScript
/**
 * Test Treez brand-based product extraction:
 * 1. Load the /brands page
 * 2. Click "load more brands" to get all brands
 * 3. Extract brand URLs
 * 4. Visit each brand and extract products
 */
|
|
|
|
import puppeteer, { Page } from 'puppeteer';
|
|
|
|
// Store identifier; main() uses it as the subdomain of the `https://${STORE_ID}.treez.io` URLs it visits.
const STORE_ID = 'best';
|
|
|
|
/**
 * Pauses the calling async flow for a fixed amount of time.
 *
 * @param ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns A promise that settles (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
|
|
/**
 * Dismisses the age-verification modal if the page is currently showing one.
 *
 * The modal and its submit button are located through their `data-testid`
 * attributes. When no modal is found the function returns immediately.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log('[AgeGate] Detected, bypassing...');

  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }

  // Fixed pause after the (attempted) click; it also runs when no button was found.
  await sleep(2000);
}
|
|
|
|
/**
 * Logs diagnostics about page controls that might reveal the full brand list.
 *
 * Despite its name, this function does not click or change anything on the
 * page. It inspects the DOM and prints three things:
 *   1. every `<select>` element with its option labels,
 *   2. buttons/links whose text contains "load more", "show all" or "view all",
 *   3. up to 10 elements whose class name contains "brand" or "select".
 *
 * @param page - Puppeteer page, expected to already be on the brands listing.
 */
async function loadAllBrands(page: Page): Promise<void> {
  console.log('[Brands] Looking for "load more" option...');

  // Look for select/dropdown with "load more" or "all brands" option
  const selectInfo = await page.evaluate(() => {
    // Builds a selector from per-level `:nth-of-type` steps. `:nth-of-type`
    // counts among siblings only, so a document-wide index (as used before)
    // does not identify the element; a sibling-relative path does.
    // The helper lives inside the callback because the callback runs in the page.
    const cssPath = (target: Element): string => {
      const steps: string[] = [];
      let node: Element | null = target;
      while (node !== null && node !== document.documentElement) {
        let position = 1;
        let sibling: Element | null = node.previousElementSibling;
        while (sibling !== null) {
          if (sibling.tagName === node.tagName) position++;
          sibling = sibling.previousElementSibling;
        }
        steps.unshift(`${node.tagName.toLowerCase()}:nth-of-type(${position})`);
        node = node.parentElement;
      }
      return steps.join(' > ');
    };

    const info: { selector: string; options: string[] }[] = [];
    document.querySelectorAll('select').forEach(sel => {
      const options = Array.from(sel.options).map(o => o.text);
      info.push({ selector: cssPath(sel), options });
    });
    return info;
  });

  console.log('[Brands] Found selects:', JSON.stringify(selectInfo, null, 2));

  // Look for any button or link with "load more" or "show all"
  const loadMoreButtons = await page.evaluate(() => {
    const matches: { text: string; tag: string }[] = [];

    document.querySelectorAll('button, a, [role="button"]').forEach(el => {
      const text = el.textContent?.toLowerCase() || '';
      if (text.includes('load more') || text.includes('show all') || text.includes('view all')) {
        matches.push({ text: el.textContent?.trim() || '', tag: el.tagName });
      }
    });

    return matches;
  });

  console.log('[Brands] Found load more buttons:', loadMoreButtons);

  // Survey elements whose class name mentions "brand" or "select" to help
  // locate a custom (non-<select>) brands dropdown.
  const brandElements = await page.evaluate(() => {
    const matches: { tag: string; class: string; text: string }[] = [];

    document.querySelectorAll('*').forEach(el => {
      // className is stringified defensively rather than assumed to be a string.
      const className = el.className?.toString?.() || '';
      const lowered = className.toLowerCase();
      if (lowered.includes('brand') || lowered.includes('select')) {
        matches.push({
          tag: el.tagName,
          class: className.slice(0, 100),
          text: (el.textContent?.trim() || '').slice(0, 50),
        });
      }
    });

    // Only the first 10 are logged, so only those are sent back from the page.
    return matches.slice(0, 10);
  });

  console.log('[Brands] Brand-related elements:', JSON.stringify(brandElements, null, 2));
}
|
|
|
|
/**
 * Collects brand links from the current page.
 *
 * Anchors are gathered with several selectors (hrefs containing `/brand/` or
 * `/brands/`, and anchors nested under elements whose class contains
 * "brand"/"Brand"). Links without an href or without visible text are skipped,
 * and each distinct href is reported once, in first-seen order.
 *
 * @param page - Puppeteer page to scan.
 * @returns The link text and raw `href` attribute of each brand link; hrefs are
 *          returned as written in the markup, so they may be relative.
 */
async function extractBrandLinks(page: Page): Promise<{ name: string; url: string }[]> {
  return page.evaluate(() => {
    // Look for brand cards/links
    const selectors = [
      'a[href*="/brand/"]',
      'a[href*="/brands/"]',
      '[class*="brand"] a',
      '[class*="Brand"] a',
    ];

    const links: { name: string; url: string }[] = [];
    // Set lookup replaces a linear scan of `links` for every candidate anchor.
    const seenUrls = new Set<string>();

    selectors.forEach(sel => {
      document.querySelectorAll(sel).forEach(el => {
        const href = el.getAttribute('href');
        const name = el.textContent?.trim() || '';
        if (!href || !name || seenUrls.has(href)) return;

        seenUrls.add(href);
        links.push({ name, url: href });
      });
    });

    return links;
  });
}
|
|
|
|
/**
 * Scrolls a brand page to its end and scrapes the product cards it shows.
 *
 * Scrolling stops after 20 rounds, or earlier once the document height has
 * matched the previous round's height three times in a row.
 *
 * @param page - Puppeteer page already navigated to a brand page.
 * @returns One `{ productId, name, price }` record per distinct product name.
 *          `price` is `null` when no `$<number>` pattern is found, and
 *          `productId` falls back to a `treez_`-prefixed slug of the name when
 *          the card has no `/product/` link.
 */
async function extractProductsFromBrandPage(page: Page): Promise<any[]> {
  // Scroll to the bottom repeatedly until the page stops growing.
  let lastHeight = 0;
  let stableRounds = 0;

  for (let round = 0; round < 20; round += 1) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    stableRounds = height === lastHeight ? stableRounds + 1 : 0;
    if (stableRounds >= 3) {
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1000);

    lastHeight = height;
  }

  // Scrape the product cards inside the page context.
  return page.evaluate(() => {
    const collected: any[] = [];
    const namesSeen = new Set<string>();
    const cards = Array.from(document.querySelectorAll('[class*="product_product__"]'));

    for (const card of cards) {
      const title =
        card.querySelector('[class*="product__name"], [class*="name__"]')?.textContent?.trim() || '';

      // Cards without a name, or repeating a name already recorded, are skipped.
      if (title === '' || namesSeen.has(title)) {
        continue;
      }
      namesSeen.add(title);

      const priceText = card.querySelector('[class*="price"]')?.textContent || '';
      const priceHit = priceText.match(/\$(\d+(?:\.\d{2})?)/);
      const price = priceHit ? parseFloat(priceHit[1]) : null;

      // A missing link yields an empty href, which simply fails the id pattern.
      const href = card.querySelector('a[href*="/product/"]')?.getAttribute('href') || '';
      const idHit = href.match(/\/product\/([^\/?]+)/);
      const linkedId = idHit ? idHit[1] : '';

      collected.push({
        productId: linkedId || `treez_${title.replace(/\s+/g, '_').toLowerCase().slice(0, 30)}`,
        name: title,
        price,
      });
    }

    return collected;
  });
}
|
|
|
|
/**
 * Runs the brand-based extraction experiment end to end.
 *
 * Steps: open the store's brands page, dismiss the age gate, save a
 * screenshot, log brand-selection diagnostics, collect brand links, then visit
 * a small sample of brands and extrapolate a catalogue-wide product count from
 * the sample.
 *
 * Errors raised after the browser has launched are logged rather than
 * rethrown, and the browser is always closed.
 */
async function main(): Promise<void> {
  // Upper bound on how many brand pages are visited for the estimate.
  const SAMPLE_BRAND_COUNT = 3;

  console.log('='.repeat(60));
  console.log('Testing Treez Brand-Based Extraction');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Page setup happens inside the try so that a failure there still reaches
  // the finally block and closes the browser.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Block images, fonts and media; let every other request through.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Navigate to brands page
    const brandsUrl = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\n[1] Navigating to ${brandsUrl}`);
    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    // Screenshot to see what we're working with
    await page.screenshot({ path: '/tmp/treez-brands-page.png', fullPage: false });
    console.log('[1] Screenshot saved to /tmp/treez-brands-page.png');

    // Try to load all brands
    console.log('\n[2] Exploring brand selection options...');
    await loadAllBrands(page);

    // Extract brand links
    console.log('\n[3] Extracting brand links...');
    const brandLinks = await extractBrandLinks(page);
    console.log(`Found ${brandLinks.length} brand links:`);
    brandLinks.slice(0, 10).forEach(b => console.log(` - ${b.name}: ${b.url}`));

    // If we found brand links, visit a couple to test
    if (brandLinks.length > 0) {
      // May be shorter than SAMPLE_BRAND_COUNT when few brands were found, so
      // the messages and the estimate below use the real sample size.
      const sample = brandLinks.slice(0, SAMPLE_BRAND_COUNT);
      console.log(`\n[4] Testing product extraction from first ${sample.length} brands...`);

      const allProducts: any[] = [];

      for (const brand of sample) {
        // Brand hrefs may be relative; resolve them against the store origin.
        const brandUrl = brand.url.startsWith('http')
          ? brand.url
          : `https://${STORE_ID}.treez.io${brand.url}`;

        console.log(`\n Visiting brand: ${brand.name}`);
        console.log(` URL: ${brandUrl}`);

        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(2000);

        const products = await extractProductsFromBrandPage(page);
        console.log(` Products found: ${products.length}`);

        allProducts.push(...products.map(p => ({ ...p, brand: brand.name })));
      }

      const totalProducts = allProducts.length;
      // sample.length is at least 1 here because brandLinks is non-empty.
      const estimatedTotal = Math.round((totalProducts / sample.length) * brandLinks.length);

      console.log(`\n[5] Summary from ${sample.length} brands: ${totalProducts} products`);
      console.log(`Estimated total (${brandLinks.length} brands): ~${estimatedTotal} products`);
    }
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error instances.
    console.error('Error:', error instanceof Error ? error.message : error);
  } finally {
    await browser.close();
  }
}
|
|
|
|
// Script entry point: run main() and log any rejection that escapes it.
main().catch(console.error);
|