docs: Add Evomi residential proxy API documentation
- Document priority order (Evomi API first, DB fallback) - List environment variables and defaults - Show K8s secret location - Explain proxy URL format with geo targeting 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
257
backend/scripts/test-treez-brands.ts
Normal file
257
backend/scripts/test-treez-brands.ts
Normal file
@@ -0,0 +1,257 @@
|
||||
/**
 * Test Treez brand-based product extraction
 * 1. Load the /brands page
 * 2. Inspect the page for a "load more brands" control (findings are logged for diagnosis)
 * 3. Extract brand URLs
 * 4. Visit the first few brands and extract their products
 */
|
||||
|
||||
import puppeteer, { Page } from 'puppeteer';
|
||||
|
||||
// Treez store identifier; used as the subdomain when building `https://<id>.treez.io` URLs.
const STORE_ID = 'best';
|
||||
|
||||
/**
 * Pause for a fixed amount of time.
 *
 * @param ms - Delay in milliseconds before the returned promise settles.
 * @returns A promise that resolves with no value once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Dismiss the age-verification modal if the page is currently showing one.
 *
 * The modal is located by its `data-testid`. When it is absent the function
 * returns immediately. When it is present, the submit button is clicked if it
 * can be found, and the function then waits two seconds whether or not a
 * click happened.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log('[AgeGate] Detected, bypassing...');

  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }

  // Fixed pause after the (attempted) click before the caller continues.
  await sleep(2000);
}
|
||||
|
||||
/**
 * Diagnostic probe of the brands page.
 *
 * Despite its name, this function does not click or modify anything. It logs
 * three pieces of information intended to help locate a "load more brands"
 * control:
 *   1. every `<select>` on the page together with its option labels,
 *   2. every button/link whose text contains "load more", "show all" or "view all",
 *   3. the first 10 elements whose class attribute contains "brand" or "select".
 *
 * @param page - Puppeteer page already navigated to the brands listing.
 */
async function loadAllBrands(page: Page): Promise<void> {
  console.log('[Brands] Looking for "load more" option...');

  // Look for select/dropdown with "load more" or "all brands" option
  const selectInfo = await page.evaluate(() => {
    const info: { index: number; selector: string; options: string[] }[] = [];

    document.querySelectorAll('select').forEach((sel, i) => {
      const options = Array.from(sel.options).map(o => o.text);

      // `:nth-of-type(n)` counts among siblings, so it cannot express the
      // document-order index `i`. Prefer an id/name based selector; otherwise
      // report the plain tag selector and rely on `index`
      // (i.e. `document.querySelectorAll('select')[index]`).
      let selector = 'select';
      if (sel.id) {
        selector = `#${CSS.escape(sel.id)}`;
      } else if (sel.name) {
        selector = `select[name="${CSS.escape(sel.name)}"]`;
      }

      info.push({ index: i, selector, options });
    });

    return info;
  });

  console.log('[Brands] Found selects:', JSON.stringify(selectInfo, null, 2));

  // Look for any button or link with "load more" or "show all"
  const loadMoreButtons = await page.evaluate(() => {
    const matches: { text: string; tag: string }[] = [];

    document.querySelectorAll('button, a, [role="button"]').forEach(el => {
      const text = el.textContent?.toLowerCase() || '';
      if (text.includes('load more') || text.includes('show all') || text.includes('view all')) {
        matches.push({ text: el.textContent?.trim() || '', tag: el.tagName });
      }
    });

    return matches;
  });

  console.log('[Brands] Found load more buttons:', loadMoreButtons);

  // List elements whose class hints at a brand picker / dropdown.
  const brandElements = await page.evaluate(() => {
    const matches: { tag: string; class: string; text: string }[] = [];

    document.querySelectorAll('*').forEach(el => {
      // Only the first 10 matches are reported.
      if (matches.length >= 10) return;

      // Read the attribute rather than `className`: on SVG elements `className`
      // is an SVGAnimatedString object, not the class text.
      const className = el.getAttribute('class') ?? '';
      const lowered = className.toLowerCase();
      if (!lowered.includes('brand') && !lowered.includes('select')) return;

      // `textContent` is read only for matching elements; reading it for every
      // node (including <html>/<body>) is needlessly expensive.
      matches.push({
        tag: el.tagName,
        class: className.slice(0, 100),
        text: (el.textContent?.trim() ?? '').slice(0, 50),
      });
    });

    return matches;
  });

  console.log('[Brands] Brand-related elements:', JSON.stringify(brandElements, null, 2));
}
|
||||
|
||||
/**
 * Collect brand links from the current page.
 *
 * Anchors are gathered selector by selector (in the order listed below), and
 * within each selector in document order. An anchor is kept only when it has a
 * non-empty `href` and non-empty trimmed text, and its raw `href` value has not
 * been seen before.
 *
 * @param page - Puppeteer page showing the brands listing.
 * @returns `{ name, url }` pairs; `url` is the raw `href` attribute value and
 *          may therefore be relative.
 */
async function extractBrandLinks(page: Page): Promise<{ name: string; url: string }[]> {
  const brands = await page.evaluate(() => {
    // Candidate selectors for brand cards/links.
    const brandLinkSelectors = [
      'a[href*="/brand/"]',
      'a[href*="/brands/"]',
      '[class*="brand"] a',
      '[class*="Brand"] a',
    ];

    const seenUrls = new Set<string>();
    const found: { name: string; url: string }[] = [];

    for (const selector of brandLinkSelectors) {
      for (const anchor of Array.from(document.querySelectorAll(selector))) {
        const url = anchor.getAttribute('href');
        const name = anchor.textContent?.trim() || '';

        if (!url || !name || seenUrls.has(url)) {
          continue;
        }

        seenUrls.add(url);
        found.push({ name, url });
      }
    }

    return found;
  });

  return brands;
}
|
||||
|
||||
/**
 * Scroll a brand page to trigger lazy loading, then scrape its product cards.
 *
 * Scrolling: up to 20 passes, each scrolling to the bottom and waiting one
 * second. The loop stops early once the document height has been unchanged for
 * three consecutive checks.
 *
 * Extraction: every element whose class contains `product_product__` is read.
 * Cards without a name, or whose name was already seen, are skipped.
 *
 * @param page - Puppeteer page already navigated to a brand page.
 * @returns Objects with `productId`, `name` and `price`. `price` is `null`
 *          when no `$<number>` pattern is found in the price text. `productId`
 *          comes from the card's `/product/<id>` link, falling back to a
 *          `treez_`-prefixed slug of the name.
 */
async function extractProductsFromBrandPage(page: Page): Promise<any[]> {
  // Scroll to load all products
  let lastHeight = 0;
  let unchangedChecks = 0;

  for (let pass = 0; pass < 20; pass++) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    if (height !== lastHeight) {
      unchangedChecks = 0;
    } else {
      unchangedChecks += 1;
      if (unchangedChecks >= 3) {
        break;
      }
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1000);

    lastHeight = height;
  }

  // Extract products
  const products = await page.evaluate(() => {
    const collected: any[] = [];
    const seenNames = new Set<string>();

    for (const card of Array.from(document.querySelectorAll('[class*="product_product__"]'))) {
      const name =
        card.querySelector('[class*="product__name"], [class*="name__"]')?.textContent?.trim() || '';
      if (!name || seenNames.has(name)) {
        continue;
      }
      seenNames.add(name);

      // First "$<digits>[.dd]" occurrence in the price element's text.
      const priceText = card.querySelector('[class*="price"]')?.textContent || '';
      const priceMatch = priceText.match(/\$(\d+(?:\.\d{2})?)/);
      const price = priceMatch ? parseFloat(priceMatch[1]) : null;

      // Product id is the path segment following "/product/" in the card's link.
      const href = card.querySelector('a[href*="/product/"]')?.getAttribute('href') || '';
      const idMatch = href.match(/\/product\/([^\/?]+)/);
      const productId = idMatch ? idMatch[1] : '';

      collected.push({
        productId: productId || `treez_${name.replace(/\s+/g, '_').toLowerCase().slice(0, 30)}`,
        name,
        price,
      });
    }

    return collected;
  });

  return products;
}
|
||||
|
||||
/**
 * Script entry point.
 *
 * Launches headless Chromium, opens the store's brands page, dismisses the age
 * gate, logs diagnostic information about brand controls, extracts brand links
 * and then scrapes products from at most the first three brands to estimate the
 * total catalogue size.
 *
 * Errors are logged and reported through a non-zero process exit code; the
 * browser is always closed.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('Testing Treez Brand-Based Extraction');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Everything after launch runs inside `try` so the browser is closed even
  // when page setup itself fails.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Block images, fonts and media to speed up page loads
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Navigate to brands page
    const brandsUrl = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\n[1] Navigating to ${brandsUrl}`);
    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    // Screenshot to see what we're working with
    await page.screenshot({ path: '/tmp/treez-brands-page.png', fullPage: false });
    console.log('[1] Screenshot saved to /tmp/treez-brands-page.png');

    // Try to load all brands
    console.log('\n[2] Exploring brand selection options...');
    await loadAllBrands(page);

    // Extract brand links
    console.log('\n[3] Extracting brand links...');
    const brandLinks = await extractBrandLinks(page);
    console.log(`Found ${brandLinks.length} brand links:`);
    brandLinks.slice(0, 10).forEach(b => console.log(`  - ${b.name}: ${b.url}`));

    // If we found brand links, visit a couple to test
    if (brandLinks.length > 0) {
      // May contain fewer than three entries when the page lists fewer brands.
      const sampleBrands = brandLinks.slice(0, 3);
      // Brand hrefs may be absolute, root-relative or document-relative, so
      // resolve them against the URL of the page they were read from.
      const listingUrl = page.url();

      console.log(`\n[4] Testing product extraction from first ${sampleBrands.length} brands...`);

      let totalProducts = 0;

      for (const brand of sampleBrands) {
        const brandUrl = new URL(brand.url, listingUrl).toString();

        console.log(`\n  Visiting brand: ${brand.name}`);
        console.log(`  URL: ${brandUrl}`);

        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(2000);

        const products = await extractProductsFromBrandPage(page);
        console.log(`  Products found: ${products.length}`);

        totalProducts += products.length;
      }

      // Average over the brands actually sampled, not a hard-coded 3.
      const estimatedTotal = Math.round((totalProducts / sampleBrands.length) * brandLinks.length);

      console.log(`\n[5] Summary from ${sampleBrands.length} brands: ${totalProducts} products`);
      console.log(`Estimated total (${brandLinks.length} brands): ~${estimatedTotal} products`);
    }
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error instances.
    const message = error instanceof Error ? error.message : String(error);
    console.error('Error:', message);
    // Signal failure to the caller/CI instead of exiting with status 0.
    process.exitCode = 1;
  } finally {
    await browser.close();
  }
}

// Failures outside main's own try/catch (browser launch or close) end up here.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user