fix(preflight): Apply stored fingerprint to task browser

- Add WorkerFingerprint interface with timezone, city, state, ip, locale
- Store fingerprint in TaskWorker after preflight passes
- Pass fingerprint through TaskContext to handlers
- Apply timezone via CDP and locale via Accept-Language header
- Ensures browser fingerprint matches proxy IP location

This fixes bot-detection failures where a timezone/locale mismatch
with the proxy IP was causing requests to be blocked by Cloudflare.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-13 16:40:52 -07:00
parent 5ea92e25af
commit 023cfc127f
22 changed files with 3083 additions and 5 deletions

View File

@@ -0,0 +1,130 @@
/**
* Count Jane stores - v2: Try Algolia store search
* Usage: npx ts-node scripts/count-jane-stores-v2.ts
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
// Two-letter US state codes.
// NOTE(review): main() in this script only visits the /stores page and never
// references STATES — looks like a leftover; confirm before removing.
const STATES = [
'AZ', 'CA', 'CO', 'FL', 'IL', 'MA', 'MI', 'NV', 'NJ', 'NY', 'OH', 'PA', 'WA', 'OR'
];
/**
 * Load the public Jane store directory in headless Chromium, harvest store
 * records from every JSON response served from an iheartjane.com URL, and
 * print the number of unique stores overall, by state, and for Arizona.
 *
 * Stores are collected from `stores` arrays and Algolia-style `hits` arrays
 * and de-duplicated by their `id`. The browser is always closed, even when
 * navigation fails.
 */
async function main() {
  console.log('Counting Jane stores by exploring state pages...\n');
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  // Keyed by store id so a store seen in several responses is counted once.
  const allStores: Map<number, any> = new Map();
  try {
    const page = await browser.newPage();
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      // Skip heavy assets; only the API traffic is of interest.
      const type = req.resourceType();
      if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
        req.abort();
      } else {
        req.continue();
      }
    });
    page.on('response', async (response) => {
      const url = response.url();
      const contentType = response.headers()['content-type'] || '';
      if (url.includes('iheartjane.com') && contentType.includes('json')) {
        try {
          const json = await response.json();
          // Look for stores in any response
          if (json.stores && Array.isArray(json.stores)) {
            for (const s of json.stores) {
              if (s.id) allStores.set(s.id, s);
            }
          }
          // Also check hits (Algolia format)
          if (json.hits && Array.isArray(json.hits)) {
            for (const s of json.hits) {
              if (s.id) allStores.set(s.id, s);
            }
          }
        } catch {
          // Best effort: bodies that are unavailable or not valid JSON are skipped.
        }
      }
    });
    // First visit the main stores page
    console.log('Visiting main stores page...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle0',
      timeout: 60000,
    });
    await new Promise(r => setTimeout(r, 3000));
    // Try to scroll to load more stores
    console.log('Scrolling to load more...');
    for (let i = 0; i < 5; i++) {
      await page.evaluate(() => window.scrollBy(0, 1000));
      await new Promise(r => setTimeout(r, 1000));
    }
    // Try clicking "Load More" if it exists.
    // `:has-text()` is a Playwright selector and is not valid CSS for
    // Puppeteer's page.$, so the button is located by its text inside the page.
    try {
      const candidate = await page.evaluateHandle(() => {
        const labelled = Array.from(document.querySelectorAll('button')).find((b) =>
          (b.textContent || '').toLowerCase().includes('load more')
        );
        return labelled || document.querySelector('[class*="load-more"]');
      });
      const loadMore = candidate.asElement();
      if (loadMore) {
        console.log('Clicking Load More...');
        await loadMore.click();
        await new Promise(r => setTimeout(r, 3000));
      }
    } catch (err: unknown) {
      // Clicking is optional; report the failure instead of hiding it.
      console.log(`Load More click skipped: ${err instanceof Error ? err.message : String(err)}`);
    }
    // Extract stores from DOM as fallback
    const domStores = await page.evaluate(() => {
      const storeElements = document.querySelectorAll('[data-store-id], [class*="StoreCard"], [class*="store-card"]');
      return storeElements.length;
    });
    console.log(`\nStores from DOM elements: ${domStores}`);
  } finally {
    // Always release the Chromium process, even if navigation timed out.
    await browser.close();
  }
  // Count by state
  const byState: Record<string, number> = {};
  for (const store of allStores.values()) {
    const state = store.state || 'Unknown';
    byState[state] = (byState[state] || 0) + 1;
  }
  console.log('\n=== JANE STORE COUNTS ===\n');
  console.log(`Unique stores captured: ${allStores.size}`);
  if (allStores.size > 0) {
    console.log('\nBy State:');
    const sorted = Object.entries(byState).sort((a, b) => b[1] - a[1]);
    for (const [state, count] of sorted.slice(0, 20)) {
      console.log(` ${state}: ${count}`);
    }
    // Check Arizona specifically
    const azStores = Array.from(allStores.values()).filter(s =>
      s.state === 'Arizona' || s.state === 'AZ'
    );
    console.log(`\nArizona stores: ${azStores.length}`);
    if (azStores.length > 0) {
      console.log('AZ stores:');
      for (const s of azStores.slice(0, 10)) {
        console.log(` - ${s.name} (ID: ${s.id}) - ${s.city}`);
      }
    }
  }
  // Note about total
  console.log('\n--- Note ---');
  console.log('Jane uses server-side rendering. To get full store count,');
  console.log('you may need to check their public marketing materials or');
  console.log('iterate through known store IDs.');
}
main().catch(console.error);

View File

@@ -0,0 +1,98 @@
/**
* Count Jane stores by state
* Usage: npx ts-node scripts/count-jane-stores.ts
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
/**
 * Load the Jane store directory in headless Chromium, collect every `stores`
 * array found in responses whose URL contains both "iheartjane.com" and
 * "stores", and print totals by state plus a sample of Arizona stores.
 *
 * The browser is closed in a `finally` so a navigation timeout does not leave
 * a Chromium process running.
 * NOTE(review): stores are appended without de-duplication, so a store
 * returned by more than one response would be counted more than once — confirm
 * whether that can happen for this endpoint.
 */
async function main() {
  console.log('Counting Jane stores...\n');
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  // Capture store data from API
  const stores: any[] = [];
  let pageStoreCount: number | null = null;
  try {
    const page = await browser.newPage();
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      // Skip heavy assets; only the API traffic is of interest.
      const type = req.resourceType();
      if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
        req.abort();
      } else {
        req.continue();
      }
    });
    page.on('response', async (response) => {
      const url = response.url();
      if (url.includes('iheartjane.com') && url.includes('stores')) {
        try {
          const json = await response.json();
          if (json.stores && Array.isArray(json.stores)) {
            stores.push(...json.stores);
          }
        } catch {
          // Best effort: non-JSON bodies (e.g. the HTML page itself) are skipped.
        }
      }
    });
    // Visit the store directory
    console.log('Loading Jane store directory...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    // Wait for stores to load
    await new Promise(r => setTimeout(r, 5000));
    // Also try to get store count from page content
    pageStoreCount = await page.evaluate(() => {
      // Look for store count in page text
      const text = document.body.innerText;
      const match = text.match(/(\d+)\s*stores?/i);
      return match ? parseInt(match[1], 10) : null;
    });
  } finally {
    await browser.close();
  }
  // Count by state
  const byState: Record<string, number> = {};
  for (const store of stores) {
    const state = store.state || 'Unknown';
    byState[state] = (byState[state] || 0) + 1;
  }
  console.log('\n=== JANE STORE COUNTS ===\n');
  console.log(`Total stores captured from API: ${stores.length}`);
  // Compare against null explicitly so a legitimate "0 stores" is still reported.
  if (pageStoreCount !== null) {
    console.log(`Page claims: ${pageStoreCount} stores`);
  }
  console.log('\nBy State:');
  const sorted = Object.entries(byState).sort((a, b) => b[1] - a[1]);
  for (const [state, count] of sorted) {
    console.log(` ${state}: ${count}`);
  }
  // Check Arizona specifically
  const azStores = stores.filter(s =>
    s.state === 'Arizona' || s.state === 'AZ'
  );
  console.log(`\nArizona stores: ${azStores.length}`);
  if (azStores.length > 0) {
    console.log('Sample AZ stores:');
    for (const s of azStores.slice(0, 5)) {
      console.log(` - ${s.name} (ID: ${s.id}) - ${s.city}`);
    }
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,247 @@
/**
* Explore Treez site structure to find full product catalog
*
* Usage: npx ts-node scripts/explore-treez-structure.ts
*/
import puppeteer from 'puppeteer';
// Subdomain of the Treez store to explore; main() builds URLs of the form
// https://<STORE_ID>.treez.io/onlinemenu/...
const STORE_ID = 'best';
/**
 * Pause for the given number of milliseconds.
 *
 * @param ms - Delay handed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Explore a Treez online menu to find where the full product catalog lives.
 *
 * Steps, all logged to the console:
 *   1. open the base menu URL and dismiss the age gate if it appears,
 *   2. list navigation links and category-like elements,
 *   3. click the first of "Flower"/"All"/... that exists and compare product counts,
 *   4. scroll to see whether more products load,
 *   5. probe a few candidate URL patterns,
 *   6. save a full-page screenshot to /tmp/treez-explore.png.
 *
 * Errors are logged rather than rethrown, and the browser is always closed.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Exploring Treez Site Structure');
  console.log('='.repeat(60));
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Number of product cards currently present in the DOM.
    const countProducts = (): Promise<number> =>
      page.evaluate(() =>
        document.querySelectorAll('[class*="product_product__"]').length
      );

    // Click through the age gate when its modal is present.
    const dismissAgeGate = async (announce: boolean): Promise<void> => {
      const gate = await page.$('[data-testid="age-gate-modal"]');
      if (!gate) return;
      if (announce) console.log('[1] Age gate detected, bypassing...');
      const submit = await page.$('[data-testid="age-gate-submit-button"]');
      if (submit) await submit.click();
      await sleep(2000);
    };

    // Navigate to base menu URL
    const baseUrl = `https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`;
    console.log(`\n[1] Navigating to: ${baseUrl}`);
    await page.goto(baseUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    // Bypass age gate if present
    await dismissAgeGate(true);

    // Get all navigation links
    console.log('\n[2] Extracting navigation structure...');
    const navInfo = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];
      // Look for nav links (de-duplicated by href)
      document.querySelectorAll('nav a, [class*="nav"] a, [class*="menu"] a, header a').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || '';
        if (text && href && !links.some(l => l.href === href)) {
          links.push({ text, href });
        }
      });
      // Look for category tabs/buttons (de-duplicated by visible text)
      document.querySelectorAll('[class*="category"], [class*="tab"], [role="tab"]').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || el.getAttribute('data-href') || '';
        if (text && !links.some(l => l.text === text)) {
          links.push({ text, href: href || `(click: ${el.className})` });
        }
      });
      // Get current URL
      const currentUrl = window.location.href;
      // Count products on page
      const productCount = document.querySelectorAll('[class*="product_product__"]').length;
      return { links, currentUrl, productCount };
    });
    console.log(`Current URL: ${navInfo.currentUrl}`);
    console.log(`Products on homepage: ${navInfo.productCount}`);
    console.log('\nNavigation links found:');
    navInfo.links.forEach(l => {
      console.log(` "${l.text}" → ${l.href}`);
    });

    // Look for category buttons/tabs specifically
    console.log('\n[3] Looking for category navigation...');
    const categories = await page.evaluate(() => {
      const cats: { text: string; className: string; tagName: string }[] = [];
      // Find all clickable elements that might be categories
      const selectors = [
        '[class*="CategoryNav"]',
        '[class*="category"]',
        '[class*="Category"]',
        '[class*="nav"] button',
        '[class*="tab"]',
        '[role="tablist"] *',
        '.MuiTab-root',
        '[class*="filter"]',
      ];
      selectors.forEach(sel => {
        document.querySelectorAll(sel).forEach(el => {
          const text = el.textContent?.trim() || '';
          // Short, not-yet-seen labels only; long text is likely a container.
          if (text && text.length < 50 && !cats.some(c => c.text === text)) {
            cats.push({
              text,
              className: el.className?.toString().slice(0, 80) || '',
              tagName: el.tagName,
            });
          }
        });
      });
      return cats;
    });
    console.log('Category-like elements:');
    categories.forEach(c => {
      console.log(` [${c.tagName}] "${c.text}" (class: ${c.className})`);
    });

    // Try clicking on "Flower" or "All" if found
    console.log('\n[4] Looking for "Flower" or "All Products" link...');
    const clickTargets = ['Flower', 'All', 'All Products', 'Shop All', 'View All'];
    for (const target of clickTargets) {
      const element = await page.evaluate((targetText) => {
        const els = Array.from(document.querySelectorAll('a, button, [role="tab"], [class*="category"]'));
        const match = els.find(el =>
          el.textContent?.trim().toLowerCase() === targetText.toLowerCase()
        );
        if (match) {
          return {
            found: true,
            text: match.textContent?.trim(),
            tag: match.tagName,
          };
        }
        return { found: false };
      }, target);
      if (!element.found) continue;

      console.log(`Found "${element.text}" (${element.tag}), clicking...`);
      await page.evaluate((targetText) => {
        const els = Array.from(document.querySelectorAll('a, button, [role="tab"], [class*="category"]'));
        const match = els.find(el =>
          el.textContent?.trim().toLowerCase() === targetText.toLowerCase()
        );
        if (match) (match as HTMLElement).click();
      }, target);
      await sleep(3000);
      const newUrl = page.url();
      const newCount = await countProducts();
      console.log(` New URL: ${newUrl}`);
      console.log(` Products after click: ${newCount}`);
      if (newCount > navInfo.productCount) {
        // Separator added: the two counts used to be printed fused together.
        console.log(` ✓ Found more products! (${navInfo.productCount} → ${newCount})`);
      }
      // Only the first matching target is tried.
      break;
    }

    // Check page height and scroll behavior
    console.log('\n[5] Checking scroll behavior on current page...');
    let previousHeight = 0;
    let scrollCount = 0;
    let previousProductCount = await countProducts();
    while (scrollCount < 10) {
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      if (currentHeight === previousHeight) {
        console.log(` Scroll ${scrollCount + 1}: No height change, stopping`);
        break;
      }
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);
      const currentProductCount = await countProducts();
      console.log(` Scroll ${scrollCount + 1}: height=${currentHeight}, products=${currentProductCount}`);
      // Allow a few scrolls before concluding that nothing more will load.
      if (currentProductCount === previousProductCount && scrollCount > 2) {
        console.log(' No new products loading, stopping');
        break;
      }
      previousHeight = currentHeight;
      previousProductCount = currentProductCount;
      scrollCount++;
    }

    // Try direct URL patterns
    console.log('\n[6] Testing URL patterns...');
    const urlPatterns = [
      '/onlinemenu/flower?customerType=ADULT',
      '/onlinemenu/all?customerType=ADULT',
      '/onlinemenu?category=flower&customerType=ADULT',
      '/onlinemenu?view=all&customerType=ADULT',
    ];
    for (const pattern of urlPatterns) {
      const testUrl = `https://${STORE_ID}.treez.io${pattern}`;
      console.log(`\nTrying: ${testUrl}`);
      try {
        await page.goto(testUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(2000);
        // Bypass age gate again if needed
        await dismissAgeGate(false);
        const productCount = await countProducts();
        console.log(` Products found: ${productCount}`);
      } catch (err: unknown) {
        // One bad pattern (timeout, navigation error) must not stop the other probes.
        console.log(` Failed: ${err instanceof Error ? err.message : String(err)}`);
      }
    }

    // Screenshot the final state
    await page.screenshot({ path: '/tmp/treez-explore.png', fullPage: true });
    console.log('\n[7] Screenshot saved to /tmp/treez-explore.png');
  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : error);
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,188 @@
/**
* One-off script to test iHeartJane scraping
* Mimics remote worker: Puppeteer + stealth + proxy
*
* Usage: npx ts-node scripts/test-iheartjane.ts
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
// Menu page that main() navigates to.
const TARGET_URL = 'https://theflowershopusa.com/mesa/menu/';
// Store id interpolated into https://api.iheartjane.com/v1/stores/<id> by main().
// NOTE(review): presumably the Jane store behind TARGET_URL — confirm.
const STORE_ID = 2788;
/**
 * One-off probe of an iHeartJane-backed menu page.
 *
 * Opens TARGET_URL in headless Chromium, records every JSON response whose
 * URL mentions "iheartjane.com" or "algolia", fetches the store record for
 * STORE_ID from inside the page, samples product data from the DOM, prints a
 * summary grouped by response type, and writes the captured responses and a
 * screenshot under /tmp.
 *
 * Failures are logged; the browser is always closed.
 */
async function main() {
  console.log('[iHeartJane Test] Starting...');
  // No proxy for local testing
  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });
  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });
  // Intercept network requests to capture API calls
  const apiResponses: any[] = [];
  await page.setRequestInterception(true);
  page.on('request', (req) => {
    // Block heavy resources
    const type = req.resourceType();
    if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
      req.abort();
    } else {
      req.continue();
    }
  });
  page.on('response', async (response) => {
    const url = response.url();
    const contentType = response.headers()['content-type'] || '';
    // Capture any JSON response from iheartjane domains
    if ((url.includes('iheartjane.com') || url.includes('algolia')) && contentType.includes('json')) {
      try {
        const json = await response.json();
        // Label by the first keyword found in the URL, in this priority order.
        const type = url.includes('store') ? 'STORE' :
          url.includes('product') ? 'PRODUCT' :
          url.includes('algolia') ? 'ALGOLIA' : 'API';
        apiResponses.push({ type, url, data: json });
        console.log(`[${type}] ${url.substring(0, 120)}...`);
      } catch {
        // Not JSON
      }
    }
  });
  console.log(`[iHeartJane Test] Navigating to ${TARGET_URL}`);
  try {
    await page.goto(TARGET_URL, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    console.log('[iHeartJane Test] Menu page loaded, waiting for data...');
    // Wait a bit for all API calls to complete
    await new Promise(r => setTimeout(r, 3000));
    // Also try to get store info by visiting the store page
    console.log('[iHeartJane Test] Fetching store info...');
    const storeInfoUrl = `https://api.iheartjane.com/v1/stores/${STORE_ID}`;
    // Try to fetch store info via page.evaluate (uses browser context)
    const storeInfo = await page.evaluate(async (storeId) => {
      try {
        const resp = await fetch(`https://api.iheartjane.com/v1/stores/${storeId}`);
        if (resp.ok) return await resp.json();
        return { error: resp.status };
      } catch (e: any) {
        return { error: e.message };
      }
    }, STORE_ID);
    if (storeInfo && !storeInfo.error) {
      apiResponses.push({ type: 'STORE_DIRECT', url: storeInfoUrl, data: storeInfo });
      console.log('[STORE_DIRECT] Got store info via fetch');
    } else {
      console.log(`[STORE_DIRECT] Failed: ${JSON.stringify(storeInfo)}`);
    }
    console.log('[iHeartJane Test] Processing results...');
    // Wait for products to load
    await page.waitForSelector('[data-testid="product-card"], .product-card, [class*="ProductCard"]', {
      timeout: 30000,
    }).catch(() => console.log('[iHeartJane Test] No product cards found via selector'));
    // Try to extract product data from the page
    // NOTE(review): `products` is not used by the summary below — confirm
    // whether it was meant to be printed or saved.
    const products = await page.evaluate(() => {
      // Look for product data in various places
      const results: any[] = [];
      // Method 1: Look for __INITIAL_STATE__ or similar
      const scripts = Array.from(document.querySelectorAll('script'));
      for (const script of scripts) {
        const text = script.textContent || '';
        if (text.includes('products') && text.includes('price')) {
          try {
            // Try to find JSON object
            const match = text.match(/\{[\s\S]*"products"[\s\S]*\}/);
            if (match) {
              results.push({ source: 'script', data: match[0].substring(0, 500) });
            }
          } catch {}
        }
      }
      // Method 2: Look for product elements in DOM
      const productElements = document.querySelectorAll('[data-testid="product-card"], .product-card, [class*="product"]');
      for (const el of Array.from(productElements).slice(0, 5)) {
        const name = el.querySelector('[class*="name"], h3, h4')?.textContent;
        const price = el.querySelector('[class*="price"]')?.textContent;
        if (name) {
          results.push({ source: 'dom', name, price });
        }
      }
      return results;
    });
    console.log('\n[iHeartJane Test] === RESULTS ===');
    console.log(`Total API responses captured: ${apiResponses.length}`);
    // Group by type
    const byType: Record<string, any[]> = {};
    for (const r of apiResponses) {
      byType[r.type] = byType[r.type] || [];
      byType[r.type].push(r);
    }
    for (const [type, items] of Object.entries(byType)) {
      console.log(`\n--- ${type} (${items.length} responses) ---`);
      for (const item of items) {
        console.log(`URL: ${item.url}`);
        const data = item.data;
        // A JSON body can legally be null or a primitive; skip those instead of
        // throwing and losing the rest of the report and the file dump.
        if (data === null || typeof data !== 'object') {
          console.log(` Payload: ${JSON.stringify(data)}`);
          continue;
        }
        // Show structure
        if (data.hits) {
          console.log(` Products: ${data.hits.length} hits`);
          if (data.hits[0]) {
            console.log(` Fields: ${Object.keys(data.hits[0]).join(', ')}`);
          }
        } else if (data.store) {
          console.log(` Store: ${JSON.stringify(data.store, null, 2).substring(0, 1000)}`);
        } else {
          console.log(` Keys: ${Object.keys(data).join(', ')}`);
        }
      }
    }
    // Write full data to file
    const fs = await import('fs');
    fs.writeFileSync('/tmp/iheartjane-data.json', JSON.stringify(apiResponses, null, 2));
    console.log('\n[iHeartJane Test] Full data saved to /tmp/iheartjane-data.json');
    // Take screenshot
    await page.screenshot({ path: '/tmp/iheartjane-test.png', fullPage: false });
    console.log('[iHeartJane Test] Screenshot saved to /tmp/iheartjane-test.png');
  } catch (error: unknown) {
    console.error('[iHeartJane Test] Error:', error instanceof Error ? error.message : error);
    // The page may be unusable at this point; a failed diagnostic screenshot
    // must not replace the error reported above.
    await page.screenshot({ path: '/tmp/iheartjane-error.png' }).catch(() =>
      console.error('[iHeartJane Test] Could not save error screenshot')
    );
  } finally {
    await browser.close();
  }
  console.log('[iHeartJane Test] Done');
}
main().catch(console.error);

View File

@@ -0,0 +1,224 @@
/**
* Explore Jane API to understand data structure
* Usage: npx ts-node scripts/test-jane-api-explore.ts
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
/**
 * Explore the Jane store APIs from inside a real browser page.
 *
 * Loads https://www.iheartjane.com/stores, records JSON responses from
 * iheartjane.com URLs that mention /stores, /search or algolia, inspects data
 * embedded in the page (`__NEXT_DATA__`, `window.stores`, `window.__stores`),
 * and then probes several candidate endpoints with in-page `fetch`, printing
 * the top-level keys of whatever comes back.
 *
 * The browser is closed in a `finally`, so a failed navigation does not leave
 * Chromium running.
 */
async function main() {
  console.log('Exploring Jane API from browser context...\n');
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();

    // GET a URL from inside the page; resolves to the parsed JSON, or null when
    // the request fails or the status is not OK.
    const fetchJsonInPage = (endpoint: string): Promise<any> =>
      page.evaluate(async (target: string) => {
        try {
          const res = await fetch(target);
          if (res.ok) return await res.json();
        } catch {}
        return null;
      }, endpoint);

    // Intercept network requests to find store data API calls
    const capturedResponses: Array<{ url: string; data: any }> = [];
    await page.setRequestInterception(true);
    page.on('request', (req) => req.continue());
    page.on('response', async (response) => {
      const url = response.url();
      if (url.includes('iheartjane.com') &&
        (url.includes('/stores') || url.includes('/search') || url.includes('algolia'))) {
        try {
          const text = await response.text();
          // Cheap pre-check so HTML and other bodies are not handed to JSON.parse.
          if (text.startsWith('{') || text.startsWith('[')) {
            const data = JSON.parse(text);
            capturedResponses.push({ url, data });
            console.log(`Captured: ${url.substring(0, 100)}...`);
          }
        } catch {
          // Not JSON
        }
      }
    });
    // Visit Jane to establish session
    console.log('Visiting Jane stores page to capture network requests...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    console.log(`\nCaptured ${capturedResponses.length} API responses`);
    for (const resp of capturedResponses) {
      console.log(`\n--- ${resp.url.substring(0, 80)} ---`);
      const keys = Object.keys(resp.data);
      console.log('Keys:', keys);
      // Check for stores array
      if (resp.data.stores && Array.isArray(resp.data.stores)) {
        console.log(`Stores count: ${resp.data.stores.length}`);
        const firstStore = resp.data.stores[0];
        if (firstStore) {
          console.log('First store keys:', Object.keys(firstStore));
          console.log('Sample:', JSON.stringify(firstStore, null, 2).substring(0, 500));
        }
      }
      // Check for hits (Algolia)
      if (resp.data.hits && Array.isArray(resp.data.hits)) {
        console.log(`Hits count: ${resp.data.hits.length}`);
        const firstHit = resp.data.hits[0];
        if (firstHit) {
          console.log('First hit keys:', Object.keys(firstHit));
        }
      }
    }
    // Look for __NEXT_DATA__ or similar embedded data
    console.log('\n--- Checking for embedded page data ---');
    const pageData = await page.evaluate(() => {
      // Check for Next.js data
      const nextData = (window as any).__NEXT_DATA__;
      if (nextData?.props?.pageProps?.stores) {
        return {
          source: '__NEXT_DATA__',
          storeCount: nextData.props.pageProps.stores.length,
          firstStore: nextData.props.pageProps.stores[0],
        };
      }
      // Check for any global store data
      const win = window as any;
      if (win.stores) return { source: 'window.stores', data: win.stores };
      if (win.__stores) return { source: 'window.__stores', data: win.__stores };
      return null;
    });
    if (pageData) {
      console.log('Found embedded data:', pageData.source);
      // storeCount/firstStore are only set for the __NEXT_DATA__ source.
      console.log('Store count:', pageData.storeCount);
      if (pageData.firstStore) {
        console.log('First store keys:', Object.keys(pageData.firstStore));
        console.log('Sample:', JSON.stringify({
          id: pageData.firstStore.id,
          name: pageData.firstStore.name,
          city: pageData.firstStore.city,
          state: pageData.firstStore.state,
        }, null, 2));
      }
    } else {
      console.log('No embedded page data found');
    }
    // Try alternative API endpoints from browser context
    console.log('\n--- Testing alternative API endpoints ---');
    // Try the map endpoint
    const mapData = await fetchJsonInPage('https://api.iheartjane.com/v1/stores/map?per_page=100');
    if (mapData) {
      console.log('\n/v1/stores/map response:');
      console.log('Keys:', Object.keys(mapData));
      if (mapData.stores?.[0]) {
        console.log('First store keys:', Object.keys(mapData.stores[0]));
      }
    }
    // Try index endpoint
    const indexData = await fetchJsonInPage('https://api.iheartjane.com/v1/stores/index?per_page=10');
    if (indexData) {
      console.log('\n/v1/stores/index response:');
      console.log('Keys:', Object.keys(indexData));
      if (indexData.stores?.[0]) {
        console.log('First store keys:', Object.keys(indexData.stores[0]));
      }
    }
    // Try with state parameter
    const stateData = await fetchJsonInPage('https://api.iheartjane.com/v1/stores?state=AZ&per_page=10');
    if (stateData) {
      console.log('\n/v1/stores?state=AZ response:');
      console.log('Keys:', Object.keys(stateData));
      console.log('Stores count:', stateData.stores?.length);
      if (stateData.stores?.[0]) {
        console.log('First store keys:', Object.keys(stateData.stores[0]));
        console.log('Sample:', JSON.stringify(stateData.stores[0], null, 2).substring(0, 300));
      }
    }
    // Try Algolia directly for stores
    console.log('\n--- Testing Algolia for stores ---');
    const algoliaStores = await page.evaluate(async () => {
      try {
        // Common Algolia search pattern
        // NOTE(review): the application id and API key are hard-coded here —
        // confirm this is a public search-only key before sharing the script.
        const res = await fetch('https://search.iheartjane.com/1/indexes/stores-production/query', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'X-Algolia-Application-Id': 'HKXSXRD7RA',
            'X-Algolia-API-Key': 'YjZhYjQxZjU4ZTNjMTRhYzExZTk2YjU2MzliMGE4ZTE5YjJkMmZkZTI2ODllYTY2MThlMzQ3Y2QxOTFkMjI5Y3RhZ0ZpbHRlcnM9',
          },
          body: JSON.stringify({
            query: 'Arizona',
            hitsPerPage: 20,
          }),
        });
        if (res.ok) return await res.json();
      } catch {}
      return null;
    });
    if (algoliaStores) {
      console.log('Algolia stores-production response:');
      console.log('Keys:', Object.keys(algoliaStores));
      console.log('Hits count:', algoliaStores.hits?.length);
      if (algoliaStores.hits?.[0]) {
        console.log('First hit keys:', Object.keys(algoliaStores.hits[0]));
        console.log('Sample:', JSON.stringify(algoliaStores.hits[0], null, 2).substring(0, 500));
      }
    }
    // Check if there's a /v2 endpoint
    const v2Data = await fetchJsonInPage('https://api.iheartjane.com/v2/stores?per_page=10');
    if (v2Data) {
      console.log('\n/v2/stores response:');
      console.log('Keys:', Object.keys(v2Data));
      if (v2Data.stores?.[0]) {
        console.log('First store keys:', Object.keys(v2Data.stores[0]));
      }
    }
  } finally {
    await browser.close();
  }
  console.log('\nDone!');
}
main().catch(console.error);

View File

@@ -0,0 +1,126 @@
/**
* Test script for Jane platform client
* Tests the new Jane integration with The Flower Shop Mesa
*
* Usage: npx ts-node scripts/test-jane-client.ts
*/
import {
startSession,
endSession,
fetchProductsFromUrl,
resolveStoreFromUrl,
} from '../src/platforms/jane';
import { JaneNormalizer } from '../src/hydration/normalizers/jane';
// Menu URL that main() passes to fetchProductsFromUrl.
const TEST_URL = 'https://theflowershopusa.com/mesa/menu/';
/**
 * Smoke test for the Jane platform client.
 *
 * Fetches products for TEST_URL, prints the captured store and a sample of
 * products, then runs the captured raw products through JaneNormalizer and
 * prints the normalized counts plus one sample product. Prints TEST PASSED on
 * completion; on any thrown error prints TEST FAILED with the stack and exits
 * with status 1.
 */
async function main() {
  const rule = '='.repeat(60);
  console.log(rule);
  console.log('Jane Platform Client Test');
  console.log(rule);
  console.log(`Test URL: ${TEST_URL}`);
  console.log('');
  try {
    // Test 1: pull the menu through the platform client.
    console.log('[Test 1] Fetching products from menu URL...');
    const result = await fetchProductsFromUrl(TEST_URL);
    const store = result.store;
    const products = result.products;

    console.log('');
    console.log('[Results]');
    console.log(` Store: ${store?.name || 'Not captured'}`);
    console.log(` Store ID: ${store?.id || 'N/A'}`);
    console.log(` Products captured: ${products.length}`);
    console.log(` API responses: ${result.responses.length}`);

    if (store) {
      console.log('');
      console.log('[Store Info]');
      console.log(` Address: ${store.address}, ${store.city}, ${store.state} ${store.zip}`);
      console.log(` Phone: ${store.phone}`);
      console.log(` Coordinates: ${store.lat}, ${store.long}`);
      console.log(` Medical: ${store.medical}, Recreational: ${store.recreational}`);
      console.log(` Rating: ${store.rating} (${store.reviews_count} reviews)`);
      console.log(` Product count (store): ${store.product_count}`);
    }

    const hasProducts = products.length > 0;
    if (hasProducts) {
      console.log('');
      console.log('[Sample Products (first 5)]');
      for (const item of products.slice(0, 5)) {
        // Per-gram price first, then per-unit price, then a placeholder.
        const shownPrice = item.price_gram || item.price_each || 'N/A';
        console.log(` - ${item.name} (${item.brand}) - $${shownPrice}`);
        console.log(` Kind: ${item.kind}, Category: ${item.category}, THC: ${item.percent_thc}%`);
      }

      // Test 2: feed the raw products through the normalizer.
      console.log('');
      console.log('[Test 2] Testing normalizer...');
      const janeNormalizer = new JaneNormalizer();
      // Stand-in payload record wrapping the raw hits.
      const stubPayload = {
        id: 'test-payload',
        dispensary_id: 9999,
        crawl_run_id: null,
        platform: 'jane',
        payload_version: 1,
        raw_json: { hits: products.map(p => p.raw) },
        product_count: products.length,
        pricing_type: null,
        crawl_mode: null,
        fetched_at: new Date(),
        processed: false,
        normalized_at: null,
        hydration_error: null,
        hydration_attempts: 0,
        created_at: new Date(),
      };
      const output = janeNormalizer.normalize(stubPayload);
      console.log(` Products normalized: ${output.products.length}`);
      console.log(` Brands extracted: ${output.brands.length}`);
      console.log(` Categories extracted: ${output.categories.length}`);
      console.log(` Errors: ${output.errors.length}`);

      if (output.products.length > 0) {
        const first = output.products[0];
        console.log('');
        console.log('[Sample Normalized Product]');
        console.log(` External ID: ${first.externalProductId}`);
        console.log(` Name: ${first.name}`);
        console.log(` Brand: ${first.brandName}`);
        console.log(` Category: ${first.category}`);
        console.log(` Type: ${first.type}`);
        console.log(` Strain: ${first.strainType}`);
        console.log(` THC: ${first.thcPercent}%`);
        console.log(` CBD: ${first.cbdPercent}%`);
        console.log(` Image: ${first.primaryImageUrl?.slice(0, 60)}...`);
        // Pricing is looked up by the product's external id.
        const firstPricing = output.pricing.get(first.externalProductId);
        if (firstPricing) {
          console.log(` Price (cents): ${firstPricing.priceRec}`);
          console.log(` On Special: ${firstPricing.isOnSpecial}`);
        }
      }
    }

    console.log('');
    console.log(rule);
    console.log('TEST PASSED');
    console.log(rule);
  } catch (error: any) {
    console.error('');
    console.error(rule);
    console.error('TEST FAILED');
    console.error(rule);
    console.error(`Error: ${error.message}`);
    console.error(error.stack);
    process.exit(1);
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,55 @@
/**
* Compare MED vs REC product menus for same location
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
/**
 * Classify how two menus relate, given the sizes of their differences.
 *
 * @param recOnly - Products present only in the REC menu.
 * @param medOnly - Products present only in the MED menu.
 * @param shared - Products present in both menus.
 * @returns A label for the log line: IDENTICAL only when neither side has
 *   extras, and an explicit "unknown" label when nothing was fetched at all.
 */
function describeMenuOverlap(recOnly: number, medOnly: number, shared: number): string {
  if (recOnly + medOnly + shared === 0) return 'UNKNOWN (no products fetched)';
  if (shared === 0) return 'COMPLETELY DIFFERENT';
  if (recOnly === 0 && medOnly === 0) return 'IDENTICAL';
  return 'PARTIALLY OVERLAPPING';
}

/**
 * Compare the product ids on the REC menu (store 3379) and the MED menu
 * (store 4540) by querying the menu-products search index from inside a Jane
 * page, and print how many products are REC-only, MED-only and shared.
 *
 * Only the first 100 hits per store are compared. The browser is always closed.
 */
async function main() {
  const browser = await puppeteer.launch({ headless: 'new', args: ['--no-sandbox'] });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.iheartjane.com/stores', { waitUntil: 'domcontentloaded' });
    await new Promise(r => setTimeout(r, 2000));

    // Fetch up to 100 product ids for one store via in-page fetch.
    const fetchProductIds = (storeId: number): Promise<number[]> =>
      page.evaluate(async (id: number) => {
        const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ query: '', hitsPerPage: 100, filters: `store_id=${id}` }),
        });
        const data = await res.json();
        // An error payload has no `hits`; treat it as an empty menu.
        return data.hits?.map((h: any) => h.product_id) || [];
      }, storeId);

    // Fetch REC products (store 3379)
    const recProducts: number[] = await fetchProductIds(3379);
    // Fetch MED products (store 4540)
    const medProducts: number[] = await fetchProductIds(4540);

    const recSet = new Set(recProducts);
    const medSet = new Set(medProducts);
    const recOnly = recProducts.filter(id => !medSet.has(id)).length;
    const medOnly = medProducts.filter(id => !recSet.has(id)).length;
    const shared = recProducts.filter(id => medSet.has(id)).length;
    console.log('\nHana Phoenix - MED vs REC comparison (100 products each):');
    console.log(' REC products fetched:', recProducts.length);
    console.log(' MED products fetched:', medProducts.length);
    console.log(' REC-only:', recOnly);
    console.log(' MED-only:', medOnly);
    console.log(' Shared:', shared);
    console.log(' Menus are:', describeMenuOverlap(recOnly, medOnly, shared));
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,79 @@
/**
* Find ALL differing fields between MED and REC product payloads
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
/**
 * Diff one product's search payload between the REC store (3379) and the MED
 * store (4540) and print the result.
 *
 * Steps: open the Jane store directory, fetch the first hit of the REC store
 * from the menu-products index, look the same `product_id` up among the first
 * 100 hits of the MED store, then print all keys, every field whose
 * JSON-serialised value differs, and every field whose name hints at
 * purchase limits.
 *
 * The browser is closed in a `finally` block so that a failed navigation or
 * fetch cannot leave a headless browser process running.
 */
async function main() {
  const browser = await puppeteer.launch({ headless: 'new', args: ['--no-sandbox'] });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.iheartjane.com/stores', { waitUntil: 'domcontentloaded' });
    // Fixed settle delay before issuing requests from the page context.
    await new Promise(r => setTimeout(r, 2000));

    // Get full product payload from REC store (the fetch runs inside the page).
    const recProduct = await page.evaluate(async () => {
      const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ query: '', hitsPerPage: 1, filters: 'store_id=3379' }),
      });
      const data = await res.json();
      return data.hits?.[0];
    });

    // Without a reference product there is nothing to compare against.
    if (!recProduct) {
      console.log('No products returned for REC store 3379 - nothing to compare');
      return;
    }
    const productId = recProduct.product_id;

    // Get same product from MED store. Only the first 100 MED hits are
    // searched, so the product may legitimately be missing from the result.
    const medProduct = await page.evaluate(async (pid: number) => {
      const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ query: '', hitsPerPage: 100, filters: 'store_id=4540' }),
      });
      const data = await res.json();
      return data.hits?.find((h: any) => h.product_id === pid);
    }, productId);

    console.log('Product:', recProduct?.name, '(ID:', productId, ')\n');
    if (!medProduct) {
      // Make the cause explicit; otherwise the diff below looks like a real difference.
      console.log('NOTE: product not found in the first 100 MED hits - every REC field will be reported as differing\n');
    }

    // Union of keys from both payloads, sorted for stable output.
    const allKeys = new Set([...Object.keys(recProduct || {}), ...Object.keys(medProduct || {})]);
    const sortedKeys = [...allKeys].sort();
    console.log('=== ALL KEYS IN PAYLOAD ===');
    console.log(sortedKeys.join(', '));

    console.log('\n=== FIELDS THAT DIFFER ===');
    let diffCount = 0;
    for (const key of sortedKeys) {
      // Values are compared through their JSON text so nested objects/arrays compare by content.
      const recVal = JSON.stringify(recProduct?.[key]);
      const medVal = JSON.stringify(medProduct?.[key]);
      if (recVal !== medVal) {
        diffCount++;
        console.log(`${key}:`);
        console.log(` REC: ${recVal?.substring(0, 100)}`);
        console.log(` MED: ${medVal?.substring(0, 100)}`);
      }
    }
    if (diffCount === 0) {
      console.log('(none - payloads are identical)');
    }

    // Check for limit/allowance related fields (plain substring match on the key name).
    console.log('\n=== LIMIT-RELATED FIELDS ===');
    const limitFields = sortedKeys.filter(k =>
      k.includes('limit') || k.includes('max') || k.includes('allow') ||
      k.includes('quantity') || k.includes('cart') || k.includes('medical') ||
      k.includes('rec') || k.includes('weight')
    );
    for (const key of limitFields) {
      console.log(`${key}: REC=${JSON.stringify(recProduct?.[key])} | MED=${JSON.stringify(medProduct?.[key])}`);
    }
  } finally {
    await browser.close();
  }
}
// Entry point: log any failure and mark the process as failed so shells and
// CI see a non-zero exit status instead of a silent success.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,35 @@
/**
* Test script to capture and save full Jane payload
* Usage: npx ts-node scripts/test-jane-payload.ts
*/
import * as fs from 'fs';
import { fetchProductsFromUrl } from '../src/platforms/jane';
// Menu page handed to fetchProductsFromUrl() to capture its Jane payload.
const TEST_URL = 'https://theflowershopusa.com/mesa/menu/';
// File the captured payload is written to as pretty-printed JSON.
const OUTPUT_FILE = '/tmp/jane-test-payload.json';
/**
 * Fetch the Jane payload for TEST_URL, wrap the raw hits and store record in
 * a snapshot object with capture metadata, write it to OUTPUT_FILE and print
 * a short summary (path, product count, file size in KB).
 */
async function main() {
  console.log('Fetching Jane payload...');
  const result = await fetchProductsFromUrl(TEST_URL);

  // Assemble the snapshot piece by piece; key order matches the saved layout.
  const hits = result.products.map((product) => product.raw);
  const store = result.store?.raw || null;
  const capturedAt = new Date().toISOString();
  const payload = {
    hits,
    store,
    capturedAt,
    platform: 'jane',
    storeId: result.store?.id,
    productCount: result.products.length,
    responseCount: result.responses.length,
  };

  // Persist, then report what was written.
  const serialized = JSON.stringify(payload, null, 2);
  fs.writeFileSync(OUTPUT_FILE, serialized);

  console.log(`\nPayload saved to: ${OUTPUT_FILE}`);
  console.log(`Products: ${result.products.length}`);
  const sizeInKb = Math.round(fs.statSync(OUTPUT_FILE).size / 1024);
  console.log(`Size: ${sizeInKb}KB`);
}
// Entry point: log any failure and mark the process as failed so shells and
// CI see a non-zero exit status instead of a silent success.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,113 @@
/**
* Test script for Treez platform client
* Tests the new Treez integration with Best Dispensary
*
* Usage: npx ts-node scripts/test-treez-client.ts
*/
import {
fetchProductsByStoreId,
} from '../src/platforms/treez';
import { TreezNormalizer } from '../src/hydration/normalizers/treez';
// Treez store identifier passed to fetchProductsByStoreId() by this test.
const TEST_STORE_ID = 'best';
/**
 * Exercise the Treez client end to end: fetch the products of TEST_STORE_ID,
 * print a summary and up to five samples, run the captured products through
 * TreezNormalizer wrapped in a stand-in payload record, and print the
 * normalizer's counts plus the first normalized product.
 *
 * Any thrown error is reported under a "TEST FAILED" banner and the process
 * exits with status 1.
 */
async function main() {
  const rule = '='.repeat(60);

  console.log(rule);
  console.log('Treez Platform Client Test');
  console.log(rule);
  console.log(`Test Store: ${TEST_STORE_ID}`);
  console.log('');

  try {
    // Test 1: Fetch products from store
    console.log('[Test 1] Fetching products from Treez store...');
    const result = await fetchProductsByStoreId(TEST_STORE_ID);

    console.log('');
    console.log('[Results]');
    console.log(` Store: ${result.store.name}`);
    console.log(` Store ID: ${result.store.storeId}`);
    console.log(` Products captured: ${result.products.length}`);
    console.log(` Scroll count: ${result.scrollCount}`);

    const captured = result.products;
    if (captured.length > 0) {
      console.log('');
      console.log('[Sample Products (first 5)]');
      captured.slice(0, 5).forEach((p) => {
        console.log(` - ${p.name}`);
        console.log(` Brand: ${p.brand || 'N/A'}`);
        console.log(` Category: ${p.category || 'N/A'} / ${p.subcategory || 'N/A'}`);
        console.log(` Price: ${p.price ? '$' + p.price : 'N/A'}`);
        console.log(` THC: ${p.thcPercent !== null ? p.thcPercent + '%' : 'N/A'}`);
      });

      // Test 2: Normalize products
      console.log('');
      console.log('[Test 2] Testing normalizer...');
      const normalizer = new TreezNormalizer();

      // Stand-in for a stored payload row; only raw_json / product_count carry real data.
      const fakePayload = {
        id: 'test-payload',
        dispensary_id: 9999,
        crawl_run_id: null,
        platform: 'treez',
        payload_version: 1,
        raw_json: { products: captured },
        product_count: captured.length,
        pricing_type: null,
        crawl_mode: null,
        fetched_at: new Date(),
        processed: false,
        normalized_at: null,
        hydration_error: null,
        hydration_attempts: 0,
        created_at: new Date(),
      };

      const normalized = normalizer.normalize(fakePayload);
      console.log(` Products normalized: ${normalized.products.length}`);
      console.log(` Brands extracted: ${normalized.brands.length}`);
      console.log(` Categories extracted: ${normalized.categories.length}`);
      console.log(` Errors: ${normalized.errors.length}`);

      if (normalized.products.length > 0) {
        console.log('');
        console.log('[Sample Normalized Product]');
        const np = normalized.products[0];
        console.log(` External ID: ${np.externalProductId}`);
        console.log(` Name: ${np.name}`);
        console.log(` Brand: ${np.brandName}`);
        console.log(` Category: ${np.category}`);
        console.log(` Type: ${np.type}`);
        console.log(` Strain: ${np.strainType}`);
        console.log(` THC: ${np.thcPercent !== null ? np.thcPercent + '%' : 'N/A'}`);
        console.log(` CBD: ${np.cbdPercent !== null ? np.cbdPercent + '%' : 'N/A'}`);
        console.log(` Image: ${np.primaryImageUrl?.slice(0, 60) || 'N/A'}...`);

        // Pricing is kept in a map keyed by the external product id.
        const pricing = normalized.pricing.get(np.externalProductId);
        if (pricing) {
          console.log(` Price (cents): ${pricing.priceRec}`);
        }
      }
    }

    console.log('');
    console.log(rule);
    console.log('TEST PASSED');
    console.log(rule);
  } catch (error: any) {
    console.error('');
    console.error(rule);
    console.error('TEST FAILED');
    console.error(rule);
    console.error(`Error: ${error.message}`);
    console.error(error.stack);
    process.exit(1);
  }
}
// Entry point. main() reports failures and exits with status 1 from its own
// try/catch, so this handler only logs errors thrown outside that block.
main().catch(console.error);

View File

@@ -0,0 +1,559 @@
/**
* Treez Platform Smoke Test
*
* Discovers DOM structure and extracts products from Treez menu pages.
* Used to determine actual CSS selectors for the platform client.
*
* Usage: npx ts-node scripts/test-treez-discovery.ts
*/
import puppeteer, { Page } from 'puppeteer';
import puppeteerExtra from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
// Register stealth plugin (even though Treez doesn't use Cloudflare, good practice)
puppeteerExtra.use(StealthPlugin());
// Menu page opened by the smoke test.
const TEST_URL = 'https://best.treez.io/onlinemenu/?customerType=ADULT';
// Store identifier; in this script it is only printed in main()'s banner.
const STORE_ID = 'best';
/**
 * One product as scraped from a Treez menu card by extractProducts().
 * String fields are '' and nullable fields are null when the card did not
 * yield a value.
 */
interface TreezProductRaw {
  /** Id taken from the card's `/product/<id>` link, or a `treez_`-prefixed slug of the name when no link is found. */
  productId: string;
  /** Trimmed text of the card's name element. */
  name: string;
  /** Trimmed text of the first element whose class contains "brand"/"Brand". */
  brand: string;
  /** Keyword-derived category such as 'flower', 'vape', 'edible', 'concentrate', 'pre-roll', 'topical' or 'tincture'. */
  category: string;
  /** Strain type found in the card text: 'indica', 'sativa' or 'hybrid'. */
  subcategory: string;
  /** THC figure parsed from the card text. */
  thcPercent: number | null;
  /** CBD figure parsed from the card text. */
  cbdPercent: number | null;
  /** Amount following the `$` sign in the card's price element. */
  price: number | null;
  /** Copy of `weight` when a weight was found. */
  priceUnit: string;
  /** Image source; Next.js `/_next/image` URLs are unwrapped to their `url=` target. */
  imageUrl: string | null;
  /** False when the card text contains "out of stock" or "sold out". */
  inStock: boolean;
  /** Number plus lower-cased unit (g, mg, oz) parsed from the name or card text, e.g. "3.5g". */
  weight: string | null;
}
/**
 * Pause for `ms` milliseconds.
 *
 * @param ms Delay in milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Keep scrolling to the bottom of the page so an infinite-scroll menu loads
 * its remaining products.
 *
 * Each round reads the document height, scrolls to the bottom, waits 1.5 s
 * and logs a rough product count. Scrolling stops after `maxScrolls` rounds,
 * or as soon as the height has matched the previous reading three times in a row.
 *
 * @param page Puppeteer page showing the menu.
 * @param maxScrolls Upper bound on scroll rounds (default 30).
 * @returns Number of scrolls actually performed.
 */
async function scrollToLoadAll(page: Page, maxScrolls = 30): Promise<number> {
  // Candidate selectors for the progress estimate, tried in this order.
  const probeSelectors = [
    '[class*="product"]',
    '[class*="Product"]',
    '[data-product]',
    '.menu-item',
    '[class*="card"]',
    '[class*="Card"]',
  ];

  let lastHeight = 0;
  let stalledReads = 0;
  let performed = 0;

  console.log('[Scroll] Starting infinite scroll...');

  for (; performed < maxScrolls; performed++) {
    const heightNow = await page.evaluate(() => document.body.scrollHeight);

    // Count consecutive reads with an unchanged height; any growth resets the streak.
    stalledReads = heightNow === lastHeight ? stalledReads + 1 : 0;
    if (stalledReads >= 3) {
      console.log('[Scroll] No new content after 3 attempts, stopping');
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500); // Wait for products to load
    lastHeight = heightNow;

    // Rough estimate only: the first selector matching more than 10 elements wins.
    const approxProducts = await page.evaluate((selectors: string[]) => {
      let found = 0;
      selectors.some((selector) => {
        const matches = document.querySelectorAll(selector).length;
        if (matches > 10) {
          found = matches;
          return true;
        }
        return false;
      });
      return found;
    }, probeSelectors);

    console.log(`[Scroll] Scroll ${performed + 1}: height=${heightNow}, products~${approxProducts}`);
  }

  return performed;
}
/**
 * Print a survey of the loaded page's DOM to help pick product selectors.
 *
 * Logs, in order: up to 20 class tokens containing "product", up to 20
 * containing "card", up to 30 `data-*` attribute names (excluding ones
 * containing "reactid"), and a sample of a likely product container (selector,
 * child count, occurrence count when known, first 1000 chars of its HTML).
 *
 * The container sample first tries a list of known selector patterns. If none
 * match it falls back to the `className` shared by the most `div`s that have
 * more than two children (and occurs more than five times). The fallback
 * selector is built from the first non-empty class token and passed through
 * `CSS.escape`, so class strings with leading/double spaces or characters that
 * are special in CSS cannot make `querySelector` throw.
 *
 * @param page Puppeteer page whose DOM is inspected.
 */
async function analyzeDOM(page: Page): Promise<void> {
  type ContainerSample = { selector: string; html: string; childCount: number; count?: number };

  console.log('\n' + '='.repeat(60));
  console.log('DOM STRUCTURE ANALYSIS');
  console.log('='.repeat(60));

  // Runs inside the page: distinct class tokens containing `needle`
  // (case-insensitive), capped at 20. Shared by the "product" and "card" scans.
  const collectClassTokens = (needle: string): string[] => {
    const tokens = new Set<string>();
    document.querySelectorAll('*').forEach((el) => {
      const className = el.className;
      // Only plain-string class names are scanned.
      if (typeof className !== 'string' || !className.toLowerCase().includes(needle)) return;
      className.split(/\s+/).forEach((token) => {
        if (token.toLowerCase().includes(needle)) {
          tokens.add(token);
        }
      });
    });
    return Array.from(tokens).slice(0, 20);
  };

  // Find elements with "product" in class name
  const productClasses = await page.evaluate(collectClassTokens, 'product');
  console.log('\n[Classes containing "product"]:');
  productClasses.forEach((c: string) => console.log(` .${c}`));

  // Find elements with "card" in class name
  const cardClasses = await page.evaluate(collectClassTokens, 'card');
  console.log('\n[Classes containing "card"]:');
  cardClasses.forEach((c: string) => console.log(` .${c}`));

  // Find data attributes
  const dataAttrs = await page.evaluate(() => {
    const attrs = new Set<string>();
    document.querySelectorAll('*').forEach((el) => {
      Array.from(el.attributes).forEach((attr) => {
        if (attr.name.startsWith('data-') && !attr.name.includes('reactid')) {
          attrs.add(attr.name);
        }
      });
    });
    return Array.from(attrs).slice(0, 30);
  });
  console.log('\n[Data attributes found]:');
  dataAttrs.forEach((attr: string) => console.log(` ${attr}`));

  // Get sample HTML of potential product container
  const sampleHTML = await page.evaluate((): ContainerSample | null => {
    // Try to find a product container via known patterns first.
    const selectors = [
      '[class*="ProductCard"]',
      '[class*="product-card"]',
      '[class*="menuItem"]',
      '[class*="menu-item"]',
      '[data-testid*="product"]',
    ];
    for (const sel of selectors) {
      const el = document.querySelector(sel);
      if (el) {
        return {
          selector: sel,
          html: el.outerHTML.slice(0, 2000),
          childCount: el.children.length,
        };
      }
    }

    // Fallback: find repeating structures, keyed by the full class string.
    const classCounts = new Map<string, number>();
    document.querySelectorAll('div[class]').forEach((el) => {
      if (el.children.length > 2 && el.className) {
        classCounts.set(el.className, (classCounts.get(el.className) || 0) + 1);
      }
    });

    // Find class that appears many times (likely product cards)
    let bestClass = '';
    let bestCount = 0;
    classCounts.forEach((count, className) => {
      if (count > bestCount && count > 5) {
        bestCount = count;
        bestClass = className;
      }
    });

    // First real token of the winning class string; empty pieces produced by
    // leading or repeated whitespace are skipped.
    const firstToken = bestClass.split(/\s+/).find((token) => token.length > 0);
    if (!firstToken) {
      return null;
    }

    // Escape the token so characters such as ':' or '/' stay part of the class name.
    const selector = `.${CSS.escape(firstToken)}`;
    const el = document.querySelector(selector);
    if (!el) {
      return null;
    }
    return {
      selector,
      html: el.outerHTML.slice(0, 2000),
      childCount: el.children.length,
      count: bestCount,
    };
  });

  if (sampleHTML) {
    console.log('\n[Sample Product Container]:');
    console.log(` Selector: ${sampleHTML.selector}`);
    console.log(` Children: ${sampleHTML.childCount}`);
    // Only the fallback path knows how often the container repeats.
    if (sampleHTML.count) {
      console.log(` Occurrences: ${sampleHTML.count}`);
    }
    console.log('\n[Sample HTML (first 1000 chars)]:');
    console.log(sampleHTML.html.slice(0, 1000));
  }
}
/**
 * Extract products using discovered selectors
 * Based on DOM analysis of Treez/GapCommerce React app
 *
 * The scraping runs inside the page. It picks the first selector pattern that
 * yields at least one card containing a name element or a price element, then
 * reads name, id, brand, price, image, THC/CBD, weight, strain, category and
 * stock status from each card. Cards with an empty or already-seen name are
 * skipped.
 *
 * Diagnostics (which selector matched, per-card extraction errors) are
 * returned to Node and logged here. A `console.log` issued inside
 * `page.evaluate` goes to the browser console and never reaches this
 * script's output.
 *
 * @param page Puppeteer page showing the fully loaded menu.
 * @returns One record per distinct product name found.
 */
async function extractProducts(page: Page): Promise<TreezProductRaw[]> {
  console.log('\n' + '='.repeat(60));
  console.log('PRODUCT EXTRACTION');
  console.log('='.repeat(60));

  const extraction = await page.evaluate(() => {
    const results: any[] = [];
    // Diagnostics handed back to Node for logging.
    const failures: string[] = [];
    let matchedSelector: string | null = null;

    // Treez uses classes like: product_product__ERWtJ
    // Find all product cards using the discovered class patterns
    const productSelectors = [
      '[class*="product_product__"]', // Main product container
      '[class*="ProductCard"]', // Alternative pattern
    ];
    let productElements: Element[] = [];
    for (const selector of productSelectors) {
      const elements = document.querySelectorAll(selector);
      // Filter to only get the actual product cards, not child elements:
      // keep an element when it contains a name element OR a price element.
      const filtered = Array.from(elements).filter(el => {
        const hasName = el.querySelector('[class*="product__name"]') || el.querySelector('[class*="name__"]');
        const hasPrice = el.querySelector('[class*="price"]');
        return hasName || hasPrice;
      });
      if (filtered.length > 0) {
        productElements = filtered;
        matchedSelector = selector;
        break;
      }
    }

    // Dedupe - some cards may be captured multiple times. Keyed by name.
    const seen = new Set<string>();

    // Extract data from each product element
    for (const el of productElements) {
      try {
        // Get product name - look for name class
        const nameEl = el.querySelector('[class*="product__name"], [class*="name__"]');
        const name = nameEl?.textContent?.trim() || '';
        if (!name || seen.has(name)) continue;
        seen.add(name);

        // Get product ID from link
        const linkEl = el.querySelector('a[href*="/product/"]');
        let productId = '';
        if (linkEl) {
          const href = linkEl.getAttribute('href') || '';
          const match = href.match(/\/product\/([^\/\?]+)/);
          productId = match ? match[1] : '';
        }
        if (!productId) {
          // No link: fall back to a slug built from the first 30 chars of the name.
          productId = `treez_${name.replace(/\s+/g, '_').toLowerCase().slice(0, 30)}`;
        }

        // Get brand from the info section
        const brandEl = el.querySelector('[class*="brand"], [class*="Brand"]');
        const brand = brandEl?.textContent?.trim() || '';

        // Get price - look for price class with $ symbol
        const priceEl = el.querySelector('[class*="price__ins"], [class*="price"]');
        const priceText = priceEl?.textContent || '';
        const priceMatch = priceText.match(/\$(\d+(?:\.\d{2})?)/);
        const price = priceMatch ? parseFloat(priceMatch[1]) : null;

        // Get image URL
        const imgEl = el.querySelector('img');
        let imageUrl = imgEl?.getAttribute('src') || null;
        // Handle Next.js image optimization URLs: the original is in the `url` query parameter.
        if (imageUrl && imageUrl.includes('/_next/image')) {
          const urlMatch = imageUrl.match(/url=([^&]+)/);
          if (urlMatch) {
            imageUrl = decodeURIComponent(urlMatch[1]);
          }
        }

        // Get text content for THC/CBD extraction
        const text = el.textContent || '';

        // Get THC/CBD - look for patterns like "THC 25.5%" or "25.5% THC"
        const thcMatch = text.match(/(?:THC[:\s]*)?(\d+(?:\.\d+)?)\s*%?\s*THC/i) ||
          text.match(/THC[:\s]*(\d+(?:\.\d+)?)\s*%?/i);
        const cbdMatch = text.match(/(?:CBD[:\s]*)?(\d+(?:\.\d+)?)\s*%?\s*CBD/i) ||
          text.match(/CBD[:\s]*(\d+(?:\.\d+)?)\s*%?/i);
        const thcPercent = thcMatch ? parseFloat(thcMatch[1]) : null;
        const cbdPercent = cbdMatch ? parseFloat(cbdMatch[1]) : null;

        // Get weight from name or text (e.g., "3.5G", "1G"); the name takes precedence.
        const weightMatch = name.match(/(\d+(?:\.\d+)?)\s*(G|g|MG|mg|OZ|oz)/i) ||
          text.match(/(\d+(?:\.\d+)?)\s*(G|g|MG|mg|OZ|oz)/i);
        const weight = weightMatch ? `${weightMatch[1]}${weightMatch[2].toLowerCase()}` : null;

        // Price unit from weight
        let priceUnit = '';
        if (weight) {
          priceUnit = weight;
        }

        // Get category/strain type (first of indica/sativa/hybrid found in the text)
        const strainTypes = ['indica', 'sativa', 'hybrid'];
        let subcategory = '';
        const textLower = text.toLowerCase();
        for (const strain of strainTypes) {
          if (textLower.includes(strain)) {
            subcategory = strain;
            break;
          }
        }

        // Determine category from various signals; the first matching pattern wins.
        // NOTE(review): patterns are tested against the whole card text, so
        // short keywords like "cart" or "pen" may match unrelated words - confirm
        // against real cards.
        let category = '';
        const categoryPatterns = [
          { pattern: /flower|bud/i, category: 'flower' },
          { pattern: /vape|cart|pen/i, category: 'vape' },
          { pattern: /edible|gummy|chocolate/i, category: 'edible' },
          { pattern: /concentrate|dab|wax|shatter/i, category: 'concentrate' },
          { pattern: /pre.?roll|joint/i, category: 'pre-roll' },
          { pattern: /topical|balm|cream/i, category: 'topical' },
          { pattern: /tincture/i, category: 'tincture' },
        ];
        for (const { pattern, category: cat } of categoryPatterns) {
          if (pattern.test(text)) {
            category = cat;
            break;
          }
        }

        // Check stock status
        const inStock = !textLower.includes('out of stock') && !textLower.includes('sold out');

        results.push({
          productId,
          name,
          brand,
          category,
          subcategory,
          thcPercent,
          cbdPercent,
          price,
          priceUnit,
          imageUrl,
          inStock,
          weight,
        });
      } catch (err) {
        // Error objects do not survive serialization back to Node, so keep the text.
        failures.push(String(err));
      }
    }

    return { results, failures, matchedSelector, cardCount: productElements.length };
  });

  // Report the in-page diagnostics from Node so they show up in the script output.
  if (extraction.matchedSelector) {
    console.log(`Found ${extraction.cardCount} products with selector: ${extraction.matchedSelector}`);
  }
  for (const failure of extraction.failures) {
    console.log('Error extracting product:', failure);
  }

  return extraction.results;
}
/**
 * Dismiss the age-verification modal when the page is showing one.
 *
 * Looks the modal up once (no waiting). If it and a confirm button are both
 * present, the button is clicked, the function pauses 2 s and then waits up
 * to 10 s for the modal to leave the DOM. A timeout on that wait is logged
 * and otherwise ignored.
 *
 * @param page Puppeteer page that may display the gate.
 * @returns true when the confirm button was clicked; false when there was no
 *          gate, no button, or a lookup/click error occurred.
 */
async function bypassAgeGate(page: Page): Promise<boolean> {
  console.log('[Age Gate] Checking for age gate...');

  try {
    const gate = await page.$('[data-testid="age-gate-modal"], [class*="AgeGate"]');
    if (!gate) {
      console.log('[Age Gate] No age gate detected');
      return false;
    }
    console.log('[Age Gate] Age gate detected, clicking confirm button...');

    const confirmButton = await page.$('[data-testid="age-gate-submit-button"], button[type="submit"]');
    if (!confirmButton) {
      console.log('[Age Gate] No submit button found');
      return false;
    }

    await confirmButton.click();
    console.log('[Age Gate] Clicked confirm button');

    // Give the modal time to animate away and the menu time to load.
    await sleep(2000);

    // Best effort: a gate that is still present after the timeout is not treated as fatal.
    try {
      await page.waitForFunction(
        () => !document.querySelector('[data-testid="age-gate-modal"]'),
        { timeout: 10000 }
      );
    } catch {
      console.log('[Age Gate] Gate may still be visible, continuing anyway');
    }

    console.log('[Age Gate] Age gate bypassed');
    return true;
  } catch (err: any) {
    console.log(`[Age Gate] Error: ${err.message}`);
    return false;
  }
}
/**
 * Run the Treez smoke test end to end.
 *
 * Launches a headless browser, opens TEST_URL, dismisses the age gate,
 * saves a screenshot, prints the DOM analysis, scrolls to load the menu and
 * extracts products. When products are found it prints data-quality
 * statistics and up to ten samples and writes the full list to
 * /tmp/treez-products.json; otherwise it dumps the page HTML to
 * /tmp/treez-page.html for debugging.
 *
 * On failure the error is logged and the exit status is set through
 * `process.exitCode` rather than `process.exit()`: an immediate exit would
 * skip the `finally` block and leave the browser process running.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('TREEZ PLATFORM SMOKE TEST');
  console.log('='.repeat(60));
  console.log(`Store ID: ${STORE_ID}`);
  console.log(`URL: ${TEST_URL}`);
  console.log('');

  const browser = await puppeteerExtra.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });

  try {
    const page = await browser.newPage();

    // Set viewport
    await page.setViewport({ width: 1920, height: 1080 });

    // Set user agent
    await page.setUserAgent(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
    );

    console.log('[Navigation] Going to Treez menu page...');
    await page.goto(TEST_URL, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    console.log('[Navigation] Page loaded, waiting for React app...');
    await sleep(2000);

    // Bypass age gate
    await bypassAgeGate(page);

    // Wait for menu content to load
    console.log('[Navigation] Waiting for menu content...');
    await sleep(3000);

    // Check if page loaded correctly
    const pageTitle = await page.title();
    console.log(`[Navigation] Page title: ${pageTitle}`);

    // Take a screenshot for debugging
    await page.screenshot({ path: '/tmp/treez-smoke-test.png', fullPage: false });
    console.log('[Debug] Screenshot saved to /tmp/treez-smoke-test.png');

    // Analyze DOM structure
    await analyzeDOM(page);

    // Scroll to load all products
    await scrollToLoadAll(page);

    // Extract products
    const products = await extractProducts(page);

    console.log('\n' + '='.repeat(60));
    console.log('RESULTS');
    console.log('='.repeat(60));
    console.log(`Total products extracted: ${products.length}`);

    // Both branches below write a debug file, so load fs once.
    const fs = await import('fs');

    if (products.length > 0) {
      // "count/total (pct%)" - only called here, where total is non-zero.
      const coverage = (count: number): string =>
        `${count}/${products.length} (${Math.round((count / products.length) * 100)}%)`;

      // Show statistics
      const withPrice = products.filter((p) => p.price !== null).length;
      const withThc = products.filter((p) => p.thcPercent !== null).length;
      const withBrand = products.filter((p) => p.brand).length;
      const withImage = products.filter((p) => p.imageUrl).length;
      console.log(`\n[Data Quality]`);
      console.log(` With price: ${coverage(withPrice)}`);
      console.log(` With THC%: ${coverage(withThc)}`);
      console.log(` With brand: ${coverage(withBrand)}`);
      console.log(` With image: ${coverage(withImage)}`);

      // Show sample products
      console.log('\n[Sample Products (first 10)]:');
      for (const p of products.slice(0, 10)) {
        console.log(`\n ${p.name}`);
        console.log(` ID: ${p.productId}`);
        console.log(` Brand: ${p.brand || 'N/A'}`);
        console.log(` Category: ${p.category || 'N/A'} / ${p.subcategory || 'N/A'}`);
        console.log(` THC: ${p.thcPercent !== null ? p.thcPercent + '%' : 'N/A'}`);
        console.log(` CBD: ${p.cbdPercent !== null ? p.cbdPercent + '%' : 'N/A'}`);
        console.log(` Price: ${p.price !== null ? '$' + p.price : 'N/A'} ${p.priceUnit}`);
        console.log(` Weight: ${p.weight || 'N/A'}`);
        console.log(` Image: ${p.imageUrl?.slice(0, 60) || 'N/A'}...`);
        console.log(` In Stock: ${p.inStock}`);
      }

      // Save full results to file
      fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(products, null, 2));
      console.log('\n[Debug] Full product list saved to /tmp/treez-products.json');
    } else {
      console.log('\n[WARNING] No products extracted!');
      console.log('Check /tmp/treez-smoke-test.png for page state');

      // Dump page HTML for debugging
      const html = await page.content();
      fs.writeFileSync('/tmp/treez-page.html', html);
      console.log('[Debug] Page HTML saved to /tmp/treez-page.html');
    }

    console.log('\n' + '='.repeat(60));
    console.log(products.length > 0 ? 'SMOKE TEST PASSED' : 'SMOKE TEST NEEDS ADJUSTMENT');
    console.log('='.repeat(60));
  } catch (error: any) {
    console.error('\n' + '='.repeat(60));
    console.error('SMOKE TEST FAILED');
    console.error('='.repeat(60));
    console.error(`Error: ${error.message}`);
    console.error(error.stack);
    // Record the failure without terminating, so the finally block still closes the browser.
    process.exitCode = 1;
  } finally {
    await browser.close();
  }
}
// Entry point. Errors raised before main()'s own try block (e.g. the browser
// failing to launch) land here; log them and report a non-zero exit status.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});