"use strict";
/**
 * Test script for iHeartJane menu scraping via Playwright.
 * Intercepts API/Algolia calls made by the browser.
 */
Object.defineProperty(exports, "__esModule", { value: true });
const playwright_1 = require("playwright");

// Pause applied after each successful age-gate click so the page can react.
const AGE_GATE_SETTLE_MS = 5000;
// Selector used to find the age-gate confirmation button.
const AGE_GATE_SELECTOR = 'button:has-text("Yes")';

/**
 * Extracts a printable message from a caught value, which may not be an Error.
 *
 * @param {unknown} err - The caught value.
 * @returns {unknown} `err.message` when present, otherwise the value itself.
 */
function describeError(err) {
    return err?.message ?? err;
}

/**
 * Registers request/response listeners that record Algolia credentials and
 * Algolia / Jane API JSON payloads observed while the page loads.
 *
 * @param {object} page - Playwright page to observe.
 * @param {Array<object>} apiResponses - Mutated: captured payloads are pushed here.
 * @param {object} capturedCredentials - Mutated: `algolia` is set to `{ appId, apiKey }` when seen.
 * @returns {void}
 */
function attachNetworkCapture(page, apiResponses, capturedCredentials) {
    page.on('request', (request) => {
        const url = request.url();
        if (!url.includes('algolia')) {
            return;
        }
        // Capture Algolia credentials from request headers
        const headers = request.headers();
        const appId = headers['x-algolia-application-id'];
        const apiKey = headers['x-algolia-api-key'];
        if (appId && apiKey) {
            capturedCredentials.algolia = { appId, apiKey };
            console.log(`Captured Algolia credentials: App=${appId}, Key=${apiKey.substring(0, 10)}...`);
        }
    });

    page.on('response', async (response) => {
        const url = response.url();

        // Capture Algolia search results (only the first result set is recorded)
        if (url.includes('algolia.net') || url.includes('algolianet.com')) {
            try {
                const data = await response.json();
                if (data.results && data.results[0] && data.results[0].hits) {
                    console.log(`Captured ${data.results[0].hits.length} products from Algolia`);
                    apiResponses.push({ type: 'algolia', data: data.results[0] });
                }
            }
            catch (e) {
                // Deliberately ignored: body was not JSON or could not be read
            }
        }

        // Capture Jane API responses
        if (url.includes('api.iheartjane.com') && url.includes('products')) {
            try {
                const data = await response.json();
                console.log(`Captured Jane API response: ${url}`);
                apiResponses.push({ type: 'jane-api', url, data });
            }
            catch (e) {
                // Deliberately ignored: body was not JSON or could not be read
            }
        }
    });
}

/**
 * Tries to dismiss the age gate by clicking a button containing "Yes".
 * Four strategies are attempted in order until one reports a click; a list of
 * all buttons on the page is then logged for debugging. Any error is logged
 * and never propagated, so the scrape continues even if no gate is found.
 *
 * @param {object} page - Playwright page showing the menu.
 * @returns {Promise<void>}
 */
async function dismissAgeGate(page) {
    console.log('Looking for age gate...');
    try {
        let clicked = false;

        // Method 1: Use Playwright locator with a forced click
        try {
            const yesButton = page.locator(AGE_GATE_SELECTOR).first();
            await yesButton.waitFor({ state: 'visible', timeout: 5000 });
            await yesButton.click({ force: true });
            clicked = true;
            console.log('Clicked age gate via Playwright locator');
            await page.waitForTimeout(AGE_GATE_SETTLE_MS);
        }
        catch (e) {
            console.log('Playwright locator failed:', describeError(e));
        }

        // Method 2: Try clicking the centre of the button's bounding box
        if (!clicked) {
            try {
                const box = await page.locator(AGE_GATE_SELECTOR).first().boundingBox();
                if (box) {
                    const centerX = box.x + box.width / 2;
                    const centerY = box.y + box.height / 2;
                    await page.mouse.click(centerX, centerY);
                    clicked = true;
                    console.log(`Clicked age gate at coordinates: ${centerX}, ${centerY}`);
                    await page.waitForTimeout(AGE_GATE_SETTLE_MS);
                }
            }
            catch (e) {
                console.log('Bounding box click failed:', describeError(e));
            }
        }

        // Method 3: Try JavaScript click
        if (!clicked) {
            // NOTE: this callback runs in the browser and cannot use outer variables.
            const jsClickResult = await page.evaluate(() => {
                const buttons = Array.from(document.querySelectorAll('button'));
                for (const btn of buttons) {
                    if (btn.textContent?.includes('Yes')) {
                        btn.click();
                        return { success: true, buttonText: btn.textContent };
                    }
                }
                return { success: false };
            });
            if (jsClickResult.success) {
                clicked = true;
                console.log(`Clicked via JS: ${jsClickResult.buttonText}`);
                await page.waitForTimeout(AGE_GATE_SETTLE_MS);
            }
        }

        // Method 4: Click element containing "Yes" with dispatchEvent
        if (!clicked) {
            const dispatchResult = await page.evaluate(() => {
                const buttons = Array.from(document.querySelectorAll('button'));
                for (const btn of buttons) {
                    if (btn.textContent?.includes('Yes')) {
                        btn.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true }));
                        return true;
                    }
                }
                return false;
            });
            if (dispatchResult) {
                clicked = true;
                console.log('Clicked via dispatchEvent');
                await page.waitForTimeout(AGE_GATE_SETTLE_MS);
            }
        }

        // Log button info for debugging
        const buttonInfo = await page.evaluate(() => {
            const buttons = Array.from(document.querySelectorAll('button'));
            return buttons.map(b => ({
                text: b.textContent?.trim(),
                visible: b.offsetParent !== null,
                rect: b.getBoundingClientRect()
            }));
        });
        console.log('Buttons found:', JSON.stringify(buttonInfo, null, 2));
    }
    catch (e) {
        console.log('Age gate handling error:', e);
    }
}

/**
 * Reads product cards from the page DOM as a backup to the intercepted API data.
 * Cards without a name element are skipped.
 *
 * @param {object} page - Playwright page showing the menu.
 * @returns {Promise<Array<{name: string, brand?: string, price?: string, image?: string, source: string}>>}
 */
async function extractDomProducts(page) {
    return page.evaluate(() => {
        const items = [];
        // Try various selectors that Jane might use
        const productCards = document.querySelectorAll('[data-testid*="product"], [class*="ProductCard"], [class*="product-card"], .product-tile');
        productCards.forEach((card) => {
            const name = card.querySelector('[class*="name"], [class*="title"], h3, h4')?.textContent?.trim();
            const brand = card.querySelector('[class*="brand"]')?.textContent?.trim();
            const price = card.querySelector('[class*="price"]')?.textContent?.trim();
            const image = card.querySelector('img')?.getAttribute('src');
            if (name) {
                items.push({ name, brand, price, image, source: 'dom' });
            }
        });
        return items;
    });
}

/**
 * Looks for data embedded in the page: the `__NEXT_DATA__` element first, then
 * `window.__INITIAL_STATE__`, `window.__PRELOADED_STATE__` and `window.products`.
 * Best-effort: a malformed `__NEXT_DATA__` payload falls through to the window
 * checks, and any evaluation failure is logged and reported as `null` so that
 * data captured earlier in the scrape is not lost.
 *
 * @param {object} page - Playwright page showing the menu.
 * @returns {Promise<{type: string, data: unknown} | null>} The first source found, or `null`.
 */
async function extractEmbeddedData(page) {
    try {
        return await page.evaluate(() => {
            // Check for Next.js data
            const nextData = document.getElementById('__NEXT_DATA__');
            if (nextData) {
                try {
                    return { type: 'next', data: JSON.parse(nextData.textContent || '{}') };
                }
                catch (e) {
                    // Malformed payload: fall through to the window-level checks
                }
            }
            // Check for any window-level product data
            const win = window;
            if (win.__INITIAL_STATE__) {
                return { type: 'initial_state', data: win.__INITIAL_STATE__ };
            }
            if (win.__PRELOADED_STATE__) {
                return { type: 'preloaded', data: win.__PRELOADED_STATE__ };
            }
            if (win.products) {
                return { type: 'products', data: win.products };
            }
            return null;
        });
    }
    catch (e) {
        console.log('Embedded data extraction failed:', describeError(e));
        return null;
    }
}

/**
 * Prints a summary of the captured API responses, a sample of the DOM
 * products, and the captured credentials.
 *
 * @param {Array<object>} apiResponses - Captured payloads, each with a `type` field.
 * @param {Array<object>} domProducts - Products read from the DOM.
 * @param {object} capturedCredentials - Credentials recorded from request headers.
 * @returns {void}
 */
function logSummary(apiResponses, domProducts, capturedCredentials) {
    console.log('\n=== API Responses Summary ===');
    for (const resp of apiResponses) {
        console.log(`Type: ${resp.type}`);
        if (resp.type === 'algolia' && resp.data.hits) {
            console.log(`  Hits: ${resp.data.hits.length}`);
            console.log(`  Total: ${resp.data.nbHits}`);
            if (resp.data.hits[0]) {
                console.log(`  Sample product:`, JSON.stringify(resp.data.hits[0], null, 2).substring(0, 1000));
            }
        }
    }
    console.log('\n=== DOM Products Sample ===');
    console.log(JSON.stringify(domProducts.slice(0, 3), null, 2));
    console.log('\n=== Captured Credentials ===');
    console.log(JSON.stringify(capturedCredentials, null, 2));
}

/**
 * Loads an iHeartJane menu in headless Chromium and collects product data from
 * intercepted network traffic, the rendered DOM, and data embedded in the page.
 *
 * @param {string} urlOrStoreId - A full URL (anything starting with "http"),
 *   or a store ID that is inserted into the embed menu URL.
 * @returns {Promise<{apiResponses: Array<object>, domProducts: Array<object>, embeddedData: (object|null), capturedCredentials: object}>}
 * @throws Propagates failures from browser launch, page setup, navigation,
 *   scrolling and DOM extraction; the browser is closed in every case.
 */
async function scrapeJaneMenu(urlOrStoreId) {
    // Handle either a full URL or just a store ID
    const menuUrl = urlOrStoreId.startsWith('http')
        ? urlOrStoreId
        : `https://www.iheartjane.com/embed/stores/${urlOrStoreId}/menu`;
    console.log(`Starting Playwright scrape for iHeartJane: ${menuUrl}`);

    const browser = await playwright_1.chromium.launch({
        headless: true,
        args: [
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-blink-features=AutomationControlled'
        ]
    });

    // Everything after launch sits inside try/finally so the browser is closed
    // even when context or page setup fails.
    try {
        const context = await browser.newContext({
            userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            viewport: { width: 1920, height: 1080 },
            locale: 'en-US',
            timezoneId: 'America/Chicago'
        });

        // Add stealth scripts to avoid detection
        await context.addInitScript(() => {
            Object.defineProperty(navigator, 'webdriver', { get: () => false });
            window.chrome = { runtime: {} };
        });

        const page = await context.newPage();
        const apiResponses = [];
        const capturedCredentials = {};

        // Intercept ALL network requests to capture API/Algolia data and credentials
        attachNetworkCapture(page, apiResponses, capturedCredentials);

        console.log(`Navigating to: ${menuUrl}`);
        await page.goto(menuUrl, {
            waitUntil: 'domcontentloaded',
            timeout: 60000
        });

        // Wait for page to settle
        await page.waitForTimeout(2000);

        await dismissAgeGate(page);

        // Wait for content to load after age gate
        await page.waitForTimeout(3000);

        // Try to scroll to trigger more product loads
        console.log('Scrolling to load more products...');
        for (let i = 0; i < 3; i++) {
            await page.evaluate(() => window.scrollBy(0, 1000));
            await page.waitForTimeout(1000);
        }

        // Extract products from the page DOM as backup
        const domProducts = await extractDomProducts(page);
        console.log(`Extracted ${domProducts.length} products from DOM`);

        // Check for __NEXT_DATA__ or similar embedded data
        const embeddedData = await extractEmbeddedData(page);
        if (embeddedData) {
            console.log(`Found embedded data: ${embeddedData.type}`);
            apiResponses.push(embeddedData);
        }

        // Take a screenshot for debugging; a failure here must not discard the
        // data that has already been collected.
        const screenshotPath = `/tmp/jane-scrape-${Date.now()}.png`;
        try {
            await page.screenshot({ path: screenshotPath, fullPage: true });
            console.log(`Screenshot saved to ${screenshotPath}`);
        }
        catch (e) {
            console.log('Screenshot failed:', describeError(e));
        }

        // Process captured API responses
        logSummary(apiResponses, domProducts, capturedCredentials);

        return {
            apiResponses,
            domProducts,
            embeddedData,
            capturedCredentials
        };
    }
    finally {
        await browser.close();
    }
}

// Main execution
const urlOrStoreId = process.argv[2] || 'https://iheartjane.com/aly2djS2yXoTGnR0/DBeqE6HSSwijog9l'; // Default to The Flower Shop Az
scrapeJaneMenu(urlOrStoreId)
    .then((result) => {
        console.log('\n=== Scrape Complete ===');
        console.log(`Total API responses captured: ${result.apiResponses.length}`);
        console.log(`Total DOM products: ${result.domProducts.length}`);
    })
    .catch((err) => {
        console.error('Scrape failed:', err);
        process.exit(1);
    });