Fix Dutchie scraper to wait for React content to load
- Change waitUntil from 'domcontentloaded' to 'networkidle2' for SPAs
- Add waitForSelector to wait for product elements before parsing
- WordPress plugin: update API endpoints to use a hardcoded URL

The scraper was returning 0 products because it wasn't waiting for React to render the product list. It now waits for either a product list item or the empty-state indicator to appear before parsing.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -378,9 +378,9 @@ export class Downloader {
|
||||
await page.setUserAgent(request.metadata.userAgent);
|
||||
}
|
||||
|
||||
// Navigate to page
|
||||
// Navigate to page - use networkidle2 for SPAs like Dutchie
|
||||
const navigationPromise = page.goto(request.url, {
|
||||
waitUntil: 'domcontentloaded',
|
||||
waitUntil: 'networkidle2',
|
||||
timeout: 60000
|
||||
});
|
||||
|
||||
@@ -390,8 +390,19 @@ export class Downloader {
|
||||
throw new Error('Navigation failed - no response');
|
||||
}
|
||||
|
||||
// Wait for initial render
|
||||
await page.waitForTimeout(3000);
|
||||
// Wait for React to render product content
|
||||
// Try to wait for products, but don't fail if they don't appear (empty category)
|
||||
try {
|
||||
await page.waitForSelector('[data-testid="product-list-item"], [data-testid="empty-state"]', {
|
||||
timeout: 10000
|
||||
});
|
||||
} catch {
|
||||
// Products might not exist in this category - continue anyway
|
||||
logger.debug('scraper', 'No products found within timeout - continuing');
|
||||
}
|
||||
|
||||
// Additional wait for any lazy-loaded content
|
||||
await page.waitForTimeout(2000);
|
||||
|
||||
// Check for lazy-loaded content
|
||||
await this.autoScroll(page);
|
||||
|
||||
Reference in New Issue
Block a user