From e518bb8169cac71be954c64e4228e419f58c7e83 Mon Sep 17 00:00:00 2001 From: Kelly Date: Mon, 1 Dec 2025 09:33:32 -0700 Subject: [PATCH] Fix Dutchie scraper to wait for React content to load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Change waitUntil from 'domcontentloaded' to 'networkidle2' for SPAs - Add waitForSelector to wait for product elements before parsing - WordPress plugin: update API endpoints to use hardcoded URL The scraper was returning 0 products because it wasn't waiting for React to render the product list. Now it properly waits for either the product list items or an empty state indicator. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- backend/src/scraper-v2/downloader.ts | 19 +++++++++--- wordpress-plugin/dutchie-menus.php | 45 ++++++++-------------------- 2 files changed, 28 insertions(+), 36 deletions(-) diff --git a/backend/src/scraper-v2/downloader.ts b/backend/src/scraper-v2/downloader.ts index 4614b408..455d0c21 100644 --- a/backend/src/scraper-v2/downloader.ts +++ b/backend/src/scraper-v2/downloader.ts @@ -378,9 +378,9 @@ export class Downloader { await page.setUserAgent(request.metadata.userAgent); } - // Navigate to page + // Navigate to page - use networkidle2 for SPAs like Dutchie const navigationPromise = page.goto(request.url, { - waitUntil: 'domcontentloaded', + waitUntil: 'networkidle2', timeout: 60000 }); @@ -390,8 +390,19 @@ export class Downloader { throw new Error('Navigation failed - no response'); } - // Wait for initial render - await page.waitForTimeout(3000); + // Wait for React to render product content + // Try to wait for products, but don't fail if they don't appear (empty category) + try { + await page.waitForSelector('[data-testid="product-list-item"], [data-testid="empty-state"]', { + timeout: 10000 + }); + } catch { + // Products might not exist in this category - continue anyway + logger.debug('scraper', 'No products found within timeout - continuing'); + } + + // Additional wait for any lazy-loaded content + await page.waitForTimeout(2000); // Check for lazy-loaded content await this.autoScroll(page); diff --git a/wordpress-plugin/dutchie-menus.php b/wordpress-plugin/dutchie-menus.php index 74fc276b..30a774ea 100644 --- a/wordpress-plugin/dutchie-menus.php +++ b/wordpress-plugin/dutchie-menus.php @@ -1,11 +1,11 @@

Dutchie Menus Settings

+

Version by Creationshop

+
- - - - - - - -
- -

Your Dutchie Menus API endpoint (e.g., http://localhost:3010)

-
-

Your authentication token from the admin dashboard

-
- -

Default store ID to use

+

Your authentication token from the admin dashboard. The token includes your store configuration.

@@ -158,13 +141,13 @@ class Dutchie_Menus_Plugin {