Fix Dutchie scraper to wait for React content to load

- Change waitUntil from 'domcontentloaded' to 'networkidle2' for SPAs
- Add waitForSelector to wait for product elements before parsing
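- WordPress plugin: replace the configurable API URL setting with a hardcoded DUTCHIE_MENUS_API_URL constant, remove the Default Store ID setting, and bump the version to 1.1.0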

The scraper was returning 0 products because it parsed the page before
React had rendered the product list. It now waits up to 10 seconds for
either a product list item or an empty-state indicator, and continues
anyway if neither appears (for example, in an empty category).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-01 09:33:32 -07:00
parent 199b6a8a23
commit e518bb8169
2 changed files with 28 additions and 36 deletions

View File

@@ -378,9 +378,9 @@ export class Downloader {
await page.setUserAgent(request.metadata.userAgent); await page.setUserAgent(request.metadata.userAgent);
} }
// Navigate to page // Navigate to page - use networkidle2 for SPAs like Dutchie
const navigationPromise = page.goto(request.url, { const navigationPromise = page.goto(request.url, {
waitUntil: 'domcontentloaded', waitUntil: 'networkidle2',
timeout: 60000 timeout: 60000
}); });
@@ -390,8 +390,19 @@ export class Downloader {
throw new Error('Navigation failed - no response'); throw new Error('Navigation failed - no response');
} }
// Wait for initial render // Wait for React to render product content
await page.waitForTimeout(3000); // Try to wait for products, but don't fail if they don't appear (empty category)
try {
await page.waitForSelector('[data-testid="product-list-item"], [data-testid="empty-state"]', {
timeout: 10000
});
} catch {
// Products might not exist in this category - continue anyway
logger.debug('scraper', 'No products found within timeout - continuing');
}
// Additional wait for any lazy-loaded content
await page.waitForTimeout(2000);
// Check for lazy-loaded content // Check for lazy-loaded content
await this.autoScroll(page); await this.autoScroll(page);

View File

@@ -1,11 +1,11 @@
<?php <?php
/** /**
* Plugin Name: Dutchie Menus * Plugin Name: Dutchie Menus
* Plugin URI: https://github.com/yourusername/dutchie-menus * Plugin URI: https://creationshop.io
* Description: Display cannabis product menus from your Dutchie scraper with Elementor integration * Description: Display cannabis product menus from your Dutchie scraper with Elementor integration
* Version: 1.0.0 * Version: 1.1.0
* Author: Your Name * Author: Creationshop
* Author URI: https://yoursite.com * Author URI: https://creationshop.io
* License: GPL v2 or later * License: GPL v2 or later
* Text Domain: dutchie-menus * Text Domain: dutchie-menus
* Requires PHP: 7.4 * Requires PHP: 7.4
@@ -15,7 +15,8 @@ if (!defined('ABSPATH')) {
exit; // Exit if accessed directly exit; // Exit if accessed directly
} }
define('DUTCHIE_MENUS_VERSION', '1.0.0'); define('DUTCHIE_MENUS_VERSION', '1.1.0');
define('DUTCHIE_MENUS_API_URL', 'https://dispos.crawlsy.com/api');
define('DUTCHIE_MENUS_PLUGIN_DIR', plugin_dir_path(__FILE__)); define('DUTCHIE_MENUS_PLUGIN_DIR', plugin_dir_path(__FILE__));
define('DUTCHIE_MENUS_PLUGIN_URL', plugin_dir_url(__FILE__)); define('DUTCHIE_MENUS_PLUGIN_URL', plugin_dir_url(__FILE__));
@@ -100,9 +101,7 @@ class Dutchie_Menus_Plugin {
* Register Plugin Settings * Register Plugin Settings
*/ */
public function register_settings() { public function register_settings() {
register_setting('dutchie_menus_settings', 'dutchie_api_url');
register_setting('dutchie_menus_settings', 'dutchie_api_token'); register_setting('dutchie_menus_settings', 'dutchie_api_token');
register_setting('dutchie_menus_settings', 'dutchie_default_store_id');
} }
/** /**
@@ -112,36 +111,20 @@ class Dutchie_Menus_Plugin {
?> ?>
<div class="wrap"> <div class="wrap">
<h1>Dutchie Menus Settings</h1> <h1>Dutchie Menus Settings</h1>
<p>Version <?php echo DUTCHIE_MENUS_VERSION; ?> by <a href="https://creationshop.io" target="_blank">Creationshop</a></p>
<form method="post" action="options.php"> <form method="post" action="options.php">
<?php settings_fields('dutchie_menus_settings'); ?> <?php settings_fields('dutchie_menus_settings'); ?>
<?php do_settings_sections('dutchie_menus_settings'); ?> <?php do_settings_sections('dutchie_menus_settings'); ?>
<table class="form-table"> <table class="form-table">
<tr>
<th scope="row"><label for="dutchie_api_url">API URL</label></th>
<td>
<input type="url" id="dutchie_api_url" name="dutchie_api_url"
value="<?php echo esc_attr(get_option('dutchie_api_url', 'http://localhost:3010')); ?>"
class="regular-text" />
<p class="description">Your Dutchie Menus API endpoint (e.g., http://localhost:3010)</p>
</td>
</tr>
<tr> <tr>
<th scope="row"><label for="dutchie_api_token">API Token</label></th> <th scope="row"><label for="dutchie_api_token">API Token</label></th>
<td> <td>
<input type="password" id="dutchie_api_token" name="dutchie_api_token" <input type="password" id="dutchie_api_token" name="dutchie_api_token"
value="<?php echo esc_attr(get_option('dutchie_api_token')); ?>" value="<?php echo esc_attr(get_option('dutchie_api_token')); ?>"
class="regular-text" /> class="regular-text" />
<p class="description">Your authentication token from the admin dashboard</p> <p class="description">Your authentication token from the admin dashboard. The token includes your store configuration.</p>
</td>
</tr>
<tr>
<th scope="row"><label for="dutchie_default_store_id">Default Store ID</label></th>
<td>
<input type="number" id="dutchie_default_store_id" name="dutchie_default_store_id"
value="<?php echo esc_attr(get_option('dutchie_default_store_id', '1')); ?>"
class="small-text" />
<p class="description">Default store ID to use</p>
</td> </td>
</tr> </tr>
</table> </table>
@@ -158,13 +141,13 @@ class Dutchie_Menus_Plugin {
<script> <script>
jQuery(document).ready(function($) { jQuery(document).ready(function($) {
$('#test-api-connection').on('click', function() { $('#test-api-connection').on('click', function() {
var apiUrl = $('#dutchie_api_url').val(); var apiUrl = '<?php echo DUTCHIE_MENUS_API_URL; ?>';
var apiToken = $('#dutchie_api_token').val(); var apiToken = $('#dutchie_api_token').val();
$('#api-test-result').html('<p>Testing connection...</p>'); $('#api-test-result').html('<p>Testing connection...</p>');
$.ajax({ $.ajax({
url: apiUrl + '/api/auth/me', url: apiUrl + '/auth/me',
method: 'GET', method: 'GET',
headers: { headers: {
'Authorization': 'Bearer ' + apiToken 'Authorization': 'Bearer ' + apiToken
@@ -239,7 +222,6 @@ class Dutchie_Menus_Plugin {
* Fetch Products from API * Fetch Products from API
*/ */
public function fetch_products($args = []) { public function fetch_products($args = []) {
$api_url = get_option('dutchie_api_url', 'http://localhost:3010');
$api_token = get_option('dutchie_api_token'); $api_token = get_option('dutchie_api_token');
if (!$api_token) { if (!$api_token) {
@@ -247,7 +229,7 @@ class Dutchie_Menus_Plugin {
} }
$query_args = http_build_query($args); $query_args = http_build_query($args);
$url = $api_url . '/api/products?' . $query_args; $url = DUTCHIE_MENUS_API_URL . '/products?' . $query_args;
$response = wp_remote_get($url, [ $response = wp_remote_get($url, [
'headers' => [ 'headers' => [
@@ -270,14 +252,13 @@ class Dutchie_Menus_Plugin {
* Fetch Single Product from API * Fetch Single Product from API
*/ */
public function fetch_product($id) { public function fetch_product($id) {
$api_url = get_option('dutchie_api_url', 'http://localhost:3010');
$api_token = get_option('dutchie_api_token'); $api_token = get_option('dutchie_api_token');
if (!$api_token) { if (!$api_token) {
return false; return false;
} }
$url = $api_url . '/api/products/' . intval($id); $url = DUTCHIE_MENUS_API_URL . '/products/' . intval($id);
$response = wp_remote_get($url, [ $response = wp_remote_get($url, [
'headers' => [ 'headers' => [