"use strict"; // ============================================================================ // DEPRECATED: Dutchie now crawled via GraphQL only (see dutchie-az pipeline) // DO NOT USE - This HTML scraper is unreliable and targets the legacy products table. // All Dutchie crawling must go through: src/dutchie-az/services/product-crawler.ts // ============================================================================ Object.defineProperty(exports, "__esModule", { value: true }); exports.dutchieTemplate = void 0; exports.getTemplateForUrl = getTemplateForUrl; const logger_1 = require("../../services/logger"); /** * @deprecated DEPRECATED - Dutchie HTML scraping is no longer supported. * Use the dutchie-az GraphQL pipeline instead: src/dutchie-az/services/product-crawler.ts * This template relied on unstable DOM selectors and wrote to legacy tables. */ exports.dutchieTemplate = { name: 'Dutchie Marketplace', urlPattern: /dutchie\.com\/dispensary\//, buildCategoryUrl: (baseUrl, category) => { // Remove trailing slash const base = baseUrl.replace(/\/$/, ''); // Convert category name to URL-friendly slug const categorySlug = category.toLowerCase().replace(/\s+/g, '-'); return `${base}/products/${categorySlug}`; }, extractProducts: async (page) => { const products = []; try { // Wait for product cards to load await page.waitForSelector('a[data-testid="card-link"]', { timeout: 10000 }).catch(() => { logger_1.logger.warn('scraper', 'No product cards found with data-testid="card-link"'); }); // Get all product card links const productCards = await page.locator('a[href*="/product/"][data-testid="card-link"]').all(); logger_1.logger.info('scraper', `Found ${productCards.length} Dutchie product cards`); for (const card of productCards) { try { // Extract all data at once using evaluate for speed const cardData = await card.evaluate((el) => { const href = el.getAttribute('href') || ''; const img = el.querySelector('img'); const imageUrl = img ? img.getAttribute('src') || '' : ''; // Get all text nodes in order const textElements = Array.from(el.querySelectorAll('*')) .filter(el => el.textContent && el.children.length === 0) .map(el => (el.textContent || '').trim()) .filter(text => text.length > 0); const name = textElements[0] || ''; const brand = textElements[1] || ''; // Look for price const priceMatch = el.textContent?.match(/\$(\d+(?:\.\d{2})?)/); const price = priceMatch ? parseFloat(priceMatch[1]) : undefined; return { href, imageUrl, name, brand, price }; }); if (cardData.name && cardData.href) { products.push({ name: cardData.name, brand: cardData.brand || undefined, product_url: cardData.href.startsWith('http') ? cardData.href : `https://dutchie.com${cardData.href}`, image_url: cardData.imageUrl || undefined, price: cardData.price, in_stock: true, }); } } catch (err) { logger_1.logger.warn('scraper', `Error extracting Dutchie product card: ${err}`); } } } catch (err) { logger_1.logger.error('scraper', `Error in Dutchie product extraction: ${err}`); } return products; }, }; /** * Get the appropriate scraper template based on URL */ function getTemplateForUrl(url) { if (exports.dutchieTemplate.urlPattern.test(url)) { return exports.dutchieTemplate; } return null; }