"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.discoverCategories = discoverCategories; const puppeteer_extra_1 = __importDefault(require("puppeteer-extra")); const puppeteer_extra_plugin_stealth_1 = __importDefault(require("puppeteer-extra-plugin-stealth")); const migrate_1 = require("../db/migrate"); const logger_1 = require("./logger"); const age_gate_1 = require("../utils/age-gate"); const dutchie_1 = require("../scrapers/templates/dutchie"); // Apply stealth plugin puppeteer_extra_1.default.use((0, puppeteer_extra_plugin_stealth_1.default)()); const DUTCHIE_CATEGORIES = [ { name: 'Shop', slug: 'shop' }, { name: 'Flower', slug: 'flower', parentSlug: 'shop' }, { name: 'Pre-Rolls', slug: 'pre-rolls', parentSlug: 'shop' }, { name: 'Vaporizers', slug: 'vaporizers', parentSlug: 'shop' }, { name: 'Concentrates', slug: 'concentrates', parentSlug: 'shop' }, { name: 'Edibles', slug: 'edibles', parentSlug: 'shop' }, { name: 'Topicals', slug: 'topicals', parentSlug: 'shop' }, { name: 'Accessories', slug: 'accessories', parentSlug: 'shop' }, { name: 'Brands', slug: 'brands' }, { name: 'Specials', slug: 'specials' } ]; const CURALEAF_CATEGORIES = [ { name: 'Shop', slug: 'shop' }, { name: 'Flower', slug: 'flower', parentSlug: 'shop' }, { name: 'Pre-Rolls', slug: 'pre-rolls', parentSlug: 'shop' }, { name: 'Vaporizers', slug: 'vaporizers', parentSlug: 'shop' }, { name: 'Concentrates', slug: 'concentrates', parentSlug: 'shop' }, { name: 'Edibles', slug: 'edibles', parentSlug: 'shop' }, { name: 'Tinctures', slug: 'tinctures', parentSlug: 'shop' }, { name: 'Topicals', slug: 'topicals', parentSlug: 'shop' }, { name: 'Capsules', slug: 'capsules', parentSlug: 'shop' }, { name: 'Accessories', slug: 'accessories', parentSlug: 'shop' } ]; async function makePageStealthy(page) { await page.evaluateOnNewDocument(() => { Object.defineProperty(navigator, 'webdriver', { get: () => false }); Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] }); Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); window.chrome = { runtime: {} }; }); } async function isDutchieMenu(page) { try { // Check page source for Dutchie markers const isDutchie = await page.evaluate(() => { // Check for window.reactEnv with dutchie URLs if (window.reactEnv) { const env = window.reactEnv; if (env.adminUrl?.includes('dutchie.com') || env.apiUrl?.includes('dutchie.com') || env.consumerUrl?.includes('dutchie.com')) { return true; } } // Check HTML source for dutchie references const htmlContent = document.documentElement.innerHTML; if (htmlContent.includes('admin.dutchie.com') || htmlContent.includes('api.dutchie.com') || htmlContent.includes('embedded-menu') || htmlContent.includes('window.reactEnv')) { return true; } return false; }); return isDutchie; } catch (error) { logger_1.logger.warn('categories', `Error detecting Dutchie menu: ${error}`); return false; } } async function discoverCategories(storeId) { let browser = null; try { logger_1.logger.info('categories', `Discovering categories for store ID: ${storeId}`); const storeResult = await migrate_1.pool.query(` SELECT id, name, slug, dutchie_url FROM stores WHERE id = $1 `, [storeId]); if (storeResult.rows.length === 0) { throw new Error('Store not found'); } const store = storeResult.rows[0]; const baseUrl = store.dutchie_url; // Launch browser to check page source browser = await puppeteer_extra_1.default.launch({ headless: 'new', args: [ '--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-blink-features=AutomationControlled' ] }); const page = await browser.newPage(); await makePageStealthy(page); await page.setViewport({ width: 1920, height: 1080 }); await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'); // Set age gate bypass cookies BEFORE navigation (standard for all cannabis sites) const state = (0, age_gate_1.detectStateFromUrl)(baseUrl); await (0, age_gate_1.setAgeGateCookies)(page, baseUrl, state); logger_1.logger.info('categories', `Loading page to detect menu type: ${baseUrl}`); await page.goto(baseUrl, { waitUntil: 'domcontentloaded', timeout: 60000 }); await page.waitForTimeout(3000); // If age gate still appears, try to bypass it await (0, age_gate_1.bypassAgeGate)(page, state); // Detect if it's a Dutchie menu by inspecting page source const isDutchie = await isDutchieMenu(page); await browser.close(); browser = null; if (isDutchie) { logger_1.logger.info('categories', `✅ Detected Dutchie menu for ${store.name}`); await createDutchieCategories(storeId, store); } else { // Fallback: Use standard cannabis categories for non-Dutchie sites logger_1.logger.info('categories', `Non-Dutchie menu detected, using standard cannabis categories for ${store.name}`); await createCuraleafCategories(storeId, store); } } catch (error) { logger_1.logger.error('categories', `Category discovery error: ${error}`); if (browser) await browser.close(); throw error; } } async function createDutchieCategories(storeId, store) { const client = await migrate_1.pool.connect(); try { await client.query('BEGIN'); logger_1.logger.info('categories', `Creating predefined Dutchie category structure`); const baseUrl = store.dutchie_url; for (const category of DUTCHIE_CATEGORIES) { let categoryUrl; // Use Dutchie template to build correct category URLs if (category.parentSlug) { // Subcategory: Use template's buildCategoryUrl (e.g., /products/flower) categoryUrl = dutchie_1.dutchieTemplate.buildCategoryUrl(baseUrl, category.name); } else { // Top-level: Use base URL with slug categoryUrl = `${baseUrl}/${category.slug}`; } if (!category.parentSlug) { // Create parent category await client.query(` INSERT INTO categories (store_id, name, slug, dutchie_url, scrape_enabled) VALUES ($1, $2, $3, $4, true) ON CONFLICT (store_id, slug) DO UPDATE SET name = $2, dutchie_url = $4 RETURNING id `, [storeId, category.name, category.slug, categoryUrl]); logger_1.logger.info('categories', `📁 ${category.name}`); } else { // Create subcategory const parentResult = await client.query(` SELECT id FROM categories WHERE store_id = $1 AND slug = $2 `, [storeId, category.parentSlug]); if (parentResult.rows.length > 0) { await client.query(` INSERT INTO categories (store_id, name, slug, dutchie_url, scrape_enabled) VALUES ($1, $2, $3, $4, true) ON CONFLICT (store_id, slug) DO UPDATE SET name = $2, dutchie_url = $4 `, [storeId, category.name, category.slug, categoryUrl]); logger_1.logger.info('categories', ` └── ${category.name}`); } } } await client.query('COMMIT'); logger_1.logger.info('categories', `✅ Created ${DUTCHIE_CATEGORIES.length} Dutchie categories successfully`); } catch (error) { await client.query('ROLLBACK'); logger_1.logger.error('categories', `Failed to create Dutchie categories: ${error}`); throw error; } finally { client.release(); } } async function createCuraleafCategories(storeId, store) { const client = await migrate_1.pool.connect(); try { await client.query('BEGIN'); logger_1.logger.info('categories', `Creating predefined Curaleaf category structure`); const baseUrl = store.dutchie_url; for (const category of CURALEAF_CATEGORIES) { let categoryUrl; if (category.parentSlug) { // Subcategory URL - Curaleaf uses pattern like: /stores/{store-slug}/{category} categoryUrl = `${baseUrl}?category=${category.slug}`; } else { // Top-level category categoryUrl = baseUrl; } if (!category.parentSlug) { // Create parent category await client.query(` INSERT INTO categories (store_id, name, slug, dutchie_url, scrape_enabled) VALUES ($1, $2, $3, $4, true) ON CONFLICT (store_id, slug) DO UPDATE SET name = $2, dutchie_url = $4 RETURNING id `, [storeId, category.name, category.slug, categoryUrl]); logger_1.logger.info('categories', `📁 ${category.name}`); } else { // Create subcategory const parentResult = await client.query(` SELECT id FROM categories WHERE store_id = $1 AND slug = $2 `, [storeId, category.parentSlug]); if (parentResult.rows.length > 0) { await client.query(` INSERT INTO categories (store_id, name, slug, dutchie_url, scrape_enabled) VALUES ($1, $2, $3, $4, true) ON CONFLICT (store_id, slug) DO UPDATE SET name = $2, dutchie_url = $4 `, [storeId, category.name, category.slug, categoryUrl]); logger_1.logger.info('categories', ` └── ${category.name}`); } } } await client.query('COMMIT'); logger_1.logger.info('categories', `✅ Created ${CURALEAF_CATEGORIES.length} Curaleaf categories successfully`); } catch (error) { await client.query('ROLLBACK'); logger_1.logger.error('categories', `Failed to create Curaleaf categories: ${error}`); throw error; } finally { client.release(); } }