import { pool } from '../db/pool';
import { logger } from '../services/logger';
import { Downloader } from './downloader';
import { ScraperRequest } from './types';

/**
 * Category record.
 * NOTE(review): not referenced by the code in this file; kept so nothing
 * that may rely on the declaration breaks.
 */
interface Category {
  id?: number;
  storeId: number;
  name: string;
  slug: string;
  dutchieUrl: string;
  parentId?: number;
  path: string;
  displayOrder?: number;
}

/** A link collected from a page's navigation-like elements. */
interface NavigationLink {
  /** Trimmed text content of the anchor. */
  text: string;
  /** Link target (made absolute when the raw value started with `/`). */
  href: string;
  /** True when the text or href contains one of the category keywords. */
  isCategory: boolean;
  /** Never populated by the code in this file. */
  parentText?: string;
}

/**
 * Columns selected from `dispensaries` by `discoverCategories`.
 * NOTE(review): column types are assumed from usage — confirm against the schema.
 */
interface StoreRow {
  id: number;
  name: string;
  slug: string;
  /** `menu_url` aliased as `dutchie_url` in the query. */
  dutchie_url: string;
}

/** One entry of the predefined Dutchie category tree. */
interface PredefinedCategory {
  name: string;
  slug: string;
  /** Slug of the parent entry; absent for top-level categories. */
  parentSlug?: string;
}

/**
 * Predefined Dutchie category structure. Parents are listed before their
 * children so a parent row exists by the time a child is inserted.
 */
const DUTCHIE_CATEGORIES: ReadonlyArray<PredefinedCategory> = [
  { name: 'Shop', slug: 'shop' },
  { name: 'Flower', slug: 'flower', parentSlug: 'shop' },
  { name: 'Pre-Rolls', slug: 'pre-rolls', parentSlug: 'shop' },
  { name: 'Vaporizers', slug: 'vaporizers', parentSlug: 'shop' },
  { name: 'Concentrates', slug: 'concentrates', parentSlug: 'shop' },
  { name: 'Edibles', slug: 'edibles', parentSlug: 'shop' },
  { name: 'Topicals', slug: 'topicals', parentSlug: 'shop' },
  { name: 'Accessories', slug: 'accessories', parentSlug: 'shop' },
  { name: 'Brands', slug: 'brands' },
  { name: 'Specials', slug: 'specials' }
];

/**
 * Turn link text into a slug: lower-case, runs of characters outside
 * `[a-z0-9]` collapsed to `-`, and a leading/trailing `-` removed.
 * Returns an empty string when the text has no ASCII letters or digits.
 */
function toSlug(text: string): string {
  return text
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '');
}

/**
 * Navigation Discovery - finds and builds category structure
 */
export class NavigationDiscovery {
  private readonly downloader: Downloader;

  constructor(downloader: Downloader) {
    this.downloader = downloader;
  }

  /**
   * Discover categories from a store's main page and persist them.
   *
   * Loads the store's menu URL through the downloader, collects navigation
   * links from the active page, then either writes the predefined Dutchie
   * category tree or derives categories from the collected links.
   *
   * @param storeId - `dispensaries.id` of the store to process.
   * @throws Error when the store does not exist, has no menu URL, or the
   *   downloader exposes no active page; any downstream error is logged and
   *   rethrown.
   */
  async discoverCategories(storeId: number): Promise<void> {
    logger.info('categories', `Starting category discovery for store ${storeId}`);

    try {
      // Get dispensary info (store = dispensary)
      const storeResult = await pool.query(
        `
        SELECT id, name, slug, menu_url as dutchie_url
        FROM dispensaries
        WHERE id = $1
      `,
        [storeId]
      );

      if (storeResult.rows.length === 0) {
        throw new Error('Store not found');
      }

      const store: StoreRow = storeResult.rows[0];
      const baseUrl = store.dutchie_url;

      // Without a menu URL there is nothing to fetch; fail with a clear message
      // instead of handing `null`/`''` to the downloader.
      if (!baseUrl) {
        throw new Error(`Store ${storeId} has no menu URL`);
      }

      // Create request to fetch the main page
      const request: ScraperRequest = {
        url: baseUrl,
        priority: 100,
        retryCount: 0,
        maxRetries: 3,
        metadata: {
          requiresBrowser: true,
          requiresStealth: true
        },
        callback: async () => ({ items: [], requests: [] })
      };

      // Fetch the page; the result itself is not needed, only the page the
      // downloader exposes afterwards.
      await this.downloader.fetch(request);

      // Extract navigation links
      const page = await this.downloader.getCurrentPage();
      if (!page) {
        throw new Error('No active page for navigation extraction');
      }

      const links = await this.extractNavigationLinks(page, baseUrl);
      logger.info('categories', `Found ${links.length} navigation links`);

      // Check if it's a Dutchie menu
      const isDutchie = await this.isDutchieMenu(page);

      if (isDutchie) {
        logger.info('categories', 'Detected Dutchie menu - using predefined structure');
        await this.createDutchieCategories(storeId, store, links);
      } else {
        logger.info('categories', 'Custom menu detected - extracting from navigation');
        await this.createCustomCategories(storeId, store, links);
      }

      logger.info('categories', `✅ Category discovery completed for ${store.name}`);
    } catch (error) {
      logger.error('categories', `Category discovery failed: ${error}`);
      throw error;
    }
  }

  /**
   * Extract navigation links from page.
   *
   * The callback passed to `page.evaluate` runs in the browser context, so it
   * must be self-contained: it may only use its `base` argument and browser
   * globals, never identifiers from this module.
   *
   * @param page - Browser page object exposing `evaluate`.
   * @param baseUrl - Store menu URL; links whose href does not contain it are dropped.
   * @returns De-duplicated (by text + href) links, each flagged with `isCategory`.
   */
  private async extractNavigationLinks(page: any, baseUrl: string): Promise<NavigationLink[]> {
    return await page.evaluate((base: string) => {
      const links: NavigationLink[] = [];

      // Look for navigation elements
      const navSelectors = [
        'nav a',
        '[role="navigation"] a',
        '[class*="nav"] a',
        '[class*="menu"] a',
        '[class*="category"] a',
        'header a'
      ];

      // Keywords that mark a link as a likely category (built once, not per element)
      const categoryKeywords = [
        'flower', 'pre-roll', 'vape', 'edible', 'concentrate',
        'topical', 'accessory', 'brand', 'special', 'shop',
        'indica', 'sativa', 'hybrid', 'cbd', 'thc'
      ];

      const foundLinks = new Set<string>();

      for (const selector of navSelectors) {
        // @ts-ignore - runs in browser context
        const elements = document.querySelectorAll(selector);

        elements.forEach((el: any) => {
          const text = el.textContent?.trim();
          let href = el.href || el.getAttribute('href');

          if (!text || !href || text.length < 2) return;

          // Normalize href
          if (href.startsWith('/')) {
            // @ts-ignore - runs in browser context
            const url = new URL(base);
            href = `${url.origin}${href}`;
          }

          // Skip external links and anchors
          if (!href.includes(base) || href.includes('#')) return;

          // Skip duplicates
          const linkKey = `${text}:${href}`;
          if (foundLinks.has(linkKey)) return;
          foundLinks.add(linkKey);

          // Determine if it's likely a category
          const lowerText = text.toLowerCase();
          const lowerHref = href.toLowerCase();
          const isCategory = categoryKeywords.some(
            kw => lowerText.includes(kw) || lowerHref.includes(kw)
          );

          links.push({ text, href, isCategory });
        });
      }

      return links;
    }, baseUrl);
  }

  /**
   * Check if it's a Dutchie menu.
   *
   * Returns true when `window.reactEnv` has an `adminUrl`, `apiUrl` or
   * `consumerUrl` containing `dutchie.com`, or when the document HTML contains
   * one of the marker strings checked below.
   *
   * @param page - Browser page object exposing `evaluate`.
   */
  private async isDutchieMenu(page: any): Promise<boolean> {
    return await page.evaluate(() => {
      // Check for Dutchie markers
      // @ts-ignore - runs in browser context
      const env = (window as any).reactEnv;
      if (env) {
        if (
          env.adminUrl?.includes('dutchie.com') ||
          env.apiUrl?.includes('dutchie.com') ||
          env.consumerUrl?.includes('dutchie.com')
        ) {
          return true;
        }
      }

      // @ts-ignore - runs in browser context
      const htmlContent = document.documentElement.innerHTML;
      return (
        htmlContent.includes('admin.dutchie.com') ||
        htmlContent.includes('api.dutchie.com') ||
        htmlContent.includes('embedded-menu') ||
        htmlContent.includes('window.reactEnv')
      );
    });
  }

  /**
   * Create categories for Dutchie menus (predefined structure).
   *
   * Upserts every entry of `DUTCHIE_CATEGORIES` for the store inside a single
   * transaction; on any failure the transaction is rolled back and the error
   * is logged and rethrown.
   *
   * @param storeId - Store the categories belong to.
   * @param store - Store row; only `dutchie_url` is read.
   * @param discoveredLinks - Currently unused; the structure is predefined.
   */
  private async createDutchieCategories(
    storeId: number,
    store: StoreRow,
    discoveredLinks: NavigationLink[]
  ): Promise<void> {
    const client = await pool.connect();

    try {
      await client.query('BEGIN');

      logger.info('categories', `Creating predefined Dutchie category structure`);

      // Strip trailing slashes so joined URLs never contain `//`.
      const baseUrl = store.dutchie_url.replace(/\/+$/, '');

      // slug -> id of parents seen so far, so each parent is looked up at most once.
      const parentIds = new Map<string, number>();
      let createdCount = 0;

      for (const category of DUTCHIE_CATEGORIES) {
        // Top-level: {base}/shop            path: shop
        // Subcategory: {base}/shop/flower   path: shop/flower
        const path = category.parentSlug
          ? `${category.parentSlug}/${category.slug}`
          : category.slug;
        const categoryUrl = `${baseUrl}/${path}`;

        if (!category.parentSlug) {
          // Create parent category
          const inserted = await client.query(
            `
            INSERT INTO categories (store_id, name, slug, dutchie_url, path, scrape_enabled, parent_id)
            VALUES ($1, $2, $3, $4, $5, true, NULL)
            ON CONFLICT (store_id, slug)
            DO UPDATE SET name = $2, dutchie_url = $4, path = $5
            RETURNING id
          `,
            [storeId, category.name, category.slug, categoryUrl, path]
          );

          const insertedId = inserted.rows[0]?.id;
          if (insertedId != null) {
            parentIds.set(category.slug, insertedId);
          }

          createdCount++;
          logger.info('categories', `📁 ${category.name}`);
          continue;
        }

        // Create subcategory: resolve the parent id, from cache when possible.
        let parentId = parentIds.get(category.parentSlug);
        if (parentId === undefined) {
          const parentResult = await client.query(
            `
            SELECT id FROM categories
            WHERE store_id = $1 AND slug = $2
          `,
            [storeId, category.parentSlug]
          );

          if (parentResult.rows.length === 0) {
            logger.warn(
              'categories',
              `Skipping ${category.name}: parent category "${category.parentSlug}" not found`
            );
            continue;
          }

          const foundId: number = parentResult.rows[0].id;
          parentIds.set(category.parentSlug, foundId);
          parentId = foundId;
        }

        await client.query(
          `
          INSERT INTO categories (store_id, name, slug, dutchie_url, path, scrape_enabled, parent_id)
          VALUES ($1, $2, $3, $4, $5, true, $6)
          ON CONFLICT (store_id, slug)
          DO UPDATE SET name = $2, dutchie_url = $4, path = $5, parent_id = $6
        `,
          [storeId, category.name, category.slug, categoryUrl, path, parentId]
        );

        createdCount++;
        logger.info('categories', ` └── ${category.name}`);
      }

      await client.query('COMMIT');
      logger.info('categories', `✅ Created ${createdCount} Dutchie categories successfully`);
    } catch (error) {
      // A failing ROLLBACK must not hide the error that triggered it.
      try {
        await client.query('ROLLBACK');
      } catch (rollbackError) {
        logger.error('categories', `Rollback failed: ${rollbackError}`);
      }
      logger.error('categories', `Failed to create Dutchie categories: ${error}`);
      throw error;
    } finally {
      client.release();
    }
  }

  /**
   * Create categories from discovered links (custom menus).
   *
   * Upserts one category per link flagged `isCategory`, inside a single
   * transaction. The slug is derived from the link text and the path from the
   * href's pathname. Links whose text yields an empty slug are skipped.
   *
   * @param storeId - Store the categories belong to.
   * @param store - Currently unused.
   * @param links - Links produced by `extractNavigationLinks`.
   */
  private async createCustomCategories(
    storeId: number,
    store: StoreRow,
    links: NavigationLink[]
  ): Promise<void> {
    const client = await pool.connect();

    try {
      await client.query('BEGIN');

      // Filter to likely category links
      const categoryLinks = links.filter(link => link.isCategory);

      let displayOrder = 0;
      let createdCount = 0;

      for (const link of categoryLinks) {
        // Generate slug from text
        const slug = toSlug(link.text);

        // Text without ASCII letters/digits gives '', which would collapse
        // every such link onto a single (store_id, '') row.
        if (slug === '') {
          logger.warn('categories', `Skipping link with no usable slug: ${link.text} -> ${link.href}`);
          continue;
        }

        // Determine path from URL
        const path = new URL(link.href).pathname.replace(/^\//, '');

        await client.query(
          `
          INSERT INTO categories (store_id, name, slug, dutchie_url, path, scrape_enabled, display_order)
          VALUES ($1, $2, $3, $4, $5, true, $6)
          ON CONFLICT (store_id, slug)
          DO UPDATE SET name = $2, dutchie_url = $4, path = $5, display_order = $6
        `,
          [storeId, link.text, slug, link.href, path, displayOrder++]
        );

        createdCount++;
        logger.info('categories', `📁 ${link.text} -> ${link.href}`);
      }

      await client.query('COMMIT');
      logger.info('categories', `✅ Created ${createdCount} custom categories`);
    } catch (error) {
      // A failing ROLLBACK must not hide the error that triggered it.
      try {
        await client.query('ROLLBACK');
      } catch (rollbackError) {
        logger.error('categories', `Rollback failed: ${rollbackError}`);
      }
      logger.error('categories', `Failed to create custom categories: ${error}`);
      throw error;
    } finally {
      client.release();
    }
  }

  /**
   * Ensure the `display_order` column exists on the `categories` table.
   *
   * Best-effort: a failure is logged as a warning and not rethrown.
   */
  async ensureDisplayOrderColumn(): Promise<void> {
    try {
      await pool.query(`
        ALTER TABLE categories
        ADD COLUMN IF NOT EXISTS display_order INTEGER DEFAULT 0
      `);
      logger.info('categories', 'Ensured display_order column exists');
    } catch (error) {
      logger.warn('categories', `Could not add display_order column: ${error}`);
    }
  }
}