279 lines
12 KiB
JavaScript
279 lines
12 KiB
JavaScript
"use strict";
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.NavigationDiscovery = void 0;
|
|
const migrate_1 = require("../db/migrate");
|
|
const logger_1 = require("../services/logger");
|
|
/**
|
|
* Navigation Discovery - finds and builds category structure
|
|
*/
|
|
class NavigationDiscovery {
    // Downloader collaborator; this class calls its fetch(request) and getCurrentPage().
    downloader;

    /**
     * @param {object} downloader - Object exposing `fetch(request)` and `getCurrentPage()`.
     */
    constructor(downloader) {
        this.downloader = downloader;
    }

    /**
     * Issue ROLLBACK on a transaction client without letting a rollback failure
     * replace the error that triggered it. A failed rollback is logged as a warning.
     *
     * @param {object} client - Pooled client exposing `query(sql)`.
     * @returns {Promise<void>}
     */
    static async #rollbackQuietly(client) {
        try {
            await client.query('ROLLBACK');
        }
        catch (rollbackError) {
            logger_1.logger.warn('categories', `Rollback failed: ${rollbackError}`);
        }
    }

    /**
     * Discover categories from a store's main page.
     *
     * Loads the store row, fetches its `dutchie_url` through the downloader,
     * extracts navigation links from the live page and then persists either the
     * predefined Dutchie structure or categories derived from the links.
     *
     * @param {number|string} storeId - Value matched against `stores.id`.
     * @returns {Promise<void>}
     * @throws {Error} If the store does not exist, has no `dutchie_url`, no page
     *   is available after the fetch, or any downstream step fails. Failures are
     *   logged and rethrown unchanged.
     */
    async discoverCategories(storeId) {
        logger_1.logger.info('categories', `Starting category discovery for store ${storeId}`);
        try {
            // Get store info
            const storeResult = await migrate_1.pool.query(`
        SELECT id, name, slug, dutchie_url
        FROM stores
        WHERE id = $1
      `, [storeId]);
            if (storeResult.rows.length === 0) {
                throw new Error('Store not found');
            }
            const store = storeResult.rows[0];
            const baseUrl = store.dutchie_url;
            if (!baseUrl) {
                // Without a URL every later step would operate on "null"/"undefined" strings.
                throw new Error(`Store ${storeId} has no dutchie_url configured`);
            }
            // Request describing the main page fetch
            const request = {
                url: baseUrl,
                priority: 100,
                retryCount: 0,
                maxRetries: 3,
                metadata: {
                    requiresBrowser: true,
                    requiresStealth: true
                },
                callback: async () => ({ items: [], requests: [] })
            };
            // The fetch result itself is not needed; the page is read back from the downloader.
            await this.downloader.fetch(request);
            const page = await this.downloader.getCurrentPage();
            if (!page) {
                throw new Error('No active page for navigation extraction');
            }
            const links = await this.extractNavigationLinks(page, baseUrl);
            logger_1.logger.info('categories', `Found ${links.length} navigation links`);
            // Check if it's a Dutchie menu
            const isDutchie = await this.isDutchieMenu(page);
            if (isDutchie) {
                logger_1.logger.info('categories', 'Detected Dutchie menu - using predefined structure');
                await this.createDutchieCategories(storeId, store, links);
            }
            else {
                logger_1.logger.info('categories', 'Custom menu detected - extracting from navigation');
                await this.createCustomCategories(storeId, store, links);
            }
            logger_1.logger.info('categories', `✅ Category discovery completed for ${store.name}`);
        }
        catch (error) {
            logger_1.logger.error('categories', `Category discovery failed: ${error}`);
            throw error;
        }
    }

    /**
     * Extract navigation links from the page.
     *
     * The callback is handed to `page.evaluate`, so it only uses values defined
     * inside itself plus the `base` argument (it is meant to run in the browser
     * context and cannot rely on anything from this module).
     *
     * A link is kept when it has text of at least two characters, its URL starts
     * with `baseUrl`, and it contains no `#`. Links are de-duplicated on the
     * `text:href` pair.
     *
     * @param {object} page - Object exposing `evaluate(fn, arg)`.
     * @param {string} baseUrl - Absolute URL of the store menu.
     * @returns {Promise<Array<{text: string, href: string, isCategory: boolean}>>}
     */
    async extractNavigationLinks(page, baseUrl) {
        return await page.evaluate((base) => {
            const navSelectors = [
                'nav a',
                '[role="navigation"] a',
                '[class*="nav"] a',
                '[class*="menu"] a',
                '[class*="category"] a',
                'header a'
            ];
            // Hoisted so the list is built once rather than once per element.
            const categoryKeywords = [
                'flower', 'pre-roll', 'vape', 'edible', 'concentrate',
                'topical', 'accessory', 'brand', 'special', 'shop',
                'indica', 'sativa', 'hybrid', 'cbd', 'thc'
            ];
            const links = [];
            const seen = new Set();
            for (const selector of navSelectors) {
                for (const el of document.querySelectorAll(selector)) {
                    const text = el.textContent?.trim();
                    // `href` is a string on HTML anchors but an object on SVG <a>;
                    // fall back to the raw attribute whenever it is not a usable string.
                    let href = typeof el.href === 'string' && el.href !== ''
                        ? el.href
                        : el.getAttribute('href');
                    if (!text || !href || text.length < 2) {
                        continue;
                    }
                    if (href.startsWith('/')) {
                        // Resolves both "/path" and protocol-relative "//host/path" correctly.
                        href = new URL(href, base).href;
                    }
                    // Skip external links and anchors. A prefix test (not a substring
                    // test) so URLs that merely embed the base, e.g. share links, are rejected.
                    if (!href.startsWith(base) || href.includes('#')) {
                        continue;
                    }
                    const linkKey = `${text}:${href}`;
                    if (seen.has(linkKey)) {
                        continue;
                    }
                    seen.add(linkKey);
                    const lowerText = text.toLowerCase();
                    const lowerHref = href.toLowerCase();
                    const isCategory = categoryKeywords.some((kw) => lowerText.includes(kw) || lowerHref.includes(kw));
                    links.push({ text, href, isCategory });
                }
            }
            return links;
        }, baseUrl);
    }

    /**
     * Check if the page is a Dutchie menu.
     *
     * Returns true when `window.reactEnv` carries an admin/api/consumer URL
     * containing "dutchie.com", or when the document HTML contains one of the
     * known marker strings.
     *
     * @param {object} page - Object exposing `evaluate(fn)`.
     * @returns {Promise<boolean>}
     */
    async isDutchieMenu(page) {
        return await page.evaluate(() => {
            const env = window.reactEnv;
            if (env) {
                const envUrls = [env.adminUrl, env.apiUrl, env.consumerUrl];
                // Type guard so a non-string value cannot throw on `.includes`.
                if (envUrls.some((value) => typeof value === 'string' && value.includes('dutchie.com'))) {
                    return true;
                }
            }
            const htmlContent = document.documentElement.innerHTML;
            const markers = ['admin.dutchie.com', 'api.dutchie.com', 'embedded-menu', 'window.reactEnv'];
            return markers.some((marker) => htmlContent.includes(marker));
        });
    }

    /**
     * Create categories for Dutchie menus (predefined structure).
     *
     * Upserts a fixed list of categories for the store inside one transaction.
     * Top-level rows are written with `parent_id = NULL`; children reference the
     * id returned by their parent's upsert earlier in the same run.
     *
     * @param {number|string} storeId - Store the categories belong to.
     * @param {{dutchie_url: string}} store - Store row; only `dutchie_url` is read.
     * @param {Array<object>} discoveredLinks - Currently unused; kept for interface compatibility.
     * @returns {Promise<void>}
     * @throws Rethrows any query error after rolling the transaction back.
     */
    async createDutchieCategories(storeId, store, discoveredLinks) {
        const client = await migrate_1.pool.connect();
        try {
            await client.query('BEGIN');
            logger_1.logger.info('categories', `Creating predefined Dutchie category structure`);
            // Trailing slashes are dropped so joined URLs never contain "//".
            const baseUrl = String(store.dutchie_url).replace(/\/+$/, '');
            // Parents must appear before their children in this list.
            const DUTCHIE_CATEGORIES = [
                { name: 'Shop', slug: 'shop', parentSlug: undefined },
                { name: 'Flower', slug: 'flower', parentSlug: 'shop' },
                { name: 'Pre-Rolls', slug: 'pre-rolls', parentSlug: 'shop' },
                { name: 'Vaporizers', slug: 'vaporizers', parentSlug: 'shop' },
                { name: 'Concentrates', slug: 'concentrates', parentSlug: 'shop' },
                { name: 'Edibles', slug: 'edibles', parentSlug: 'shop' },
                { name: 'Topicals', slug: 'topicals', parentSlug: 'shop' },
                { name: 'Accessories', slug: 'accessories', parentSlug: 'shop' },
                { name: 'Brands', slug: 'brands', parentSlug: undefined },
                { name: 'Specials', slug: 'specials', parentSlug: undefined }
            ];
            // slug -> id of top-level categories upserted during this run
            const parentIds = new Map();
            let createdCount = 0;
            for (const { name, slug, parentSlug } of DUTCHIE_CATEGORIES) {
                // e.g. "shop" for a top-level entry, "shop/flower" for a child
                const path = parentSlug ? `${parentSlug}/${slug}` : slug;
                const categoryUrl = `${baseUrl}/${path}`;
                if (!parentSlug) {
                    const inserted = await client.query(`
            INSERT INTO categories (store_id, name, slug, dutchie_url, path, scrape_enabled, parent_id)
            VALUES ($1, $2, $3, $4, $5, true, NULL)
            ON CONFLICT (store_id, slug)
            DO UPDATE SET name = $2, dutchie_url = $4, path = $5
            RETURNING id
          `, [storeId, name, slug, categoryUrl, path]);
                    parentIds.set(slug, inserted.rows[0].id);
                    createdCount += 1;
                    logger_1.logger.info('categories', `📁 ${name}`);
                    continue;
                }
                if (!parentIds.has(parentSlug)) {
                    logger_1.logger.warn('categories', `Skipping ${name}: parent category "${parentSlug}" was not created`);
                    continue;
                }
                await client.query(`
            INSERT INTO categories (store_id, name, slug, dutchie_url, path, scrape_enabled, parent_id)
            VALUES ($1, $2, $3, $4, $5, true, $6)
            ON CONFLICT (store_id, slug)
            DO UPDATE SET name = $2, dutchie_url = $4, path = $5, parent_id = $6
          `, [storeId, name, slug, categoryUrl, path, parentIds.get(parentSlug)]);
                createdCount += 1;
                logger_1.logger.info('categories', `  └── ${name}`);
            }
            await client.query('COMMIT');
            logger_1.logger.info('categories', `✅ Created ${createdCount} Dutchie categories successfully`);
        }
        catch (error) {
            await NavigationDiscovery.#rollbackQuietly(client);
            logger_1.logger.error('categories', `Failed to create Dutchie categories: ${error}`);
            throw error;
        }
        finally {
            client.release();
        }
    }

    /**
     * Create categories from discovered links (custom menus).
     *
     * Upserts one category per link flagged `isCategory`, inside one transaction.
     * The slug is derived from the link text and the path from the link URL.
     * Links whose text yields an empty slug are skipped with a warning.
     *
     * @param {number|string} storeId - Store the categories belong to.
     * @param {object} store - Currently unused; kept for interface compatibility.
     * @param {Array<{text: string, href: string, isCategory: boolean}>} links - Discovered links.
     * @returns {Promise<void>}
     * @throws Rethrows any query or URL-parsing error after rolling the transaction back.
     */
    async createCustomCategories(storeId, store, links) {
        const client = await migrate_1.pool.connect();
        try {
            await client.query('BEGIN');
            // Filter to likely category links
            const categoryLinks = links.filter((link) => link.isCategory);
            // Doubles as the number of rows written so far.
            let displayOrder = 0;
            for (const link of categoryLinks) {
                const slug = link.text
                    .toLowerCase()
                    .replace(/[^a-z0-9]+/g, '-')
                    .replace(/^-|-$/g, '');
                if (!slug) {
                    // Text without any [a-z0-9] character cannot form a usable slug.
                    logger_1.logger.warn('categories', `Skipping link without usable slug: ${link.text} -> ${link.href}`);
                    continue;
                }
                // Path is the URL pathname without its leading slash
                const path = new URL(link.href).pathname.replace(/^\//, '');
                await client.query(`
          INSERT INTO categories (store_id, name, slug, dutchie_url, path, scrape_enabled, display_order)
          VALUES ($1, $2, $3, $4, $5, true, $6)
          ON CONFLICT (store_id, slug)
          DO UPDATE SET name = $2, dutchie_url = $4, path = $5, display_order = $6
        `, [storeId, link.text, slug, link.href, path, displayOrder]);
                displayOrder += 1;
                logger_1.logger.info('categories', `📁 ${link.text} -> ${link.href}`);
            }
            await client.query('COMMIT');
            logger_1.logger.info('categories', `✅ Created ${displayOrder} custom categories`);
        }
        catch (error) {
            await NavigationDiscovery.#rollbackQuietly(client);
            logger_1.logger.error('categories', `Failed to create custom categories: ${error}`);
            throw error;
        }
        finally {
            client.release();
        }
    }

    /**
     * Add the `display_order` column to the categories table when it is missing.
     *
     * Best-effort: a failure is logged as a warning and not rethrown.
     *
     * @returns {Promise<void>}
     */
    async ensureDisplayOrderColumn() {
        try {
            await migrate_1.pool.query(`
        ALTER TABLE categories
        ADD COLUMN IF NOT EXISTS display_order INTEGER DEFAULT 0
      `);
            logger_1.logger.info('categories', 'Ensured display_order column exists');
        }
        catch (error) {
            logger_1.logger.warn('categories', `Could not add display_order column: ${error}`);
        }
    }
}
|
|
exports.NavigationDiscovery = NavigationDiscovery;
|