fix(monitor): remove non-existent worker columns from job_run_logs query
The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
114
backend/dist/services/category-discovery.js
vendored
114
backend/dist/services/category-discovery.js
vendored
@@ -4,9 +4,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
||||
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.discoverCategories = discoverCategories;
|
||||
const puppeteer_1 = __importDefault(require("puppeteer"));
|
||||
const puppeteer_extra_1 = __importDefault(require("puppeteer-extra"));
|
||||
const puppeteer_extra_plugin_stealth_1 = __importDefault(require("puppeteer-extra-plugin-stealth"));
|
||||
const migrate_1 = require("../db/migrate");
|
||||
const logger_1 = require("./logger");
|
||||
const age_gate_1 = require("../utils/age-gate");
|
||||
const dutchie_1 = require("../scrapers/templates/dutchie");
|
||||
// Apply stealth plugin
|
||||
puppeteer_extra_1.default.use((0, puppeteer_extra_plugin_stealth_1.default)());
|
||||
const DUTCHIE_CATEGORIES = [
|
||||
{ name: 'Shop', slug: 'shop' },
|
||||
{ name: 'Flower', slug: 'flower', parentSlug: 'shop' },
|
||||
@@ -19,6 +24,18 @@ const DUTCHIE_CATEGORIES = [
|
||||
{ name: 'Brands', slug: 'brands' },
|
||||
{ name: 'Specials', slug: 'specials' }
|
||||
];
|
||||
/**
 * Predefined category set consumed by createCuraleafCategories.
 *
 * Each entry has a display `name` and a `slug`. Entries without `parentSlug`
 * are treated as top-level categories; entries with `parentSlug` are treated
 * as subcategories of the entry carrying that slug.
 *
 * Order matters: the consumer walks this array front to back and looks up the
 * parent row before inserting a subcategory, so a parent must appear before
 * any entry that references it.
 */
const CURALEAF_CATEGORIES = [
    { name: 'Shop', slug: 'shop' },
    { name: 'Flower', slug: 'flower', parentSlug: 'shop' },
    { name: 'Pre-Rolls', slug: 'pre-rolls', parentSlug: 'shop' },
    { name: 'Vaporizers', slug: 'vaporizers', parentSlug: 'shop' },
    { name: 'Concentrates', slug: 'concentrates', parentSlug: 'shop' },
    { name: 'Edibles', slug: 'edibles', parentSlug: 'shop' },
    { name: 'Tinctures', slug: 'tinctures', parentSlug: 'shop' },
    { name: 'Topicals', slug: 'topicals', parentSlug: 'shop' },
    { name: 'Capsules', slug: 'capsules', parentSlug: 'shop' },
    { name: 'Accessories', slug: 'accessories', parentSlug: 'shop' }
];
|
||||
async function makePageStealthy(page) {
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
Object.defineProperty(navigator, 'webdriver', { get: () => false });
|
||||
@@ -72,7 +89,7 @@ async function discoverCategories(storeId) {
|
||||
const store = storeResult.rows[0];
|
||||
const baseUrl = store.dutchie_url;
|
||||
// Launch browser to check page source
|
||||
browser = await puppeteer_1.default.launch({
|
||||
browser = await puppeteer_extra_1.default.launch({
|
||||
headless: 'new',
|
||||
args: [
|
||||
'--no-sandbox',
|
||||
@@ -85,9 +102,14 @@ async function discoverCategories(storeId) {
|
||||
await makePageStealthy(page);
|
||||
await page.setViewport({ width: 1920, height: 1080 });
|
||||
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
|
||||
// Set age gate bypass cookies BEFORE navigation (standard for all cannabis sites)
|
||||
const state = (0, age_gate_1.detectStateFromUrl)(baseUrl);
|
||||
await (0, age_gate_1.setAgeGateCookies)(page, baseUrl, state);
|
||||
logger_1.logger.info('categories', `Loading page to detect menu type: ${baseUrl}`);
|
||||
await page.goto(baseUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });
|
||||
await page.waitForTimeout(3000);
|
||||
// If age gate still appears, try to bypass it
|
||||
await (0, age_gate_1.bypassAgeGate)(page, state);
|
||||
// Detect if it's a Dutchie menu by inspecting page source
|
||||
const isDutchie = await isDutchieMenu(page);
|
||||
await browser.close();
|
||||
@@ -97,8 +119,9 @@ async function discoverCategories(storeId) {
|
||||
await createDutchieCategories(storeId, store);
|
||||
}
|
||||
else {
|
||||
logger_1.logger.info('categories', `⚠️ Non-Dutchie menu detected, would need custom scraping logic`);
|
||||
throw new Error('Non-Dutchie menus not yet supported. Please contact support.');
|
||||
// Fallback: Use standard cannabis categories for non-Dutchie sites
|
||||
logger_1.logger.info('categories', `Non-Dutchie menu detected, using standard cannabis categories for ${store.name}`);
|
||||
await createCuraleafCategories(storeId, store);
|
||||
}
|
||||
}
|
||||
catch (error) {
|
||||
@@ -116,24 +139,24 @@ async function createDutchieCategories(storeId, store) {
|
||||
const baseUrl = store.dutchie_url;
|
||||
for (const category of DUTCHIE_CATEGORIES) {
|
||||
let categoryUrl;
|
||||
// Use Dutchie template to build correct category URLs
|
||||
if (category.parentSlug) {
|
||||
// Subcategory: /embedded-menu/{slug}/shop/flower
|
||||
categoryUrl = `${baseUrl}/${category.parentSlug}/${category.slug}`;
|
||||
// Subcategory: Use template's buildCategoryUrl (e.g., /products/flower)
|
||||
categoryUrl = dutchie_1.dutchieTemplate.buildCategoryUrl(baseUrl, category.name);
|
||||
}
|
||||
else {
|
||||
// Top-level: /embedded-menu/{slug}/shop
|
||||
// Top-level: Use base URL with slug
|
||||
categoryUrl = `${baseUrl}/${category.slug}`;
|
||||
}
|
||||
const path = category.parentSlug ? `${category.parentSlug}/${category.slug}` : category.slug;
|
||||
if (!category.parentSlug) {
|
||||
// Create parent category
|
||||
await client.query(`
|
||||
INSERT INTO categories (store_id, name, slug, dutchie_url, path, scrape_enabled, parent_id)
|
||||
VALUES ($1, $2, $3, $4, $5, true, NULL)
|
||||
ON CONFLICT (store_id, slug)
|
||||
DO UPDATE SET name = $2, dutchie_url = $4, path = $5
|
||||
INSERT INTO categories (store_id, name, slug, dutchie_url, scrape_enabled)
|
||||
VALUES ($1, $2, $3, $4, true)
|
||||
ON CONFLICT (store_id, slug)
|
||||
DO UPDATE SET name = $2, dutchie_url = $4
|
||||
RETURNING id
|
||||
`, [storeId, category.name, category.slug, categoryUrl, path]);
|
||||
`, [storeId, category.name, category.slug, categoryUrl]);
|
||||
logger_1.logger.info('categories', `📁 ${category.name}`);
|
||||
}
|
||||
else {
|
||||
@@ -143,13 +166,12 @@ async function createDutchieCategories(storeId, store) {
|
||||
WHERE store_id = $1 AND slug = $2
|
||||
`, [storeId, category.parentSlug]);
|
||||
if (parentResult.rows.length > 0) {
|
||||
const parentId = parentResult.rows[0].id;
|
||||
await client.query(`
|
||||
INSERT INTO categories (store_id, name, slug, dutchie_url, path, scrape_enabled, parent_id)
|
||||
VALUES ($1, $2, $3, $4, $5, true, $6)
|
||||
INSERT INTO categories (store_id, name, slug, dutchie_url, scrape_enabled)
|
||||
VALUES ($1, $2, $3, $4, true)
|
||||
ON CONFLICT (store_id, slug)
|
||||
DO UPDATE SET name = $2, dutchie_url = $4, path = $5, parent_id = $6
|
||||
`, [storeId, category.name, category.slug, categoryUrl, path, parentId]);
|
||||
DO UPDATE SET name = $2, dutchie_url = $4
|
||||
`, [storeId, category.name, category.slug, categoryUrl]);
|
||||
logger_1.logger.info('categories', ` └── ${category.name}`);
|
||||
}
|
||||
}
|
||||
@@ -166,3 +188,59 @@ async function createDutchieCategories(storeId, store) {
|
||||
client.release();
|
||||
}
|
||||
}
|
||||
/**
 * Upserts the predefined CURALEAF_CATEGORIES set for one store inside a single
 * database transaction.
 *
 * Top-level entries are stored with the store's base URL. Subcategories are
 * stored with the base URL plus a `category=<slug>` query parameter, and only
 * when a row for their parent slug already exists for the store.
 *
 * @param {*} storeId - Value bound to `store_id` on every row written.
 * @param {{ dutchie_url: string }} store - Store record; only `dutchie_url`
 *   is read here and it is used as the base URL for every category.
 * @returns {Promise<void>} Resolves once the transaction has been committed.
 * @throws Rethrows whatever error interrupted the transaction, after a
 *   ROLLBACK has been attempted. The pooled client is released in all cases.
 */
async function createCuraleafCategories(storeId, store) {
    // One upsert statement serves both parents and subcategories: the row
    // shape is identical and nothing reads the statement's result.
    const upsertCategorySql = `
        INSERT INTO categories (store_id, name, slug, dutchie_url, scrape_enabled)
        VALUES ($1, $2, $3, $4, true)
        ON CONFLICT (store_id, slug)
        DO UPDATE SET name = $2, dutchie_url = $4
    `;
    const findParentSql = `
        SELECT id FROM categories
        WHERE store_id = $1 AND slug = $2
    `;
    const client = await migrate_1.pool.connect();
    try {
        await client.query('BEGIN');
        logger_1.logger.info('categories', `Creating predefined Curaleaf category structure`);
        const baseUrl = store.dutchie_url;
        // If the store URL already carries a query string, append with '&' so
        // the result is not a malformed "...?a=1?category=x".
        const querySeparator = String(baseUrl).includes('?') ? '&' : '?';
        let createdCount = 0;
        for (const category of CURALEAF_CATEGORIES) {
            if (!category.parentSlug) {
                // Top-level category: points at the store's base URL.
                await client.query(upsertCategorySql, [storeId, category.name, category.slug, baseUrl]);
                logger_1.logger.info('categories', `📁 ${category.name}`);
                createdCount += 1;
                continue;
            }
            // Subcategory: only written when its parent row exists for this store.
            const parentResult = await client.query(findParentSql, [storeId, category.parentSlug]);
            if (parentResult.rows.length === 0) {
                // Report the skip instead of dropping the category silently.
                logger_1.logger.info('categories', `⚠️ Skipping ${category.name}: parent category "${category.parentSlug}" not found`);
                continue;
            }
            const categoryUrl = `${baseUrl}${querySeparator}category=${category.slug}`;
            await client.query(upsertCategorySql, [storeId, category.name, category.slug, categoryUrl]);
            logger_1.logger.info('categories', ` └── ${category.name}`);
            createdCount += 1;
        }
        await client.query('COMMIT');
        // Report what was actually written; equals CURALEAF_CATEGORIES.length
        // whenever nothing was skipped.
        logger_1.logger.info('categories', `✅ Created ${createdCount} Curaleaf categories successfully`);
    }
    catch (error) {
        try {
            await client.query('ROLLBACK');
        }
        catch (rollbackError) {
            // A failed ROLLBACK must not replace the error that caused it.
            logger_1.logger.error('categories', `Failed to roll back Curaleaf category transaction: ${rollbackError}`);
        }
        logger_1.logger.error('categories', `Failed to create Curaleaf categories: ${error}`);
        throw error;
    }
    finally {
        client.release();
    }
}
|
||||
|
||||
Reference in New Issue
Block a user