fix(monitor): remove non-existent worker columns from job_run_logs query

The job_run_logs table tracks scheduled job orchestration, not individual
worker jobs. Worker info (worker_id, worker_hostname) belongs on
dispensary_crawl_jobs, not job_run_logs.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-03 18:45:05 -07:00
parent 54f40d26bb
commit 66e07b2009
466 changed files with 84988 additions and 9226 deletions

View File

@@ -0,0 +1,612 @@
"use strict";
/**
* Menu Provider Detection Service
*
* Detects which menu platform a dispensary is using by analyzing:
* - HTML content patterns (scripts, iframes, classes)
* - URL patterns (embedded menu paths)
* - API endpoint signatures
* - Meta tags and headers
*/
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.detectMenuProvider = detectMenuProvider;
exports.quickDutchieCheck = quickDutchieCheck;
exports.detectProviderChange = detectProviderChange;
const puppeteer_1 = __importDefault(require("puppeteer"));
const logger_1 = require("./logger");
// Provider detection patterns.
//
// Maps a provider key to lists of regular expressions, grouped by what the
// code in this file tests them against:
//   scripts      - <script src> values and inline <script> text (analyzePageForProviders)
//   iframes      - <iframe src> values (analyzePageForProviders)
//   classes      - the full page HTML (analyzePageForProviders)
//   htmlPatterns - the full page HTML (analyzePageForProviders)
//   meta         - "<name> <content>" strings built from <meta> tags (analyzePageForProviders)
//   apiEndpoints - URLs of intercepted network requests (detectMenuProvider)
//   urls         - NOTE(review): not read by any code visible in this file
//
// Every pattern that matches pushes its own signal, so overlapping patterns
// (e.g. /dutchie/i and /dutchie\.com/i) can report the same script more than
// once; aggregateSignals counts those toward its multi-signal bonus.
// An empty list simply means that category is never matched for the provider.
const PROVIDER_PATTERNS = {
dutchie: {
scripts: [
/dutchie/i,
/dutchie-plus/i,
/dutchie\.com/i,
/dutchie-embed/i,
],
iframes: [
/dutchie\.com/i,
/embed\.dutchie/i,
/iframe\.dutchie/i,
],
classes: [
/dutchie-/i,
/DutchieEmbed/i,
],
urls: [
/dutchie\.com/i,
/\.dutchie\./i,
],
meta: [
/dutchie/i,
],
apiEndpoints: [
/graphql.*dutchie/i,
/api\.dutchie/i,
],
htmlPatterns: [
/data-dutchie/i,
/__DUTCHIE__/i,
/dutchie-plus-iframe/i,
],
},
treez: {
scripts: [
/treez/i,
/treez\.io/i,
/treezpay/i,
],
iframes: [
/treez\.io/i,
/menu\.treez/i,
],
classes: [
/treez-/i,
],
urls: [
/treez\.io/i,
/\.treez\./i,
],
meta: [
/treez/i,
],
apiEndpoints: [
/api\.treez/i,
],
htmlPatterns: [
/data-treez/i,
/treez-embed/i,
],
},
jane: {
scripts: [
/jane\.co/i,
/iheartjane/i,
/jane-embed/i,
/janetechnologies/i,
],
iframes: [
/jane\.co/i,
/iheartjane\.com/i,
/menu\.jane/i,
],
classes: [
/jane-/i,
/iheartjane/i,
],
urls: [
/jane\.co/i,
/iheartjane\.com/i,
],
meta: [
/jane/i,
/iheartjane/i,
],
apiEndpoints: [
/api\.iheartjane/i,
/api\.jane\.co/i,
],
htmlPatterns: [
/data-jane/i,
/jane-root/i,
/jane-embed/i,
],
},
weedmaps: {
scripts: [
/weedmaps/i,
/wm\.com/i,
],
iframes: [
/weedmaps\.com/i,
/menu\.weedmaps/i,
],
classes: [
/weedmaps-/i,
/wm-/i,
],
urls: [
/weedmaps\.com/i,
],
meta: [
/weedmaps/i,
],
apiEndpoints: [
/api.*weedmaps/i,
],
htmlPatterns: [
/data-weedmaps/i,
],
},
leafly: {
scripts: [
/leafly/i,
/leafly\.com/i,
],
iframes: [
/leafly\.com/i,
/menu\.leafly/i,
],
classes: [
/leafly-/i,
],
urls: [
/leafly\.com/i,
],
meta: [
/leafly/i,
],
apiEndpoints: [
/api\.leafly/i,
],
htmlPatterns: [
/data-leafly/i,
],
},
meadow: {
scripts: [
/meadow/i,
/getmeadow/i,
],
iframes: [
/getmeadow\.com/i,
],
classes: [
/meadow-/i,
],
urls: [
/getmeadow\.com/i,
],
meta: [],
apiEndpoints: [
/api\.getmeadow/i,
],
htmlPatterns: [],
},
greenlight: {
scripts: [
/greenlight/i,
/greenlightmenu/i,
],
iframes: [
/greenlight/i,
],
classes: [
/greenlight-/i,
],
urls: [
/greenlight/i,
],
meta: [],
apiEndpoints: [],
htmlPatterns: [],
},
blaze: {
scripts: [
/blaze\.me/i,
/blazepos/i,
],
iframes: [
/blaze\.me/i,
],
classes: [
/blaze-/i,
],
urls: [
/blaze\.me/i,
],
meta: [],
apiEndpoints: [
/api\.blaze/i,
],
htmlPatterns: [],
},
flowhub: {
scripts: [
/flowhub/i,
],
iframes: [
/flowhub\.com/i,
],
classes: [
/flowhub-/i,
],
urls: [
/flowhub\.com/i,
],
meta: [],
apiEndpoints: [],
htmlPatterns: [],
},
dispense: {
scripts: [
/dispenseapp/i,
],
iframes: [
/dispenseapp\.com/i,
],
classes: [
/dispense-/i,
],
urls: [
/dispenseapp\.com/i,
],
meta: [],
apiEndpoints: [],
htmlPatterns: [],
},
cova: {
scripts: [
/covasoftware/i,
/cova\.software/i,
],
iframes: [
/cova/i,
],
classes: [
/cova-/i,
],
urls: [
/cova/i,
],
meta: [],
apiEndpoints: [],
htmlPatterns: [],
},
};
// Candidate menu paths. detectMenuProvider appends each entry to the site's
// base URL, in this order, when menu-path checking is enabled.
const MENU_PATHS = [
    '/menu', '/shop', '/products', '/order', '/store',
    '/dispensary-menu', '/online-menu', '/shop-all', '/browse', '/catalog',
];
/**
 * Analyze a single, already-loaded page for provider signals.
 *
 * Reads the page HTML plus its script, iframe and meta elements, then tests
 * them against every provider's patterns in PROVIDER_PATTERNS. Each matching
 * pattern contributes one signal; nothing is de-duplicated here.
 *
 * Fixed confidence per signal source:
 *   script_src 90, inline_script 70, iframe_src 95,
 *   html_pattern 85, css_class 60, meta_tag 80.
 *
 * @param {object} page - Page object exposing async `content()` and `$$eval()`.
 * @param {string} url - URL of the page; only used in the error log message.
 * @returns {Promise<Array<{provider: string, confidence: number, source: string, details: string}>>}
 *   Collected signals. Any error is logged and whatever was collected so far
 *   (possibly an empty array) is returned; the function does not throw.
 */
async function analyzePageForProviders(page, url) {
    const signals = [];
    const addSignal = (provider, confidence, source, details) => {
        signals.push({ provider, confidence, source, details });
    };
    try {
        // None of these DOM reads depend on the provider, so do each exactly
        // once instead of repeating the browser round-trips per provider.
        const html = await page.content();
        const scriptSrcs = await page.$$eval('script[src]', els => els.map(el => el.getAttribute('src') || ''));
        const inlineScripts = await page.$$eval('script:not([src])', els => els.map(el => el.textContent || ''));
        const iframeSrcs = await page.$$eval('iframe', els => els.map(el => el.getAttribute('src') || ''));
        const metaTags = await page.$$eval('meta', els => els.map(el => `${el.getAttribute('name')} ${el.getAttribute('content')}`));
        // Check each provider's patterns
        for (const [provider, patterns] of Object.entries(PROVIDER_PATTERNS)) {
            // External script sources
            for (const src of scriptSrcs) {
                for (const pattern of patterns.scripts) {
                    if (pattern.test(src)) {
                        addSignal(provider, 90, 'script_src', src);
                    }
                }
            }
            // Inline script bodies (same pattern list, lower confidence)
            for (const scriptContent of inlineScripts) {
                for (const pattern of patterns.scripts) {
                    if (pattern.test(scriptContent)) {
                        addSignal(provider, 70, 'inline_script', `Pattern: ${pattern}`);
                    }
                }
            }
            // Iframe sources
            for (const src of iframeSrcs) {
                for (const pattern of patterns.iframes) {
                    if (pattern.test(src)) {
                        addSignal(provider, 95, 'iframe_src', src);
                    }
                }
            }
            // Provider-specific markers anywhere in the HTML
            for (const pattern of patterns.htmlPatterns) {
                if (pattern.test(html)) {
                    addSignal(provider, 85, 'html_pattern', `Pattern: ${pattern}`);
                }
            }
            // CSS class patterns are matched against the whole HTML as well
            for (const pattern of patterns.classes) {
                if (pattern.test(html)) {
                    addSignal(provider, 60, 'css_class', `Pattern: ${pattern}`);
                }
            }
            // Meta tags, each rendered as "<name> <content>"
            for (const meta of metaTags) {
                for (const pattern of patterns.meta) {
                    if (pattern.test(meta)) {
                        addSignal(provider, 80, 'meta_tag', meta);
                    }
                }
            }
        }
        // Network-request based detection is handled by the caller, which
        // owns request interception.
    }
    catch (error) {
        logger_1.logger.error('provider-detection', `Error analyzing page ${url}: ${error}`);
    }
    return signals;
}
/**
 * Collapse a list of signals into one provider verdict.
 *
 * Per provider, the score is its highest signal confidence plus 3 points for
 * every additional signal (bonus capped at 10, total capped at 100). The top
 * scorer wins outright when it is alone or leads the runner-up by at least 20
 * points; otherwise it is still returned, but with its score cut by 20 and
 * floored at 50.
 *
 * @param {Array<{provider: string, confidence: number}>} signals
 * @returns {{provider: string, confidence: number}} `{ provider: 'unknown', confidence: 0 }`
 *   when there are no signals.
 */
function aggregateSignals(signals) {
    if (signals.length === 0) {
        return { provider: 'unknown', confidence: 0 };
    }
    // Bucket the raw confidences per provider, keeping first-seen order
    const buckets = {};
    for (const { provider, confidence } of signals) {
        (buckets[provider] || (buckets[provider] = [])).push(confidence);
    }
    // Score every provider, then rank from strongest to weakest
    const ranked = Object.entries(buckets)
        .map(([provider, list]) => {
            const strongest = Math.max(...list);
            const extraSignalBonus = Math.min(10, (list.length - 1) * 3);
            return { provider: provider, score: Math.min(100, strongest + extraSignalBonus) };
        })
        .sort((left, right) => right.score - left.score);
    const [top, runnerUp] = ranked;
    const isClearWinner = ranked.length === 1 || top.score - runnerUp.score >= 20;
    return {
        provider: top.provider,
        // Contested results get a reduced confidence
        confidence: isClearWinner ? top.score : Math.max(50, top.score - 20),
    };
}
/**
 * Detect the menu provider for a dispensary
 *
 * Launches a headless browser, loads the site's base URL (plus every
 * MENU_PATHS entry when enabled), collects provider signals from each page
 * and from intercepted API/GraphQL requests, and aggregates them into a
 * single verdict.
 *
 * @param {string} websiteUrl - Site address. "https://" is prepended when it
 *   has no http:// or https:// scheme; trailing slashes are removed.
 * @param {object} [options]
 * @param {boolean} [options.checkMenuPaths=true] - Also probe the common menu
 *   paths under the base URL.
 * @param {number} [options.timeout=30000] - Timeout in milliseconds passed to
 *   each page navigation.
 * @returns {Promise<object>} Result with `provider`, `confidence`, `signals`,
 *   `urlsTested`, `menuEntryPoints`, `rawSignals`, and `error` (set only when
 *   detection failed). Failures are reported through `error`, not thrown.
 */
async function detectMenuProvider(websiteUrl, options = {}) {
    const { checkMenuPaths = true, timeout = 30000 } = options;
    const result = {
        provider: 'unknown',
        confidence: 0,
        signals: [],
        urlsTested: [],
        menuEntryPoints: [],
        rawSignals: {},
    };
    let browser = null;
    try {
        // Normalize URL
        let baseUrl = String(websiteUrl ?? '').trim();
        if (!baseUrl) {
            // Fail before paying for a browser launch
            throw new Error('No website URL provided');
        }
        // Test for a real scheme: a bare startsWith('http') check would treat
        // hosts such as "httpbin.org" as already having one.
        if (!/^https?:\/\//i.test(baseUrl)) {
            baseUrl = `https://${baseUrl}`;
        }
        baseUrl = baseUrl.replace(/\/+$/, ''); // Remove trailing slashes
        // Launch browser
        browser = await puppeteer_1.default.launch({
            headless: true,
            args: [
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-gpu',
            ],
        });
        const page = await browser.newPage();
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
        // Track network requests for API detection
        const apiRequests = [];
        await page.setRequestInterception(true);
        page.on('request', (request) => {
            const requestUrl = request.url();
            if (requestUrl.includes('api') || requestUrl.includes('graphql')) {
                apiRequests.push(requestUrl);
            }
            // continue() returns a promise that can reject (for example when
            // the navigation that issued the request is torn down). That is
            // expected while hopping between URLs, so it is deliberately
            // ignored instead of surfacing as an unhandled rejection.
            Promise.resolve(request.continue()).catch(() => { });
        });
        // URLs to check
        const urlsToCheck = [baseUrl];
        if (checkMenuPaths) {
            for (const path of MENU_PATHS) {
                urlsToCheck.push(`${baseUrl}${path}`);
            }
        }
        // Check each URL
        for (const url of urlsToCheck) {
            try {
                result.urlsTested.push(url);
                const response = await page.goto(url, {
                    waitUntil: 'networkidle2',
                    timeout,
                });
                // goto() resolves normally on HTTP error statuses, so a
                // missing menu path has to be detected from the response.
                // The base URL is always analyzed because the caller asked
                // for it explicitly.
                if (url !== baseUrl && response && response.status() === 404) {
                    continue;
                }
                // Wait a bit for dynamic content
                await new Promise(r => setTimeout(r, 2000));
                // Analyze page
                const pageSignals = await analyzePageForProviders(page, url);
                result.signals.push(...pageSignals);
                // Track if this URL has menu content (the base URL itself is
                // never recorded as a menu entry point)
                if (url !== baseUrl) {
                    // This callback runs inside the browser, so it must be
                    // self-contained.
                    const hasMenuContent = await page.evaluate(() => {
                        const text = (document.body?.innerText ?? '').toLowerCase();
                        const keywords = [
                            'add to cart',
                            'add to bag',
                            'product',
                            'indica',
                            'sativa',
                            'hybrid',
                            'thc',
                            'cbd',
                        ];
                        return keywords.some((keyword) => text.includes(keyword));
                    });
                    if (hasMenuContent) {
                        result.menuEntryPoints.push(url);
                    }
                }
            }
            catch (pageError) {
                // A single unreachable URL must not abort the whole detection
                logger_1.logger.warn('provider-detection', `Could not load ${url}: ${pageError?.message ?? pageError}`);
            }
        }
        // Check API requests for provider hints
        for (const apiUrl of apiRequests) {
            for (const [provider, patterns] of Object.entries(PROVIDER_PATTERNS)) {
                for (const pattern of patterns.apiEndpoints) {
                    if (pattern.test(apiUrl)) {
                        result.signals.push({
                            provider: provider,
                            confidence: 95,
                            source: 'api_request',
                            details: apiUrl,
                        });
                    }
                }
            }
        }
        // Record raw signals
        result.rawSignals = {
            apiRequestsFound: apiRequests.length,
            menuEntryPointsFound: result.menuEntryPoints.length,
            totalSignals: result.signals.length,
            uniqueProviders: new Set(result.signals.map(s => s.provider)).size,
        };
        // Aggregate signals into final result
        const aggregated = aggregateSignals(result.signals);
        result.provider = aggregated.provider;
        result.confidence = aggregated.confidence;
    }
    catch (error) {
        result.error = error.message;
        logger_1.logger.error('provider-detection', `Detection failed for ${websiteUrl}: ${error.message}`);
    }
    finally {
        if (browser) {
            try {
                await browser.close();
            }
            catch (closeError) {
                // A failed shutdown must not discard the detection result
                logger_1.logger.warn('provider-detection', `Could not close browser: ${closeError?.message ?? closeError}`);
            }
        }
    }
    return result;
}
/**
 * Quick check if a site has Dutchie - used during production crawls
 *
 * Looks for Dutchie markers in the page HTML first and, only if none are
 * found, in the `src` of the page's iframes.
 *
 * @param {object} page - Page object exposing async `content()` and `$$eval()`.
 * @returns {Promise<boolean>} true when a marker is found; false otherwise,
 *   including when reading the page fails.
 */
async function quickDutchieCheck(page) {
    try {
        const markup = await page.content();
        // Dutchie-specific markers searched for anywhere in the markup
        const markupMarkers = [
            /dutchie/i,
            /dutchie-plus/i,
            /__DUTCHIE__/i,
            /data-dutchie/i,
            /embed\.dutchie/i,
        ];
        if (markupMarkers.some((marker) => marker.test(markup))) {
            return true;
        }
        // Fall back to the iframe sources
        const frameSources = await page.$$eval('iframe', els => els.map(el => el.getAttribute('src') || ''));
        return frameSources.some((src) => /dutchie/i.test(src));
    }
    catch {
        return false;
    }
}
/**
 * Check if provider has changed from expected
 *
 * Only an expected provider of 'dutchie' can ever yield `changed: true`:
 *  - another provider detected with confidence >= 70, or
 *  - confidence below 30 and quickDutchieCheck finds no Dutchie marker
 *    (reported with a confidence of at least 30, as 'other' when the
 *    detected provider is 'unknown').
 *
 * @param {object} page - Loaded page; its `url()` is passed to the analyzer.
 * @param {string} expectedProvider - Provider the caller believes is in use.
 * @returns {Promise<{changed: boolean, newProvider?: string, confidence?: number}>}
 *   `{ changed: false }` in every other case, including when analysis throws.
 */
async function detectProviderChange(page, expectedProvider) {
    try {
        const pageSignals = await analyzePageForProviders(page, page.url());
        const { provider: detected, confidence } = aggregateSignals(pageSignals);
        if (expectedProvider === 'dutchie') {
            // A different provider was found with high confidence
            if (detected !== 'dutchie' && confidence >= 70) {
                return { changed: true, newProvider: detected, confidence: confidence };
            }
            // Nothing convincing was found; confirm Dutchie is really gone
            // before reporting a switch
            if (confidence < 30 && !(await quickDutchieCheck(page))) {
                return {
                    changed: true,
                    newProvider: detected !== 'unknown' ? detected : 'other',
                    confidence: Math.max(30, confidence),
                };
            }
        }
        return { changed: false };
    }
    catch {
        return { changed: false };
    }
}