The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs.
"use strict";
|
|
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
if (k2 === undefined) k2 = k;
|
|
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
}
|
|
Object.defineProperty(o, k2, desc);
|
|
}) : (function(o, m, k, k2) {
|
|
if (k2 === undefined) k2 = k;
|
|
o[k2] = m[k];
|
|
}));
|
|
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
}) : function(o, v) {
|
|
o["default"] = v;
|
|
});
|
|
var __importStar = (this && this.__importStar) || (function () {
|
|
var ownKeys = function(o) {
|
|
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
var ar = [];
|
|
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
return ar;
|
|
};
|
|
return ownKeys(o);
|
|
};
|
|
return function (mod) {
|
|
if (mod && mod.__esModule) return mod;
|
|
var result = {};
|
|
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
__setModuleDefault(result, mod);
|
|
return result;
|
|
};
|
|
})();
|
|
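// The three helpers above are the standard interop shims the TypeScript
// compiler emits for CommonJS output; __importStar is used further down in
// this file for lazy require() of the MinIO and scraper-debug modules.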
Object.defineProperty(exports, "__esModule", { value: true });
const express_1 = require("express");
const middleware_1 = require("../auth/middleware");
const migrate_1 = require("../db/migrate");
const scraper_v2_1 = require("../scraper-v2");
const router = (0, express_1.Router)();
router.use(middleware_1.authMiddleware);
// Get all stores
router.get('/', async (req, res) => {
    try {
        const result = await migrate_1.pool.query(`
            SELECT
                s.*,
                COUNT(DISTINCT p.id) as product_count,
                COUNT(DISTINCT c.id) as category_count
            FROM stores s
            LEFT JOIN products p ON s.id = p.store_id
            LEFT JOIN categories c ON s.id = c.store_id
            GROUP BY s.id
            ORDER BY s.name
        `);
        res.json({ stores: result.rows });
    }
    catch (error) {
        console.error('Error fetching stores:', error);
        res.status(500).json({ error: 'Failed to fetch stores' });
    }
});
// Freshness threshold in hours
const STALE_THRESHOLD_HOURS = 4;
function calculateFreshness(lastScrapedAt) {
    if (!lastScrapedAt) {
        return {
            last_scraped_at: null,
            is_stale: true,
            freshness: 'Never scraped',
            hours_since_scrape: null
        };
    }
    const now = new Date();
    const diffMs = now.getTime() - lastScrapedAt.getTime();
    const diffHours = diffMs / (1000 * 60 * 60);
    const isStale = diffHours > STALE_THRESHOLD_HOURS;
    let freshnessText;
    if (diffHours < 1) {
        const mins = Math.round(diffHours * 60);
        freshnessText = `${mins} minute${mins !== 1 ? 's' : ''} ago`;
    }
    else if (diffHours < 24) {
        const hrs = Math.round(diffHours);
        freshnessText = `${hrs} hour${hrs !== 1 ? 's' : ''} ago`;
    }
    else {
        const days = Math.round(diffHours / 24);
        freshnessText = `${days} day${days !== 1 ? 's' : ''} ago`;
    }
    return {
        last_scraped_at: lastScrapedAt.toISOString(),
        is_stale: isStale,
        freshness: freshnessText,
        hours_since_scrape: Math.round(diffHours * 10) / 10
    };
}
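// Example: a store scraped 2 hours ago yields
//   { freshness: '2 hours ago', is_stale: false, hours_since_scrape: 2, ... }
// since 2 < STALE_THRESHOLD_HOURS; anything older than 4 hours is flagged stale.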
function detectProvider(dutchieUrl) {
    if (!dutchieUrl)
        return 'unknown';
    if (dutchieUrl.includes('dutchie.com'))
        return 'Dutchie';
    if (dutchieUrl.includes('iheartjane.com') || dutchieUrl.includes('jane.co'))
        return 'Jane';
    if (dutchieUrl.includes('treez.io'))
        return 'Treez';
    if (dutchieUrl.includes('weedmaps.com'))
        return 'Weedmaps';
    if (dutchieUrl.includes('leafly.com'))
        return 'Leafly';
    return 'Custom';
}
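// Detection is a simple substring match on the stored menu URL, e.g.
// detectProvider('https://dutchie.com/stores/foo') → 'Dutchie'; any URL that
// matches no known provider falls back to 'Custom'.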
// Get single store with full details
router.get('/:id', async (req, res) => {
    try {
        const { id } = req.params;
        // Get store with counts and linked dispensary
        const result = await migrate_1.pool.query(`
            SELECT
                s.*,
                d.id as dispensary_id,
                d.name as dispensary_name,
                d.slug as dispensary_slug,
                d.state as dispensary_state,
                d.city as dispensary_city,
                d.address as dispensary_address,
                d.menu_provider as dispensary_menu_provider,
                COUNT(DISTINCT p.id) as product_count,
                COUNT(DISTINCT c.id) as category_count,
                COUNT(DISTINCT p.id) FILTER (WHERE p.in_stock = true) as in_stock_count,
                COUNT(DISTINCT p.id) FILTER (WHERE p.in_stock = false) as out_of_stock_count
            FROM stores s
            LEFT JOIN dispensaries d ON s.dispensary_id = d.id
            LEFT JOIN products p ON s.id = p.store_id
            LEFT JOIN categories c ON s.id = c.store_id
            WHERE s.id = $1
            GROUP BY s.id, d.id, d.name, d.slug, d.state, d.city, d.address, d.menu_provider
        `, [id]);
        if (result.rows.length === 0) {
            return res.status(404).json({ error: 'Store not found' });
        }
        const store = result.rows[0];
        // Get recent crawl jobs for this store
        const jobsResult = await migrate_1.pool.query(`
            SELECT
                id, status, job_type, trigger_type,
                started_at, completed_at,
                products_found, products_new, products_updated,
                in_stock_count, out_of_stock_count,
                error_message
            FROM crawl_jobs
            WHERE store_id = $1
            ORDER BY created_at DESC
            LIMIT 10
        `, [id]);
        // Get schedule info if exists
        const scheduleResult = await migrate_1.pool.query(`
            SELECT
                enabled, interval_hours, next_run_at, last_run_at
            FROM store_crawl_schedule
            WHERE store_id = $1
        `, [id]);
        // Calculate freshness
        const freshness = calculateFreshness(store.last_scraped_at);
        // Detect provider from URL
        const provider = detectProvider(store.dutchie_url);
        // Build response
        const response = {
            ...store,
            provider,
            freshness: freshness.freshness,
            is_stale: freshness.is_stale,
            hours_since_scrape: freshness.hours_since_scrape,
            linked_dispensary: store.dispensary_id ? {
                id: store.dispensary_id,
                name: store.dispensary_name,
                slug: store.dispensary_slug,
                state: store.dispensary_state,
                city: store.dispensary_city,
                address: store.dispensary_address,
                menu_provider: store.dispensary_menu_provider
            } : null,
            schedule: scheduleResult.rows[0] || null,
            recent_jobs: jobsResult.rows
        };
        // Remove redundant dispensary fields from root
        delete response.dispensary_name;
        delete response.dispensary_slug;
        delete response.dispensary_state;
        delete response.dispensary_city;
        delete response.dispensary_address;
        delete response.dispensary_menu_provider;
        res.json(response);
    }
    catch (error) {
        console.error('Error fetching store:', error);
        res.status(500).json({ error: 'Failed to fetch store' });
    }
});
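// The composed response is the stores row plus derived fields: provider,
// freshness/is_stale/hours_since_scrape, a nested linked_dispensary object
// (or null when the store is unlinked), the crawl-schedule row (or null),
// and the ten most recent crawl_jobs rows.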
// Get store brands
router.get('/:id/brands', async (req, res) => {
    try {
        const { id } = req.params;
        const result = await migrate_1.pool.query(`
            SELECT name
            FROM brands
            WHERE store_id = $1
            ORDER BY name
        `, [id]);
        const brands = result.rows.map((row) => row.name);
        res.json({ brands });
    }
    catch (error) {
        console.error('Error fetching store brands:', error);
        res.status(500).json({ error: 'Failed to fetch store brands' });
    }
});
// Get store specials
router.get('/:id/specials', async (req, res) => {
    try {
        const { id } = req.params;
        const { date } = req.query;
        // Use provided date or today's date
        const queryDate = date || new Date().toISOString().split('T')[0];
        const result = await migrate_1.pool.query(`
            SELECT
                s.*,
                p.name as product_name,
                p.image_url as product_image
            FROM specials s
            LEFT JOIN products p ON s.product_id = p.id
            WHERE s.store_id = $1 AND s.valid_date = $2
            ORDER BY s.name
        `, [id, queryDate]);
        res.json({ specials: result.rows, date: queryDate });
    }
    catch (error) {
        console.error('Error fetching store specials:', error);
        res.status(500).json({ error: 'Failed to fetch store specials' });
    }
});
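// The optional ?date= query parameter is compared directly against
// specials.valid_date, so it should use the same YYYY-MM-DD format the
// default (today, via toISOString().split('T')[0]) produces — e.g.
// GET .../123/specials?date=2024-01-15 relative to wherever this router
// is mounted.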
// Create store
router.post('/', (0, middleware_1.requireRole)('superadmin', 'admin'), async (req, res) => {
    try {
        const { name, slug, dutchie_url, active, scrape_enabled } = req.body;
        const result = await migrate_1.pool.query(`
            INSERT INTO stores (name, slug, dutchie_url, active, scrape_enabled)
            VALUES ($1, $2, $3, $4, $5)
            RETURNING *
        `, [name, slug, dutchie_url, active ?? true, scrape_enabled ?? true]);
        res.status(201).json(result.rows[0]);
    }
    catch (error) {
        console.error('Error creating store:', error);
        res.status(500).json({ error: 'Failed to create store' });
    }
});
// Update store
router.put('/:id', (0, middleware_1.requireRole)('superadmin', 'admin'), async (req, res) => {
    try {
        const { id } = req.params;
        const { name, slug, dutchie_url, active, scrape_enabled } = req.body;
        const result = await migrate_1.pool.query(`
            UPDATE stores
            SET name = COALESCE($1, name),
                slug = COALESCE($2, slug),
                dutchie_url = COALESCE($3, dutchie_url),
                active = COALESCE($4, active),
                scrape_enabled = COALESCE($5, scrape_enabled),
                updated_at = CURRENT_TIMESTAMP
            WHERE id = $6
            RETURNING *
        `, [name, slug, dutchie_url, active, scrape_enabled, id]);
        if (result.rows.length === 0) {
            return res.status(404).json({ error: 'Store not found' });
        }
        res.json(result.rows[0]);
    }
    catch (error) {
        console.error('Error updating store:', error);
        res.status(500).json({ error: 'Failed to update store' });
    }
});
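// COALESCE gives partial-update semantics: any field omitted from the body
// is bound as NULL and therefore keeps its current value. A side effect is
// that a column can never be explicitly set back to NULL through this
// endpoint.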
// Delete store
router.delete('/:id', (0, middleware_1.requireRole)('superadmin'), async (req, res) => {
    try {
        const { id } = req.params;
        const result = await migrate_1.pool.query('DELETE FROM stores WHERE id = $1 RETURNING *', [id]);
        if (result.rows.length === 0) {
            return res.status(404).json({ error: 'Store not found' });
        }
        res.json({ message: 'Store deleted successfully' });
    }
    catch (error) {
        console.error('Error deleting store:', error);
        res.status(500).json({ error: 'Failed to delete store' });
    }
});
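// Note: products, categories, and brands reference store_id, so this DELETE
// only succeeds if those child tables declare ON DELETE CASCADE (or the
// rows are already gone); otherwise Postgres raises a foreign-key violation
// and the route returns the generic 500.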
// Trigger scrape for a store
router.post('/:id/scrape', (0, middleware_1.requireRole)('superadmin', 'admin'), async (req, res) => {
    try {
        const { id } = req.params;
        const { parallel = 3, userAgent } = req.body; // Default to 3 parallel scrapers
        const storeResult = await migrate_1.pool.query('SELECT id FROM stores WHERE id = $1', [id]);
        if (storeResult.rows.length === 0) {
            return res.status(404).json({ error: 'Store not found' });
        }
        (0, scraper_v2_1.scrapeStore)(parseInt(id), parseInt(parallel), userAgent).catch(err => {
            console.error('Background scrape error:', err);
        });
        res.json({
            message: 'Scrape started',
            parallel: parseInt(parallel),
            userAgent: userAgent || 'random'
        });
    }
    catch (error) {
        console.error('Error triggering scrape:', error);
        res.status(500).json({ error: 'Failed to trigger scrape' });
    }
});
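// scrapeStore is fire-and-forget: the promise is deliberately not awaited,
// so the route responds immediately with 'Scrape started' and any failure
// surfaces only in the server log, never to the caller.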
// Download missing images for a store
router.post('/:id/download-images', (0, middleware_1.requireRole)('superadmin', 'admin'), async (req, res) => {
    try {
        const { id } = req.params;
        const storeResult = await migrate_1.pool.query('SELECT id, name FROM stores WHERE id = $1', [id]);
        if (storeResult.rows.length === 0) {
            return res.status(404).json({ error: 'Store not found' });
        }
        const store = storeResult.rows[0];
        const productsResult = await migrate_1.pool.query(`
            SELECT id, name, image_url
            FROM products
            WHERE store_id = $1
              AND image_url IS NOT NULL
              AND local_image_path IS NULL
        `, [id]);
        (async () => {
            const { uploadImageFromUrl } = await Promise.resolve().then(() => __importStar(require('../utils/minio')));
            let downloaded = 0;
            for (const product of productsResult.rows) {
                try {
                    console.log(`📸 Downloading image for: ${product.name}`);
                    const localPath = await uploadImageFromUrl(product.image_url, product.id);
                    await migrate_1.pool.query(`
                        UPDATE products
                        SET local_image_path = $1
                        WHERE id = $2
                    `, [localPath, product.id]);
                    downloaded++;
                }
                catch (error) {
                    console.error(`Failed to download image for ${product.name}:`, error);
                }
            }
            console.log(`✅ Downloaded ${downloaded} of ${productsResult.rows.length} missing images for ${store.name}`);
        })().catch(err => console.error('Background image download error:', err));
        res.json({
            message: 'Image download started',
            total_missing: productsResult.rows.length
        });
    }
    catch (error) {
        console.error('Error triggering image download:', error);
        res.status(500).json({ error: 'Failed to trigger image download' });
    }
});
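// Like the scrape trigger, the download loop runs as a detached async IIFE:
// the MinIO helper is lazily imported (hence the __importStar shim at the
// top of the file), products are processed sequentially, and a failed
// download is logged and skipped rather than aborting the batch.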
// Discover categories for a store
router.post('/:id/discover-categories', (0, middleware_1.requireRole)('superadmin', 'admin'), async (req, res) => {
    try {
        const { id } = req.params;
        const storeResult = await migrate_1.pool.query('SELECT id FROM stores WHERE id = $1', [id]);
        if (storeResult.rows.length === 0) {
            return res.status(404).json({ error: 'Store not found' });
        }
        (0, scraper_v2_1.discoverCategories)(parseInt(id)).catch(err => {
            console.error('Background category discovery error:', err);
        });
        res.json({ message: 'Category discovery started' });
    }
    catch (error) {
        console.error('Error triggering category discovery:', error);
        res.status(500).json({ error: 'Failed to trigger category discovery' });
    }
});
// Debug scraper
router.post('/:id/debug-scrape', (0, middleware_1.requireRole)('superadmin', 'admin'), async (req, res) => {
    try {
        const { id } = req.params;
        console.log('Debug scrape triggered for store:', id);
        const categoryResult = await migrate_1.pool.query(`
            SELECT c.dutchie_url, c.name
            FROM categories c
            WHERE c.store_id = $1 AND c.slug = 'edibles'
            LIMIT 1
        `, [id]);
        if (categoryResult.rows.length === 0) {
            return res.status(404).json({ error: 'Edibles category not found' });
        }
        console.log('Found category:', categoryResult.rows[0]);
        const { debugDutchiePage } = await Promise.resolve().then(() => __importStar(require('../services/scraper-debug')));
        debugDutchiePage(categoryResult.rows[0].dutchie_url).catch(err => {
            console.error('Debug error:', err);
        });
        res.json({ message: 'Debug started, check logs', url: categoryResult.rows[0].dutchie_url });
    }
    catch (error) {
        console.error('Debug endpoint error:', error);
        res.status(500).json({ error: 'Failed to debug' });
    }
});
exports.default = router;