"use strict";
/**
 * Crawler Sandbox API Routes
 *
 * Endpoints for managing sandbox crawls, templates, and provider detection.
 * Every route requires authentication; mutating routes additionally require
 * the 'admin' role.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const express_1 = __importDefault(require("express"));
const migrate_1 = require("../db/migrate");
const middleware_1 = require("../auth/middleware");
const logger_1 = require("../services/logger");
const crawler_jobs_1 = require("../services/crawler-jobs");
const router = express_1.default.Router();
// Apply auth middleware to all routes
router.use(middleware_1.authMiddleware);

// ========================================
// Shared helpers
// ========================================

/** Page size used by the list endpoints when `limit` is absent or invalid. */
const DEFAULT_PAGE_SIZE = 50;

/**
 * Log an unexpected failure and answer with HTTP 500.
 *
 * @param {object} res - Express response.
 * @param {string} label - Log prefix identifying the failing route.
 * @param {Error} error - The caught error.
 */
function sendServerError(res, label, error) {
    logger_1.logger.error('api', `${label}: ${error.message}`);
    res.status(500).json({ error: error.message });
}

/**
 * Parse a query-string value as a non-negative integer.
 *
 * @param {*} raw - Raw value from `req.query`.
 * @param {number} fallback - Returned when `raw` is missing or not a
 *   non-negative integer, so a NaN/negative value never reaches LIMIT/OFFSET.
 * @returns {number}
 */
function toNonNegativeInt(raw, fallback) {
    if (raw === undefined || raw === null || raw === '') {
        return fallback;
    }
    const parsed = Number(raw);
    return Number.isInteger(parsed) && parsed >= 0 ? parsed : fallback;
}

/**
 * Parse the optional `dispensaryId` query filter.
 *
 * @param {*} raw - Raw value from `req.query`.
 * @returns {number|undefined|null} The numeric id, `undefined` when no filter
 *   was supplied, or `null` when a value was supplied but is not an integer.
 */
function parseDispensaryFilter(raw) {
    if (!raw) {
        return undefined;
    }
    const parsed = Number(raw);
    return Number.isInteger(parsed) ? parsed : null;
}

/**
 * Build a parameterized WHERE clause for the optional status / dispensary
 * filters shared by the list endpoints.
 *
 * @param {string} alias - Table alias; always a hard-coded literal supplied by
 *   the caller, never user input.
 * @param {*} status - Optional status filter (skipped when falsy).
 * @param {number|undefined} dispensaryId - Optional, already-validated id.
 * @returns {{ where: string, params: Array }} `where` is '' when no filter applies.
 */
function buildFilters(alias, status, dispensaryId) {
    const conditions = [];
    const params = [];
    if (status) {
        params.push(status);
        conditions.push(`${alias}.status = $${params.length}`);
    }
    if (dispensaryId !== undefined) {
        params.push(dispensaryId);
        conditions.push(`${alias}.dispensary_id = $${params.length}`);
    }
    const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
    return { where, params };
}

/**
 * Turn `[column, value]` pairs into `column = $n` assignments, skipping
 * pairs whose value is `undefined`.
 *
 * @param {Array<[string, *]>} columnValues - Column names are hard-coded by
 *   the callers; only the values come from the request.
 * @returns {{ assignments: string[], params: Array }}
 */
function buildAssignments(columnValues) {
    const assignments = [];
    const params = [];
    for (const [column, value] of columnValues) {
        if (value === undefined) {
            continue;
        }
        params.push(value);
        assignments.push(`${column} = $${params.length}`);
    }
    return { assignments, params };
}

/** Serialize a value for a JSON column, leaving `undefined` (field not sent) untouched. */
function jsonOrSkip(value) {
    return value === undefined ? undefined : JSON.stringify(value);
}

/**
 * Run `work` inside a single database transaction.
 *
 * NOTE(review): assumes `migrate_1.pool` is a node-postgres Pool exposing
 * `connect()` and clients exposing `release()` - confirm against ../db/migrate.
 *
 * @param {(client: object) => Promise<*>} work - Receives the transaction's client.
 * @returns {Promise<*>} Whatever `work` resolves to, after COMMIT.
 * @throws Re-throws any error from `work` (or COMMIT) after attempting ROLLBACK.
 */
async function withTransaction(work) {
    const client = await migrate_1.pool.connect();
    try {
        await client.query('BEGIN');
        const outcome = await work(client);
        await client.query('COMMIT');
        return outcome;
    }
    catch (error) {
        try {
            await client.query('ROLLBACK');
        }
        catch (rollbackError) {
            // Keep the original error as the one reported to the caller.
            logger_1.logger.error('api', `Rollback error: ${rollbackError.message}`);
        }
        throw error;
    }
    finally {
        client.release();
    }
}

// ========================================
// Sandbox Entries
// ========================================

/**
 * GET /api/crawler-sandbox
 * List sandbox entries with optional filters.
 *
 * Query: status, dispensaryId, limit (default 50), offset (default 0).
 * Responds with { sandboxes, total, limit, offset }; 400 when dispensaryId
 * is present but not an integer.
 */
router.get('/', async (req, res) => {
    try {
        const { status } = req.query;
        const dispensaryId = parseDispensaryFilter(req.query.dispensaryId);
        if (dispensaryId === null) {
            return res.status(400).json({ error: 'dispensaryId must be an integer' });
        }
        const limit = toNonNegativeInt(req.query.limit, DEFAULT_PAGE_SIZE);
        const offset = toNonNegativeInt(req.query.offset, 0);
        // One filter definition feeds both queries so the page and the total
        // can never disagree about which rows are selected.
        const { where, params } = buildFilters('cs', status, dispensaryId);
        const listQuery = `
      SELECT cs.*, d.name as dispensary_name, d.website, d.menu_provider, d.crawler_status
      FROM crawler_sandboxes cs
      JOIN dispensaries d ON d.id = cs.dispensary_id
      ${where}
      ORDER BY cs.created_at DESC
      LIMIT $${params.length + 1} OFFSET $${params.length + 2}`;
        const countQuery = `SELECT COUNT(*) FROM crawler_sandboxes cs ${where}`;
        const [result, countResult] = await Promise.all([
            migrate_1.pool.query(listQuery, [...params, limit, offset]),
            migrate_1.pool.query(countQuery, params),
        ]);
        res.json({
            sandboxes: result.rows,
            total: Number.parseInt(countResult.rows[0].count, 10),
            limit,
            offset,
        });
    }
    catch (error) {
        sendServerError(res, 'Get sandboxes error', error);
    }
});

/**
 * GET /api/crawler-sandbox/:id
 * Get a single sandbox entry with full details plus its 10 most recent
 * related jobs. Responds 404 when the entry does not exist.
 */
router.get('/:id', async (req, res) => {
    try {
        const { id } = req.params;
        const result = await migrate_1.pool.query(`SELECT cs.*, d.name as dispensary_name, d.website, d.menu_url,
              d.menu_provider, d.menu_provider_confidence, d.crawler_mode, d.crawler_status
       FROM crawler_sandboxes cs
       JOIN dispensaries d ON d.id = cs.dispensary_id
       WHERE cs.id = $1`, [id]);
        if (result.rows.length === 0) {
            return res.status(404).json({ error: 'Sandbox entry not found' });
        }
        const sandbox = result.rows[0];
        // Get related jobs
        const jobs = await migrate_1.pool.query(`SELECT * FROM sandbox_crawl_jobs
       WHERE sandbox_id = $1 OR dispensary_id = $2
       ORDER BY created_at DESC
       LIMIT 10`, [id, sandbox.dispensary_id]);
        res.json({
            sandbox,
            jobs: jobs.rows,
        });
    }
    catch (error) {
        sendServerError(res, 'Get sandbox error', error);
    }
});

/**
 * POST /api/crawler-sandbox/:id/analyze
 * Trigger re-analysis of a sandbox entry (admin only).
 *
 * Queues a 'deep_crawl' job and resets the sandbox to 'pending' atomically.
 * Responds with { message, jobId }; 404 when the entry does not exist.
 */
router.post('/:id/analyze', (0, middleware_1.requireRole)('admin'), async (req, res) => {
    try {
        const { id } = req.params;
        const outcome = await withTransaction(async (client) => {
            const sandbox = await client.query('SELECT * FROM crawler_sandboxes WHERE id = $1', [id]);
            if (sandbox.rows.length === 0) {
                return { found: false };
            }
            // Queue a new sandbox job
            const job = await client.query(`INSERT INTO sandbox_crawl_jobs (dispensary_id, sandbox_id, job_type, status, priority)
         VALUES ($1, $2, 'deep_crawl', 'pending', 20)
         RETURNING id`, [sandbox.rows[0].dispensary_id, id]);
            // Update sandbox status
            await client.query(`UPDATE crawler_sandboxes SET status = 'pending', updated_at = NOW() WHERE id = $1`, [id]);
            return { found: true, jobId: job.rows[0].id };
        });
        if (!outcome.found) {
            return res.status(404).json({ error: 'Sandbox entry not found' });
        }
        res.json({
            message: 'Analysis job queued',
            jobId: outcome.jobId,
        });
    }
    catch (error) {
        sendServerError(res, 'Analyze sandbox error', error);
    }
});

/**
 * POST /api/crawler-sandbox/:id/move-to-production
 * Move a sandbox entry to production (for Dutchie dispensaries; admin only).
 *
 * The dispensary and sandbox rows are updated in one transaction so a
 * failure cannot leave them disagreeing. Responds 404 when the entry does
 * not exist and 400 when the dispensary's provider is not 'dutchie'.
 */
router.post('/:id/move-to-production', (0, middleware_1.requireRole)('admin'), async (req, res) => {
    try {
        const { id } = req.params;
        const outcome = await withTransaction(async (client) => {
            const sandbox = await client.query(`SELECT cs.*, d.menu_provider
         FROM crawler_sandboxes cs
         JOIN dispensaries d ON d.id = cs.dispensary_id
         WHERE cs.id = $1`, [id]);
            if (sandbox.rows.length === 0) {
                return 'not_found';
            }
            const entry = sandbox.rows[0];
            // Can only move to production if provider is dutchie
            if (entry.menu_provider !== 'dutchie') {
                return 'unsupported_provider';
            }
            // Update dispensary to production mode
            await client.query(`UPDATE dispensaries
         SET crawler_mode = 'production', crawler_status = 'idle', updated_at = NOW()
         WHERE id = $1`, [entry.dispensary_id]);
            // Mark sandbox as moved
            await client.query(`UPDATE crawler_sandboxes
         SET status = 'moved_to_production', updated_at = NOW()
         WHERE id = $1`, [id]);
            return 'moved';
        });
        if (outcome === 'not_found') {
            return res.status(404).json({ error: 'Sandbox entry not found' });
        }
        if (outcome === 'unsupported_provider') {
            return res.status(400).json({
                error: 'Only Dutchie dispensaries can be moved to production currently',
            });
        }
        res.json({ message: 'Dispensary moved to production' });
    }
    catch (error) {
        sendServerError(res, 'Move to production error', error);
    }
});

/**
 * PATCH /api/crawler-sandbox/:id
 * Update sandbox entry (e.g., add human review notes; admin only).
 *
 * Body: human_review_notes, status, suspected_menu_provider (all optional).
 * Responds 400 when no updatable field is supplied and 404 when the entry
 * does not exist.
 */
router.patch('/:id', (0, middleware_1.requireRole)('admin'), async (req, res) => {
    try {
        const { id } = req.params;
        const { human_review_notes, status, suspected_menu_provider } = req.body;
        const { assignments, params } = buildAssignments([
            ['human_review_notes', human_review_notes],
            // A falsy status (e.g. '') is treated as "not provided".
            ['status', status || undefined],
            ['suspected_menu_provider', suspected_menu_provider],
        ]);
        if (assignments.length === 0) {
            return res.status(400).json({ error: 'No updates provided' });
        }
        assignments.push('updated_at = NOW()');
        if (human_review_notes !== undefined) {
            assignments.push('reviewed_at = NOW()');
        }
        params.push(id);
        const result = await migrate_1.pool.query(`UPDATE crawler_sandboxes SET ${assignments.join(', ')} WHERE id = $${params.length} RETURNING id`, params);
        if (result.rows.length === 0) {
            return res.status(404).json({ error: 'Sandbox entry not found' });
        }
        res.json({ message: 'Sandbox updated' });
    }
    catch (error) {
        sendServerError(res, 'Update sandbox error', error);
    }
});

// ========================================
// Templates
// ========================================

/**
 * GET /api/crawler-sandbox/templates/list
 * List all crawler templates, ordered by provider with each provider's
 * default template first.
 */
router.get('/templates/list', async (req, res) => {
    try {
        const result = await migrate_1.pool.query(`SELECT * FROM crawler_templates ORDER BY provider, is_default_for_provider DESC, name`);
        res.json({ templates: result.rows });
    }
    catch (error) {
        sendServerError(res, 'Get templates error', error);
    }
});

/**
 * GET /api/crawler-sandbox/templates/:id
 * Get a single template. Responds 404 when it does not exist.
 */
router.get('/templates/:id', async (req, res) => {
    try {
        const { id } = req.params;
        const result = await migrate_1.pool.query('SELECT * FROM crawler_templates WHERE id = $1', [id]);
        if (result.rows.length === 0) {
            return res.status(404).json({ error: 'Template not found' });
        }
        res.json({ template: result.rows[0] });
    }
    catch (error) {
        sendServerError(res, 'Get template error', error);
    }
});

/**
 * POST /api/crawler-sandbox/templates
 * Create a new template (admin only).
 *
 * Body: provider and name are required; the *_config / validation_rules
 * objects default to {}. Responds 201 with the created row.
 */
router.post('/templates', (0, middleware_1.requireRole)('admin'), async (req, res) => {
    try {
        const { provider, name, selector_config, navigation_config, transform_config, validation_rules, notes, } = req.body;
        if (!provider || !name) {
            return res.status(400).json({ error: 'provider and name are required' });
        }
        const result = await migrate_1.pool.query(`INSERT INTO crawler_templates
       (provider, name, selector_config, navigation_config, transform_config, validation_rules, notes, created_by)
       VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
       RETURNING *`, [
            provider,
            name,
            JSON.stringify(selector_config || {}),
            JSON.stringify(navigation_config || {}),
            JSON.stringify(transform_config || {}),
            JSON.stringify(validation_rules || {}),
            notes,
            req.user?.email || 'system',
        ]);
        res.status(201).json({ template: result.rows[0] });
    }
    catch (error) {
        sendServerError(res, 'Create template error', error);
    }
});

/**
 * PUT /api/crawler-sandbox/templates/:id
 * Update a template (admin only).
 *
 * Only the fields present in the body are changed. Responds with the updated
 * row, 400 when no updatable field is supplied, and 404 when the template
 * does not exist.
 */
router.put('/templates/:id', (0, middleware_1.requireRole)('admin'), async (req, res) => {
    try {
        const { id } = req.params;
        const { is_active, is_default_for_provider, selector_config, navigation_config, transform_config, validation_rules, notes, } = req.body;
        const { assignments, params } = buildAssignments([
            ['is_active', is_active],
            ['is_default_for_provider', is_default_for_provider],
            ['selector_config', jsonOrSkip(selector_config)],
            ['navigation_config', jsonOrSkip(navigation_config)],
            ['transform_config', jsonOrSkip(transform_config)],
            ['validation_rules', jsonOrSkip(validation_rules)],
            ['notes', notes],
        ]);
        if (assignments.length === 0) {
            return res.status(400).json({ error: 'No updates provided' });
        }
        assignments.push('updated_at = NOW()');
        params.push(id);
        // RETURNING * yields the fresh row in the same statement and reveals
        // whether any row matched at all.
        const result = await migrate_1.pool.query(`UPDATE crawler_templates SET ${assignments.join(', ')} WHERE id = $${params.length} RETURNING *`, params);
        if (result.rows.length === 0) {
            return res.status(404).json({ error: 'Template not found' });
        }
        res.json({ template: result.rows[0] });
    }
    catch (error) {
        sendServerError(res, 'Update template error', error);
    }
});

// ========================================
// Jobs
// ========================================

/**
 * GET /api/crawler-sandbox/jobs/list
 * List sandbox crawl jobs, newest first.
 *
 * Query: status, dispensaryId, limit (default 50). Responds 400 when
 * dispensaryId is present but not an integer.
 */
router.get('/jobs/list', async (req, res) => {
    try {
        const { status } = req.query;
        const dispensaryId = parseDispensaryFilter(req.query.dispensaryId);
        if (dispensaryId === null) {
            return res.status(400).json({ error: 'dispensaryId must be an integer' });
        }
        const limit = toNonNegativeInt(req.query.limit, DEFAULT_PAGE_SIZE);
        const { where, params } = buildFilters('sj', status, dispensaryId);
        const query = `
      SELECT sj.*, d.name as dispensary_name
      FROM sandbox_crawl_jobs sj
      JOIN dispensaries d ON d.id = sj.dispensary_id
      ${where}
      ORDER BY sj.created_at DESC
      LIMIT $${params.length + 1}`;
        const result = await migrate_1.pool.query(query, [...params, limit]);
        res.json({ jobs: result.rows });
    }
    catch (error) {
        sendServerError(res, 'Get jobs error', error);
    }
});

/**
 * POST /api/crawler-sandbox/jobs/detect/:dispensaryId
 * Trigger provider detection for a dispensary (admin only).
 *
 * Marks the dispensary 'queued_detection' and inserts a 'detection' job in
 * one transaction. Responds with { message, jobId }; 404 when the dispensary
 * does not exist.
 */
router.post('/jobs/detect/:dispensaryId', (0, middleware_1.requireRole)('admin'), async (req, res) => {
    try {
        const { dispensaryId } = req.params;
        const outcome = await withTransaction(async (client) => {
            // Update dispensary status; doubles as the existence check.
            const dispensary = await client.query(`UPDATE dispensaries SET crawler_status = 'queued_detection', updated_at = NOW() WHERE id = $1 RETURNING id`, [dispensaryId]);
            if (dispensary.rows.length === 0) {
                return { found: false };
            }
            // Create detection job
            const job = await client.query(`INSERT INTO sandbox_crawl_jobs (dispensary_id, job_type, status, priority)
         VALUES ($1, 'detection', 'pending', 30)
         RETURNING id`, [dispensaryId]);
            return { found: true, jobId: job.rows[0].id };
        });
        if (!outcome.found) {
            return res.status(404).json({ error: 'Dispensary not found' });
        }
        res.json({
            message: 'Detection job queued',
            jobId: outcome.jobId,
        });
    }
    catch (error) {
        sendServerError(res, 'Queue detection error', error);
    }
});

/**
 * POST /api/crawler-sandbox/jobs/run/:id
 * Immediately run a sandbox job (admin only).
 *
 * 'detection' jobs go to runDetectMenuProviderJob; every other job type goes
 * to runSandboxCrawlJob. The job row is then marked 'completed' or 'failed'
 * from the runner's `success` flag and the runner's result is returned
 * as-is. Responds 404 when the job does not exist.
 */
router.post('/jobs/run/:id', (0, middleware_1.requireRole)('admin'), async (req, res) => {
    try {
        const { id } = req.params;
        const job = await migrate_1.pool.query('SELECT * FROM sandbox_crawl_jobs WHERE id = $1', [id]);
        if (job.rows.length === 0) {
            return res.status(404).json({ error: 'Job not found' });
        }
        const jobData = job.rows[0];
        // Run the job immediately
        const result = jobData.job_type === 'detection'
            ? await (0, crawler_jobs_1.runDetectMenuProviderJob)(jobData.dispensary_id)
            : await (0, crawler_jobs_1.runSandboxCrawlJob)(jobData.dispensary_id, jobData.sandbox_id);
        // Update job status
        await migrate_1.pool.query(`UPDATE sandbox_crawl_jobs
       SET status = $1, completed_at = NOW(), result_summary = $2, error_message = $3
       WHERE id = $4`, [
            result.success ? 'completed' : 'failed',
            JSON.stringify(result.data || {}),
            result.success ? null : result.message,
            id,
        ]);
        res.json(result);
    }
    catch (error) {
        sendServerError(res, 'Run job error', error);
    }
});

// ========================================
// Stats
// ========================================

/**
 * GET /api/crawler-sandbox/stats/overview
 * Get sandbox/crawler statistics.
 *
 * Responds with { providers, modes, statuses, sandbox, jobs, recentActivity }.
 */
router.get('/stats/overview', async (req, res) => {
    try {
        // The six aggregates are independent of each other, so issue them together.
        const [providerStats, modeStats, statusStats, sandboxStats, jobStats, recentActivity,] = await Promise.all([
            // Dispensary provider stats
            migrate_1.pool.query(`
      SELECT
        menu_provider,
        COUNT(*) as count,
        AVG(menu_provider_confidence)::integer as avg_confidence
      FROM dispensaries
      WHERE menu_provider IS NOT NULL
      GROUP BY menu_provider
      ORDER BY count DESC
    `),
            // Mode stats
            migrate_1.pool.query(`
      SELECT
        crawler_mode,
        COUNT(*) as count
      FROM dispensaries
      GROUP BY crawler_mode
    `),
            // Status stats
            migrate_1.pool.query(`
      SELECT
        crawler_status,
        COUNT(*) as count
      FROM dispensaries
      GROUP BY crawler_status
      ORDER BY count DESC
    `),
            // Sandbox stats
            migrate_1.pool.query(`
      SELECT
        status,
        COUNT(*) as count
      FROM crawler_sandboxes
      GROUP BY status
    `),
            // Job stats
            migrate_1.pool.query(`
      SELECT
        status,
        job_type,
        COUNT(*) as count
      FROM sandbox_crawl_jobs
      GROUP BY status, job_type
    `),
            // Recent activity
            migrate_1.pool.query(`
      SELECT 'sandbox' as type, id, dispensary_id, status, created_at
      FROM crawler_sandboxes
      ORDER BY created_at DESC
      LIMIT 5
    `),
        ]);
        res.json({
            providers: providerStats.rows,
            modes: modeStats.rows,
            statuses: statusStats.rows,
            sandbox: sandboxStats.rows,
            jobs: jobStats.rows,
            recentActivity: recentActivity.rows,
        });
    }
    catch (error) {
        sendServerError(res, 'Get stats error', error);
    }
});
exports.default = router;