Add crawler scheduler, orchestrator, and multi-category intelligence

- Add scheduler UI with store schedules, job queue, and global settings
- Add store crawl orchestrator for intelligent crawl workflow
- Add multi-category intelligence detection (product, specials, brands, metadata)
- Add CrawlerLogger for structured JSON logging
- Add migrations for scheduler tables and dispensary linking
- Add dispensary → scheduler navigation link
- Support production/sandbox crawler modes per provider

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-11-30 09:29:15 -07:00
parent 8b4292fbb2
commit 3861a31a3b
25 changed files with 8874 additions and 13 deletions

View File

@@ -49,7 +49,10 @@ import scraperMonitorRoutes from './routes/scraper-monitor';
import apiTokensRoutes from './routes/api-tokens';
import apiPermissionsRoutes from './routes/api-permissions';
import parallelScrapeRoutes from './routes/parallel-scrape';
import scheduleRoutes from './routes/schedule';
import crawlerSandboxRoutes from './routes/crawler-sandbox';
import { trackApiUsage, checkRateLimit } from './middleware/apiTokenTracker';
import { startCrawlScheduler } from './services/crawl-scheduler';
import { validateWordPressPermissions } from './middleware/wordpressPermissions';
// Apply WordPress permissions validation first (sets req.apiToken)
@@ -75,6 +78,8 @@ app.use('/api/scraper-monitor', scraperMonitorRoutes);
app.use('/api/api-tokens', apiTokensRoutes);
app.use('/api/api-permissions', apiPermissionsRoutes);
app.use('/api/parallel-scrape', parallelScrapeRoutes);
app.use('/api/schedule', scheduleRoutes);
app.use('/api/crawler-sandbox', crawlerSandboxRoutes);
async function startServer() {
try {
@@ -86,6 +91,10 @@ async function startServer() {
// Clean up any orphaned proxy test jobs from previous server runs
await cleanupOrphanedJobs();
// Start the crawl scheduler (checks every minute for jobs to run)
startCrawlScheduler();
logger.info('system', 'Crawl scheduler started');
app.listen(PORT, () => {
logger.info('system', `Server running on port ${PORT}`);
console.log(`🚀 Server running on port ${PORT}`);