feat: AZ dispensary harmonization with Dutchie source of truth

Major changes:
- Add harmonize-az-dispensaries.ts script to sync dispensaries with Dutchie API
- Add migration 057 for crawl_enabled and dutchie_verified fields
- Remove legacy dutchie-az module (replaced by platforms/dutchie)
- Clean up deprecated crawlers, scrapers, and orchestrator code
- Update location-discovery to not fall back to slug when ID is missing
- Add crawl-rotator service for proxy rotation
- Add types/index.ts for shared type definitions
- Add woodpecker-agent k8s manifest

Harmonization script:
- Queries ConsumerDispensaries API for all 32 AZ cities
- Matches dispensaries by platform_dispensary_id (not slug)
- Updates existing records with full Dutchie data
- Creates new records for unmatched Dutchie dispensaries
- Disables dispensaries not found in Dutchie

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-08 10:19:49 -07:00
parent 948a732dd5
commit b7cfec0770
112 changed files with 3163 additions and 34694 deletions

View File

@@ -6,6 +6,7 @@ import { initializeMinio, isMinioEnabled } from './utils/minio';
import { initializeImageStorage } from './utils/image-storage';
import { logger } from './services/logger';
import { cleanupOrphanedJobs } from './services/proxyTestQueue';
import healthRoutes from './routes/health';
dotenv.config();
@@ -58,22 +59,15 @@ import scraperMonitorRoutes from './routes/scraper-monitor';
import apiTokensRoutes from './routes/api-tokens';
import apiPermissionsRoutes from './routes/api-permissions';
import parallelScrapeRoutes from './routes/parallel-scrape';
import scheduleRoutes from './routes/schedule';
import crawlerSandboxRoutes from './routes/crawler-sandbox';
import versionRoutes from './routes/version';
import publicApiRoutes from './routes/public-api';
import usersRoutes from './routes/users';
import staleProcessesRoutes from './routes/stale-processes';
import orchestratorAdminRoutes from './routes/orchestrator-admin';
import adminRoutes from './routes/admin';
import healthRoutes from './routes/health';
import workersRoutes from './routes/workers';
import { dutchieAZRouter, startScheduler as startDutchieAZScheduler, initializeDefaultSchedules } from './dutchie-az';
import { getPool } from './dutchie-az/db/connection';
import { createAnalyticsRouter } from './dutchie-az/routes/analytics';
import { createMultiStateRoutes } from './multi-state';
import { trackApiUsage, checkRateLimit } from './middleware/apiTokenTracker';
import { startCrawlScheduler } from './services/crawl-scheduler';
import { validateWordPressPermissions } from './middleware/wordpressPermissions';
import { markTrustedDomains } from './middleware/trustedDomains';
import { createSystemRouter, createPrometheusRouter } from './system/routes';
@@ -81,7 +75,7 @@ import { createPortalRoutes } from './portals';
import { createStatesRouter } from './routes/states';
import { createAnalyticsV2Router } from './routes/analytics-v2';
import { createDiscoveryRoutes } from './discovery';
import { createDutchieDiscoveryRoutes, promoteDiscoveryLocation } from './dutchie-az/discovery';
import { getPool } from './db/pool';
// Consumer API routes (findadispo.com, findagram.co)
import consumerAuthRoutes from './routes/consumer-auth';
@@ -132,41 +126,22 @@ app.use('/api/scraper-monitor', scraperMonitorRoutes);
app.use('/api/api-tokens', apiTokensRoutes);
app.use('/api/api-permissions', apiPermissionsRoutes);
app.use('/api/parallel-scrape', parallelScrapeRoutes);
app.use('/api/schedule', scheduleRoutes);
app.use('/api/crawler-sandbox', crawlerSandboxRoutes);
app.use('/api/version', versionRoutes);
app.use('/api/users', usersRoutes);
app.use('/api/stale-processes', staleProcessesRoutes);
// Admin routes - operator actions (crawl triggers, health checks)
app.use('/api/admin', adminRoutes);
// Admin routes - orchestrator actions
app.use('/api/admin/orchestrator', orchestratorAdminRoutes);
// SEO orchestrator routes
app.use('/api/seo', seoRoutes);
// Provider-agnostic worker management routes (replaces /api/dutchie-az/admin/schedules)
// Provider-agnostic worker management routes
app.use('/api/workers', workersRoutes);
// Monitor routes - aliased from workers for convenience
app.use('/api/monitor', workersRoutes);
console.log('[Workers] Routes registered at /api/workers and /api/monitor');
// Market data pipeline routes (provider-agnostic)
app.use('/api/markets', dutchieAZRouter);
// Legacy aliases (deprecated - remove after frontend migration)
app.use('/api/az', dutchieAZRouter);
app.use('/api/dutchie-az', dutchieAZRouter);
// Phase 3: Analytics Dashboards - price trends, penetration, category growth, etc.
try {
const analyticsRouter = createAnalyticsRouter(getPool());
app.use('/api/markets/analytics', analyticsRouter);
// Legacy alias for backwards compatibility
app.use('/api/az/analytics', analyticsRouter);
console.log('[Analytics] Routes registered at /api/markets/analytics');
} catch (error) {
console.warn('[Analytics] Failed to register routes:', error);
}
// Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation
try {
const analyticsV2Router = createAnalyticsV2Router(getPool());
@@ -239,43 +214,7 @@ try {
}
// Platform-specific Discovery Routes
// Uses neutral slugs to avoid trademark issues in URLs:
// dt = Dutchie, jn = Jane, wm = Weedmaps, etc.
// Routes: /api/discovery/platforms/:platformSlug/*
try {
const dtDiscoveryRoutes = createDutchieDiscoveryRoutes(getPool());
app.use('/api/discovery/platforms/dt', dtDiscoveryRoutes);
console.log('[Discovery] Platform routes registered at /api/discovery/platforms/dt');
} catch (error) {
console.warn('[Discovery] Failed to register platform routes:', error);
}
// Orchestrator promotion endpoint (platform-agnostic)
// Route: POST /api/orchestrator/platforms/:platformSlug/promote/:id
//
// Responds with:
//   - 400 and { success: false, error } when the platform slug is not in the
//     allow-list or when :id is not numeric
//   - the promotion result via res.json when result.success is truthy
//   - 400 and the promotion result when result.success is falsy
//   - 500 and { success: false, error } when anything in the handler throws
app.post('/api/orchestrator/platforms/:platformSlug/promote/:id', async (req, res) => {
  try {
    const { platformSlug, id } = req.params;

    // Validate platform slug
    const validPlatforms = ['dt']; // dt = Dutchie
    if (!validPlatforms.includes(platformSlug)) {
      res.status(400).json({
        success: false,
        error: `Invalid platform slug: ${platformSlug}. Valid slugs: ${validPlatforms.join(', ')}`
      });
      return;
    }

    // parseInt yields NaN for non-numeric input; reject it here instead of
    // handing NaN to the promotion call.
    const locationId = parseInt(id, 10);
    if (Number.isNaN(locationId)) {
      res.status(400).json({
        success: false,
        error: `Invalid location id: ${id}`
      });
      return;
    }

    const result = await promoteDiscoveryLocation(getPool(), locationId);
    if (result.success) {
      res.json(result);
    } else {
      res.status(400).json(result);
    }
  } catch (error: unknown) {
    console.error('[Orchestrator] Promotion error:', error);
    // A thrown value is not guaranteed to be an Error; narrow before reading .message.
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ success: false, error: message });
  }
});
// TODO: Rebuild with /platforms/dutchie/ module
async function startServer() {
try {
@@ -288,15 +227,6 @@ async function startServer() {
// Clean up any orphaned proxy test jobs from previous server runs
await cleanupOrphanedJobs();
// Start the crawl scheduler (checks every minute for jobs to run)
startCrawlScheduler();
logger.info('system', 'Crawl scheduler started');
// Start the Dutchie AZ scheduler (enqueues jobs for workers)
await initializeDefaultSchedules();
startDutchieAZScheduler();
logger.info('system', 'Dutchie AZ scheduler started');
app.listen(PORT, () => {
logger.info('system', `Server running on port ${PORT}`);
console.log(`🚀 Server running on port ${PORT}`);