Initial commit - Dutchie dispensary scraper

Kelly
2025-11-28 19:45:44 -07:00
commit 5757a8e9bd
23375 changed files with 3788799 additions and 0 deletions

backend/src/services/scheduler.ts Executable file

@@ -0,0 +1,116 @@
import cron from 'node-cron';
import { pool } from '../db/migrate';
import { scrapeStore, scrapeCategory } from '../scraper-v2';

// Handles to the active cron jobs and the startup timer, kept so the
// scheduler can be stopped or restarted cleanly.
let scheduledJobs: cron.ScheduledTask[] = [];
let initialScrapeTimer: NodeJS.Timeout | null = null;

async function getSettings(): Promise<{
  scrapeIntervalHours: number;
  scrapeSpecialsTime: string;
}> {
  const result = await pool.query(`
    SELECT key, value FROM settings
    WHERE key IN ('scrape_interval_hours', 'scrape_specials_time')
  `);

  const settings: Record<string, string> = {};
  result.rows.forEach(row => {
    settings[row.key] = row.value;
  });

  return {
    scrapeIntervalHours: parseInt(settings.scrape_interval_hours || '4', 10),
    scrapeSpecialsTime: settings.scrape_specials_time || '00:01'
  };
}
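
// Note: this assumes the settings table stores one string value per key, e.g.
// rows ('scrape_interval_hours', '4') and ('scrape_specials_time', '00:01');
// missing rows fall back to the defaults above.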

async function scrapeAllStores(): Promise<void> {
  console.log('🔄 Starting scheduled scrape for all stores...');

  const result = await pool.query(`
    SELECT id, name FROM stores WHERE active = true AND scrape_enabled = true
  `);

  // Scrape stores one at a time; a failure for one store is logged and the
  // loop moves on to the next.
  for (const store of result.rows) {
    try {
      console.log(`Scraping store: ${store.name}`);
      await scrapeStore(store.id);
    } catch (error) {
      console.error(`Failed to scrape store ${store.name}:`, error);
    }
  }

  console.log('✅ Scheduled scrape completed');
}
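
// A concurrent alternative (a sketch only, not what this file does) would be:
//   await Promise.allSettled(result.rows.map(s => scrapeStore(s.id)));
// Sequential scraping keeps per-store failures isolated in the logs and the
// load on the target site predictable.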

async function scrapeSpecials(): Promise<void> {
  console.log('🌟 Starting scheduled specials scrape...');

  // Find the 'specials' category for every store with scraping enabled.
  const result = await pool.query(`
    SELECT s.id, s.name, c.id AS category_id
    FROM stores s
    JOIN categories c ON c.store_id = s.id
    WHERE s.active = true AND s.scrape_enabled = true
      AND c.slug = 'specials' AND c.scrape_enabled = true
  `);

  for (const row of result.rows) {
    try {
      console.log(`Scraping specials for: ${row.name}`);
      await scrapeCategory(row.id, row.category_id);
    } catch (error) {
      console.error(`Failed to scrape specials for ${row.name}:`, error);
    }
  }

  console.log('✅ Specials scrape completed');
}

export async function startScheduler(): Promise<void> {
  // Stop any existing jobs (and pending timers) before scheduling new ones.
  stopScheduler();

  const settings = await getSettings();

  // Schedule regular store scrapes every N hours; the default of 4 yields
  // '0 */4 * * *', i.e. minute 0 of every 4th hour. This assumes the interval
  // setting is at least 1, since '*/0' is not a valid cron step.
  const scrapeIntervalCron = `0 */${settings.scrapeIntervalHours} * * *`;
  const storeJob = cron.schedule(scrapeIntervalCron, scrapeAllStores);
  scheduledJobs.push(storeJob);
  console.log(`📅 Scheduled store scraping: every ${settings.scrapeIntervalHours} hours`);

  // Schedule specials scraping daily at the configured HH:MM.
  const [hours, minutes] = settings.scrapeSpecialsTime.split(':');
  const specialsCron = `${minutes} ${hours} * * *`;
  const specialsJob = cron.schedule(specialsCron, scrapeSpecials);
  scheduledJobs.push(specialsJob);
  console.log(`📅 Scheduled specials scraping: daily at ${settings.scrapeSpecialsTime}`);

  // Run an initial scrape shortly after startup (10-second delay).
  initialScrapeTimer = setTimeout(() => {
    console.log('🚀 Running initial scrape...');
    scrapeAllStores().catch(console.error);
  }, 10000);
}
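
// Typical wiring (a sketch; the actual entry point is not shown in this file):
//   import { startScheduler } from './services/scheduler';
//   startScheduler().catch(console.error);
// called once after the server boots.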

export function stopScheduler(): void {
  scheduledJobs.forEach(job => job.stop());
  scheduledJobs = [];
  // Cancel a pending initial scrape so a restart does not double-fire it.
  if (initialScrapeTimer) {
    clearTimeout(initialScrapeTimer);
    initialScrapeTimer = null;
  }
  console.log('🛑 Scheduler stopped');
}

export async function restartScheduler(): Promise<void> {
  console.log('🔄 Restarting scheduler...');
  stopScheduler();
  await startScheduler();
}

// Manual trigger functions for admin
export async function triggerStoreScrape(storeId: number): Promise<void> {
  console.log(`🔧 Manual scrape triggered for store ID: ${storeId}`);
  await scrapeStore(storeId);
}

export async function triggerAllStoresScrape(): Promise<void> {
  console.log('🔧 Manual scrape triggered for all stores');
  await scrapeAllStores();
}
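
// Example admin wiring (a sketch; assumes an Express-style router, which is
// not part of this file):
//   router.post('/admin/stores/:id/scrape', async (req, res) => {
//     await triggerStoreScrape(Number(req.params.id));
//     res.json({ ok: true });
//   });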