Initial commit - Dutchie dispensary scraper

This commit is contained in:
Kelly
2025-11-28 19:45:44 -07:00
commit 5757a8e9bd
23375 changed files with 3788799 additions and 0 deletions

303
backend/src/routes/stores.ts Executable file
View File

@@ -0,0 +1,303 @@
import { Router } from 'express';
import { authMiddleware, requireRole } from '../auth/middleware';
import { pool } from '../db/migrate';
import { scrapeStore, scrapeCategory, discoverCategories } from '../scraper-v2';
// Router that carries every store endpoint registered below.
const router = Router();
// Registered before any route, so authMiddleware runs ahead of every handler on this router.
router.use(authMiddleware);
// Get all stores
//
// GET / - responds with `{ stores: [...] }`: every row of `stores`, ordered by
// name, each extended with `product_count` and `category_count`.
// Responds 500 `{ error }` if the query fails.
router.get('/', async (_req, res) => {
  try {
    // Each count is a correlated subquery rather than two LEFT JOINs in one
    // SELECT: joining both products and categories pairs every product row
    // with every category row of the same store before aggregating.
    // DISTINCT is retained so the counts are computed exactly as before.
    const result = await pool.query(`
SELECT
s.*,
(SELECT COUNT(DISTINCT p.id) FROM products p WHERE p.store_id = s.id) AS product_count,
(SELECT COUNT(DISTINCT c.id) FROM categories c WHERE c.store_id = s.id) AS category_count
FROM stores s
ORDER BY s.name
`);
    res.json({ stores: result.rows });
  } catch (error) {
    console.error('Error fetching stores:', error);
    res.status(500).json({ error: 'Failed to fetch stores' });
  }
});
// Get single store
//
// GET /:id - responds with the matching `stores` row extended with
// `product_count` and `category_count`.
// Responds 404 `{ error }` when no store has that id, 500 `{ error }` if the
// query fails.
router.get('/:id', async (req, res) => {
  try {
    const { id } = req.params;
    // Each count is a correlated subquery rather than two LEFT JOINs in one
    // SELECT: joining both products and categories pairs every product row
    // with every category row of the store before aggregating.
    // DISTINCT is retained so the counts are computed exactly as before.
    const result = await pool.query(`
SELECT
s.*,
(SELECT COUNT(DISTINCT p.id) FROM products p WHERE p.store_id = s.id) AS product_count,
(SELECT COUNT(DISTINCT c.id) FROM categories c WHERE c.store_id = s.id) AS category_count
FROM stores s
WHERE s.id = $1
`, [id]);
    if (result.rows.length === 0) {
      return res.status(404).json({ error: 'Store not found' });
    }
    res.json(result.rows[0]);
  } catch (error) {
    console.error('Error fetching store:', error);
    res.status(500).json({ error: 'Failed to fetch store' });
  }
});
// Get store brands
//
// GET /:id/brands - responds with `{ brands: [...] }`, the `name` of every
// `brands` row whose store_id matches, sorted by name.
// Responds 500 `{ error }` if the query fails.
router.get('/:id/brands', async (req, res) => {
  try {
    const storeId = req.params.id;
    const { rows } = await pool.query(`
SELECT name
FROM brands
WHERE store_id = $1
ORDER BY name
`, [storeId]);

    // Flatten the result rows into a plain list of names.
    const brands: unknown[] = [];
    for (const row of rows) {
      brands.push(row.name);
    }
    res.json({ brands });
  } catch (err) {
    console.error('Error fetching store brands:', err);
    res.status(500).json({ error: 'Failed to fetch store brands' });
  }
});
// Get store specials
//
// GET /:id/specials?date=... - responds with `{ specials, date }`: the rows of
// `specials` for the store whose `valid_date` equals `date`, each joined with
// its product's name and image URL, ordered by special name.
// `date` defaults to the current date when omitted or empty.
// Responds 400 `{ error }` for an unusable `date` or id, 500 `{ error }` for
// any other failure.
router.get('/:id/specials', async (req, res) => {
  try {
    const { id } = req.params;
    const { date } = req.query;

    // A repeated or nested query parameter (?date=a&date=b, ?date[x]=1) may be
    // parsed into an array or object; only a plain string can be a date.
    if (date !== undefined && typeof date !== 'string') {
      return res.status(400).json({ error: 'date must be a single string value' });
    }

    // Use provided date or today's date.
    // toISOString() is UTC, so the default is the UTC calendar date.
    // NOTE(review): confirm that matches how specials.valid_date is populated.
    const queryDate = date || new Date().toISOString().slice(0, 10);

    const result = await pool.query(`
SELECT
s.*,
p.name as product_name,
p.image_url as product_image
FROM specials s
LEFT JOIN products p ON s.product_id = p.id
WHERE s.store_id = $1 AND s.valid_date = $2
ORDER BY s.name
`, [id, queryDate]);
    res.json({ specials: result.rows, date: queryDate });
  } catch (error: unknown) {
    // Assumes `pool` is a node-postgres pool, whose errors carry the SQLSTATE
    // in `code` - TODO confirm.
    const code =
      typeof error === 'object' && error !== null && 'code' in error
        ? (error as { code?: unknown }).code
        : undefined;
    // 22007 invalid_datetime_format, 22008 datetime_field_overflow,
    // 22P02 invalid_text_representation: the caller sent a value the database
    // could not parse, which is a client error rather than a server fault.
    if (code === '22007' || code === '22008' || code === '22P02') {
      return res.status(400).json({ error: 'Invalid store id or date' });
    }
    console.error('Error fetching store specials:', error);
    res.status(500).json({ error: 'Failed to fetch store specials' });
  }
});
// Create store
//
// POST / (superadmin or admin) - inserts a `stores` row from the JSON body
// fields `name`, `slug`, `dutchie_url`, `active`, `scrape_enabled` and responds
// 201 with the inserted row. `active` and `scrape_enabled` default to true
// when null or undefined.
// Responds 409 when the insert violates a unique constraint, 400 when the
// database rejects the supplied values, 500 for any other failure.
router.post('/', requireRole('superadmin', 'admin'), async (req, res) => {
  try {
    // req.body can be undefined when no body was parsed; treat it as empty so
    // the database constraints decide what is missing.
    const { name, slug, dutchie_url, active, scrape_enabled } = req.body ?? {};
    const result = await pool.query(`
INSERT INTO stores (name, slug, dutchie_url, active, scrape_enabled)
VALUES ($1, $2, $3, $4, $5)
RETURNING *
`, [name, slug, dutchie_url, active ?? true, scrape_enabled ?? true]);
    res.status(201).json(result.rows[0]);
  } catch (error: unknown) {
    // Assumes `pool` is a node-postgres pool, whose errors carry the SQLSTATE
    // in `code` - TODO confirm.
    const code =
      typeof error === 'object' && error !== null && 'code' in error
        ? (error as { code?: unknown }).code
        : undefined;
    // 23505 unique_violation: the new row collides with an existing store.
    if (code === '23505') {
      return res.status(409).json({ error: 'Store conflicts with an existing store' });
    }
    // 23502 not_null_violation, 22P02 invalid_text_representation,
    // 22001 string_data_right_truncation: the request data is unusable.
    if (code === '23502' || code === '22P02' || code === '22001') {
      return res.status(400).json({ error: 'Invalid or missing store fields' });
    }
    console.error('Error creating store:', error);
    res.status(500).json({ error: 'Failed to create store' });
  }
});
// Update store
//
// PUT /:id (superadmin or admin) - partial update of a `stores` row. Each of
// `name`, `slug`, `dutchie_url`, `active`, `scrape_enabled` is written only
// when the body supplies a non-null value (COALESCE keeps the stored value
// otherwise); `updated_at` is always refreshed. Responds with the updated row.
// Responds 404 when no store has that id, 409 when the update violates a
// unique constraint, 400 when the database rejects the supplied values, 500
// for any other failure.
router.put('/:id', requireRole('superadmin', 'admin'), async (req, res) => {
  try {
    const { id } = req.params;
    // req.body can be undefined when no body was parsed; an empty body then
    // only refreshes updated_at.
    const { name, slug, dutchie_url, active, scrape_enabled } = req.body ?? {};
    const result = await pool.query(`
UPDATE stores
SET name = COALESCE($1, name),
slug = COALESCE($2, slug),
dutchie_url = COALESCE($3, dutchie_url),
active = COALESCE($4, active),
scrape_enabled = COALESCE($5, scrape_enabled),
updated_at = CURRENT_TIMESTAMP
WHERE id = $6
RETURNING *
`, [name, slug, dutchie_url, active, scrape_enabled, id]);
    if (result.rows.length === 0) {
      return res.status(404).json({ error: 'Store not found' });
    }
    res.json(result.rows[0]);
  } catch (error: unknown) {
    // Assumes `pool` is a node-postgres pool, whose errors carry the SQLSTATE
    // in `code` - TODO confirm.
    const code =
      typeof error === 'object' && error !== null && 'code' in error
        ? (error as { code?: unknown }).code
        : undefined;
    // 23505 unique_violation: the new values collide with another store.
    if (code === '23505') {
      return res.status(409).json({ error: 'Store conflicts with an existing store' });
    }
    // 22P02 invalid_text_representation, 22001 string_data_right_truncation:
    // the id or a body value could not be stored as sent.
    if (code === '22P02' || code === '22001') {
      return res.status(400).json({ error: 'Invalid store id or fields' });
    }
    console.error('Error updating store:', error);
    res.status(500).json({ error: 'Failed to update store' });
  }
});
// Delete store
//
// DELETE /:id (superadmin only) - removes the `stores` row with that id.
// Responds with a confirmation message, 404 `{ error }` when nothing was
// deleted, or 500 `{ error }` if the query fails.
router.delete('/:id', requireRole('superadmin'), async (req, res) => {
  try {
    const storeId = req.params.id;
    const { rows: deletedRows } = await pool.query('DELETE FROM stores WHERE id = $1 RETURNING *', [storeId]);

    // RETURNING yields one row per deleted store, so any row means success.
    if (deletedRows.length > 0) {
      res.json({ message: 'Store deleted successfully' });
      return;
    }
    res.status(404).json({ error: 'Store not found' });
  } catch (err) {
    console.error('Error deleting store:', err);
    res.status(500).json({ error: 'Failed to delete store' });
  }
});
// Trigger scrape for a store
//
// POST /:id/scrape (superadmin or admin) - starts `scrapeStore` for the store
// without waiting for it to finish, then responds with
// `{ message, parallel, userAgent }`.
// Body fields (both optional):
//   parallel  - value forwarded to scrapeStore as its second argument; must
//               parse to a positive integer, defaults to 3.
//   userAgent - forwarded to scrapeStore unchanged; reported as 'random' in
//               the response when not supplied.
// Responds 400 for an invalid `parallel`, 404 when the store does not exist,
// 500 `{ error }` if the lookup fails. Errors raised by the scrape itself are
// only logged.
router.post('/:id/scrape', requireRole('superadmin', 'admin'), async (req, res) => {
  try {
    const { id } = req.params;
    // req.body can be undefined when no body was parsed; fall back to defaults.
    const { parallel = 3, userAgent } = req.body ?? {}; // Default to 3 parallel scrapers

    // Parse once, with an explicit radix, and reject anything that is not a
    // positive integer so NaN, 0 or negative values never reach scrapeStore.
    // NOTE(review): no upper bound is enforced here - confirm whether
    // scrapeStore limits this itself.
    const parallelCount = Number.parseInt(String(parallel), 10);
    if (Number.isNaN(parallelCount) || parallelCount < 1) {
      return res.status(400).json({ error: 'parallel must be a positive integer' });
    }

    const storeResult = await pool.query('SELECT id FROM stores WHERE id = $1', [id]);
    if (storeResult.rows.length === 0) {
      return res.status(404).json({ error: 'Store not found' });
    }

    // Deliberately not awaited: the scrape continues after the response is
    // sent, and its failure is logged instead of reaching the client.
    scrapeStore(Number.parseInt(id, 10), parallelCount, userAgent).catch((err: unknown) => {
      console.error('Background scrape error:', err);
    });

    res.json({
      message: 'Scrape started',
      parallel: parallelCount,
      userAgent: userAgent || 'random'
    });
  } catch (error) {
    console.error('Error triggering scrape:', error);
    res.status(500).json({ error: 'Failed to trigger scrape' });
  }
});
// Download missing images for a store
//
// POST /:id/download-images (superadmin or admin) - finds the store's products
// that have an image_url but no local_image_path, starts uploading them one at
// a time without waiting for completion, and responds with
// `{ message, total_missing }`.
// Responds 404 when the store does not exist, 500 `{ error }` if either lookup
// query fails. Per-product failures are logged and skipped.
router.post('/:id/download-images', requireRole('superadmin', 'admin'), async (req, res) => {
  try {
    const { id } = req.params;

    const storeLookup = await pool.query('SELECT id, name FROM stores WHERE id = $1', [id]);
    if (storeLookup.rows.length === 0) {
      return res.status(404).json({ error: 'Store not found' });
    }
    const [store] = storeLookup.rows;

    const { rows: pending } = await pool.query(`
SELECT id, name, image_url
FROM products
WHERE store_id = $1
AND image_url IS NOT NULL
AND local_image_path IS NULL
`, [id]);

    // Background job: upload each pending image, then record its stored path.
    const downloadPending = async () => {
      const { uploadImageFromUrl } = await import('../utils/minio');
      let downloaded = 0;
      for (const product of pending) {
        try {
          console.log(`📸 Downloading image for: ${product.name}`);
          const localPath = await uploadImageFromUrl(product.image_url, product.id);
          await pool.query(`
UPDATE products
SET local_image_path = $1
WHERE id = $2
`, [localPath, product.id]);
          downloaded += 1;
        } catch (error) {
          // One bad image must not stop the rest of the batch.
          console.error(`Failed to download image for ${product.name}:`, error);
        }
      }
      console.log(`✅ Downloaded ${downloaded} of ${pending.length} missing images for ${store.name}`);
    };

    // Not awaited: the response below is sent while the job keeps running.
    downloadPending().catch(err => console.error('Background image download error:', err));

    res.json({
      message: 'Image download started',
      total_missing: pending.length
    });
  } catch (error) {
    console.error('Error triggering image download:', error);
    res.status(500).json({ error: 'Failed to trigger image download' });
  }
});
// Discover categories for a store
//
// POST /:id/discover-categories (superadmin or admin) - starts
// `discoverCategories` for the store without waiting for it to finish, then
// responds with `{ message }`.
// Responds 404 when the store does not exist, 500 `{ error }` if the lookup
// fails. Errors raised by the discovery itself are only logged.
router.post('/:id/discover-categories', requireRole('superadmin', 'admin'), async (req, res) => {
  try {
    const storeId = req.params.id;
    const { rows } = await pool.query('SELECT id FROM stores WHERE id = $1', [storeId]);
    if (!rows.length) {
      return res.status(404).json({ error: 'Store not found' });
    }

    const logFailure = (err: unknown) => {
      console.error('Background category discovery error:', err);
    };
    // Not awaited: discovery keeps running after the response is sent.
    discoverCategories(parseInt(storeId)).catch(logFailure);

    res.json({ message: 'Category discovery started' });
  } catch (err) {
    console.error('Error triggering category discovery:', err);
    res.status(500).json({ error: 'Failed to trigger category discovery' });
  }
});
// Debug scraper
//
// POST /:id/debug-scrape (superadmin or admin) - looks up the store's category
// whose slug is 'edibles', starts `debugDutchiePage` on that category's
// dutchie_url without waiting for it, and responds with `{ message, url }`.
// Responds 404 when the store has no such category, 500 `{ error }` if the
// lookup or the module import fails. Errors from the debug run are only logged.
router.post('/:id/debug-scrape', requireRole('superadmin', 'admin'), async (req, res) => {
  try {
    const storeId = req.params.id;
    console.log('Debug scrape triggered for store:', storeId);

    const { rows } = await pool.query(`
SELECT c.dutchie_url, c.name
FROM categories c
WHERE c.store_id = $1 AND c.slug = 'edibles'
LIMIT 1
`, [storeId]);
    if (rows.length === 0) {
      return res.status(404).json({ error: 'Edibles category not found' });
    }

    const category = rows[0];
    console.log('Found category:', category);

    // The debug helper is imported only when this endpoint is actually called.
    const { debugDutchiePage } = await import('../services/scraper-debug');
    const targetUrl = category.dutchie_url;
    // Not awaited: output goes to the server logs, as the response says.
    debugDutchiePage(targetUrl).catch((err: unknown) => {
      console.error('Debug error:', err);
    });

    res.json({ message: 'Debug started, check logs', url: targetUrl });
  } catch (err) {
    console.error('Debug endpoint error:', err);
    res.status(500).json({ error: 'Failed to debug' });
  }
});
// Default export: the router with every store route above registered on it.
export default router;