Fix category-crawler-jobs store lookup query

- Fix column name from s.dutchie_plus_url to s.dutchie_url
- Add availability tracking and product freshness APIs
- Add crawl script for sequential dispensary processing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-01 00:07:00 -07:00
parent 20a7b69537
commit 9d8972aa86
15 changed files with 11604 additions and 42 deletions

View File

@@ -28,28 +28,150 @@ router.get('/', async (req, res) => {
}
});
// Get single store
// Freshness threshold in hours: data scraped longer ago than this is reported as stale.
const STALE_THRESHOLD_HOURS = 4;

/**
 * Summarises how recently something was scraped.
 *
 * @param lastScrapedAt - Time of the last scrape, or `null` if there has been none.
 * @returns An object with:
 *   - `last_scraped_at`: ISO-8601 string of the input, or `null` when never scraped.
 *   - `is_stale`: `true` when never scraped or when more than
 *     `STALE_THRESHOLD_HOURS` hours have elapsed.
 *   - `freshness`: human-readable text such as "5 minutes ago", "3 hours ago",
 *     "2 days ago", or "Never scraped".
 *   - `hours_since_scrape`: elapsed hours rounded to one decimal, or `null`
 *     when never scraped.
 */
function calculateFreshness(lastScrapedAt: Date | null): {
  last_scraped_at: string | null;
  is_stale: boolean;
  freshness: string;
  hours_since_scrape: number | null;
} {
  if (!lastScrapedAt) {
    return {
      last_scraped_at: null,
      is_stale: true,
      freshness: 'Never scraped',
      hours_since_scrape: null
    };
  }

  // Clamp at zero so a timestamp slightly in the future (clock skew between
  // hosts) is reported as "0 minutes ago" rather than a negative duration.
  const diffMs = Math.max(0, Date.now() - lastScrapedAt.getTime());
  const diffHours = diffMs / (1000 * 60 * 60);

  const describe = (count: number, unit: string): string =>
    `${count} ${unit}${count !== 1 ? 's' : ''} ago`;

  // Pick the unit from the *rounded* value. Choosing it from the raw value
  // produced "60 minutes ago" (59.5-59.99 min) and "24 hours ago"
  // (23.5-23.99 h) because rounding pushed the count past the unit boundary.
  const roundedMinutes = Math.round(diffHours * 60);
  const roundedHours = Math.round(diffHours);
  let freshnessText: string;
  if (roundedMinutes < 60) {
    freshnessText = describe(roundedMinutes, 'minute');
  } else if (roundedHours < 24) {
    freshnessText = describe(roundedHours, 'hour');
  } else {
    freshnessText = describe(Math.round(diffHours / 24), 'day');
  }

  return {
    last_scraped_at: lastScrapedAt.toISOString(),
    is_stale: diffHours > STALE_THRESHOLD_HOURS,
    freshness: freshnessText,
    hours_since_scrape: Math.round(diffHours * 10) / 10
  };
}
/**
 * Guesses the menu provider from a store URL by looking for known provider
 * domains anywhere in the string.
 *
 * Matching is case-insensitive, because URL hosts are case-insensitive and
 * stored URLs may contain mixed-case hosts. Providers are checked in the order
 * listed below; the first match wins.
 *
 * NOTE(review): this is a substring match, not a hostname match, so a URL that
 * merely contains one of these domains in its path or query (or a host such as
 * "maryjane.com", which contains "jane.co") is also classified as that
 * provider. Confirm whether stricter host parsing is wanted.
 *
 * @param dutchieUrl - The store's menu URL, or `null`.
 * @returns `'unknown'` when the URL is null or empty, the provider name when a
 *   known domain is found, otherwise `'Custom'`.
 */
function detectProvider(dutchieUrl: string | null): string {
  if (!dutchieUrl) return 'unknown';

  const normalizedUrl = dutchieUrl.toLowerCase();

  // Ordered: earlier entries take precedence when several domains appear.
  const providerDomains: ReadonlyArray<readonly [string, readonly string[]]> = [
    ['Dutchie', ['dutchie.com']],
    ['Jane', ['iheartjane.com', 'jane.co']],
    ['Treez', ['treez.io']],
    ['Weedmaps', ['weedmaps.com']],
    ['Leafly', ['leafly.com']]
  ];

  for (const [provider, domains] of providerDomains) {
    if (domains.some((domain) => normalizedUrl.includes(domain))) {
      return provider;
    }
  }
  return 'Custom';
}
// Get single store with full details
router.get('/:id', async (req, res) => {
try {
const { id } = req.params;
// Get store with counts and linked dispensary
const result = await pool.query(`
SELECT
SELECT
s.*,
d.id as dispensary_id,
d.name as dispensary_name,
d.slug as dispensary_slug,
d.state as dispensary_state,
d.city as dispensary_city,
d.address as dispensary_address,
d.menu_provider as dispensary_menu_provider,
COUNT(DISTINCT p.id) as product_count,
COUNT(DISTINCT c.id) as category_count
COUNT(DISTINCT c.id) as category_count,
COUNT(DISTINCT p.id) FILTER (WHERE p.in_stock = true) as in_stock_count,
COUNT(DISTINCT p.id) FILTER (WHERE p.in_stock = false) as out_of_stock_count
FROM stores s
LEFT JOIN dispensaries d ON s.dispensary_id = d.id
LEFT JOIN products p ON s.id = p.store_id
LEFT JOIN categories c ON s.id = c.store_id
WHERE s.id = $1
GROUP BY s.id
GROUP BY s.id, d.id, d.name, d.slug, d.state, d.city, d.address, d.menu_provider
`, [id]);
if (result.rows.length === 0) {
return res.status(404).json({ error: 'Store not found' });
}
res.json(result.rows[0]);
const store = result.rows[0];
// Get recent crawl jobs for this store
const jobsResult = await pool.query(`
SELECT
id, status, job_type, trigger_type,
started_at, completed_at,
products_found, products_new, products_updated,
in_stock_count, out_of_stock_count,
error_message
FROM crawl_jobs
WHERE store_id = $1
ORDER BY created_at DESC
LIMIT 10
`, [id]);
// Get schedule info if exists
const scheduleResult = await pool.query(`
SELECT
enabled, interval_hours, next_run_at, last_run_at
FROM store_crawl_schedule
WHERE store_id = $1
`, [id]);
// Calculate freshness
const freshness = calculateFreshness(store.last_scraped_at);
// Detect provider from URL
const provider = detectProvider(store.dutchie_url);
// Build response
const response = {
...store,
provider,
freshness: freshness.freshness,
is_stale: freshness.is_stale,
hours_since_scrape: freshness.hours_since_scrape,
linked_dispensary: store.dispensary_id ? {
id: store.dispensary_id,
name: store.dispensary_name,
slug: store.dispensary_slug,
state: store.dispensary_state,
city: store.dispensary_city,
address: store.dispensary_address,
menu_provider: store.dispensary_menu_provider
} : null,
schedule: scheduleResult.rows[0] || null,
recent_jobs: jobsResult.rows
};
// Remove redundant dispensary fields from root
delete response.dispensary_name;
delete response.dispensary_slug;
delete response.dispensary_state;
delete response.dispensary_city;
delete response.dispensary_address;
delete response.dispensary_menu_provider;
res.json(response);
} catch (error) {
console.error('Error fetching store:', error);
res.status(500).json({ error: 'Failed to fetch store' });