- Add backend stale process monitoring API (/api/stale-processes) - Add users management route - Add frontend landing page and stale process monitor UI on /scraper-tools - Move old development scripts to backend/archive/ - Update frontend build with new features 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
112 lines
4.7 KiB
JavaScript
112 lines
4.7 KiB
JavaScript
"use strict";
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
const pg_1 = require("pg");
|
|
const pool = new pg_1.Pool({ connectionString: process.env.DATABASE_URL });
|
|
/**
 * Fetch a URL and resolve with the response body as text, aborting the
 * request if the whole operation takes longer than `timeout` milliseconds.
 *
 * The abort timer stays armed until the body has been fully read, so a server
 * that sends headers quickly and then stalls on the body is still cut off.
 * The HTTP status is not inspected: the body of an error response is returned
 * like any other.
 *
 * @param {string} url - Address to request.
 * @param {number} [timeout=10000] - Overall time budget in milliseconds.
 * @returns {Promise<string>} The response body decoded as text.
 * @throws Rejects with whatever `fetch` / `Response.text()` reject with; an
 *   expired timeout surfaces as an error named `AbortError`.
 */
async function fetchWithTimeout(url, timeout = 10000) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeout);
    try {
        const resp = await fetch(url, {
            signal: controller.signal,
            headers: {
                'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            },
            redirect: 'follow',
        });
        // Read the body while the timer is still running; aborting the signal
        // also fails an in-progress body read.
        return await resp.text();
    }
    finally {
        // Single cleanup point for success and failure, so the timer never
        // outlives the call and keeps the process alive.
        clearTimeout(timer);
    }
}
|
|
/**
 * Scan a page's HTML for fingerprints of known menu providers.
 *
 * Checks run in priority order and the first hit wins:
 *   1. Dutchie `"dispensaryId": "<24 hex>"` JSON       -> `{ provider: 'dutchie', platformId }`
 *   2. Dutchie `embedded-menu/<24 hex>[.js]` script    -> `{ provider: 'dutchie', platformId }`
 *   3. A dutchie.com dispensary/embedded-menu/stores URL -> `{ provider: 'dutchie', menuUrl }`
 *   4. An iheartjane.com / jane.co hostname            -> `{ provider: 'jane', menuUrl }`
 *   5. A `*.treez.io` hostname                         -> `{ provider: 'treez', menuUrl }`
 *   6. A `leafly.com/dispensary` link                  -> `{ provider: 'leafly' }`
 *
 * For Jane and Treez, `menuUrl` is `undefined` when the provider is referenced
 * but no full URL could be extracted.
 *
 * @param {string} html - Raw page markup.
 * @returns {{provider: string, platformId?: string, menuUrl?: string}}
 *   Detection result; `{ provider: 'unknown' }` when nothing matched or `html`
 *   is not a string.
 */
function detectDutchie(html) {
    if (typeof html !== 'string') {
        return { provider: 'unknown' };
    }
    // Check for reactEnv.dispensaryId (Curaleaf/Sol pattern)
    const reactEnvMatch = html.match(/"dispensaryId"\s*:\s*"([a-f0-9]{24})"/i);
    if (reactEnvMatch) {
        return { provider: 'dutchie', platformId: reactEnvMatch[1] };
    }
    // Check for Dutchie embedded-menu script (Trulieve pattern)
    // Look for: embedded-menu/5eaf48fc972e6200b1303b97.js
    const embedMatch = html.match(/embedded-menu\/([a-f0-9]{24})(?:\.js)?/i);
    if (embedMatch) {
        return { provider: 'dutchie', platformId: embedMatch[1] };
    }
    // Check for dutchie.com links
    const dutchieLink = html.match(/https?:\/\/(?:www\.)?dutchie\.com\/(?:dispensary|embedded-menu|stores)\/([a-zA-Z0-9-]+)/i);
    if (dutchieLink) {
        return { provider: 'dutchie', menuUrl: dutchieLink[0] };
    }
    // Check for jane. The hostname must stand on its own label boundary so
    // that look-alikes such as "maryjane.com" are not reported as Jane.
    const janeHost = /(?<![a-z0-9-])(?:iheartjane\.com|jane\.co)(?![a-z0-9-])/i;
    if (janeHost.test(html)) {
        // Stop at either quote style and at tag delimiters so the captured
        // URL does not drag trailing markup along with it.
        const janeMatch = html.match(/https?:\/\/(?:www\.)?(?:iheartjane\.com|jane\.co)\/[^"'\s<>]+/i);
        return { provider: 'jane', menuUrl: janeMatch?.[0] };
    }
    // Check for treez
    if (html.includes('.treez.io')) {
        const treezMatch = html.match(/https?:\/\/[a-zA-Z0-9-]+\.treez\.io[^"'\s<>]*/i);
        return { provider: 'treez', menuUrl: treezMatch?.[0] };
    }
    // Check for leafly
    if (html.includes('leafly.com/dispensary')) {
        return { provider: 'leafly' };
    }
    return { provider: 'unknown' };
}
|
|
/**
 * Script entry point: probe dispensary websites for their menu provider and
 * record what was found.
 *
 * Selects up to 150 dispensaries (ordered by id) that have no
 * `platform_dispensary_id` and a non-null website not matching '%example%'.
 * Each site is fetched one at a time with `fetchWithTimeout` and classified
 * with `detectDutchie`:
 *   - Dutchie with a platform id -> sets `menu_type` and `platform_dispensary_id`
 *   - Dutchie with only a URL    -> sets `menu_type` and `menu_url`
 *   - any other known provider   -> sets `menu_type`, and `menu_url` only when
 *                                   a URL was extracted
 *   - unknown                    -> logged only
 * A failure while processing one store (fetch or update) is logged and
 * counted, then the loop moves on. The pool is closed when the run ends,
 * whether it completed or threw.
 *
 * @returns {Promise<void>}
 * @throws Rejects if the initial SELECT or closing the pool fails.
 */
async function main() {
    let dutchieCount = 0;
    let otherCount = 0;
    let errorCount = 0;
    try {
        const { rows: stores } = await pool.query(`
            SELECT id, name, website
            FROM dispensaries
            WHERE platform_dispensary_id IS NULL
              AND website IS NOT NULL
              AND website NOT LIKE '%example%'
            ORDER BY id
            LIMIT 150
        `);
        console.log(`Checking ${stores.length} stores...\n`);
        for (const store of stores) {
            const label = `[${store.id}] ${store.name}`;
            try {
                const html = await fetchWithTimeout(store.website);
                const result = detectDutchie(html);
                if (result.provider === 'dutchie') {
                    if (result.platformId) {
                        await pool.query('UPDATE dispensaries SET menu_type = $1, platform_dispensary_id = $2, updated_at = NOW() WHERE id = $3', ['dutchie', result.platformId, store.id]);
                        console.log(`${label} => DUTCHIE (ID: ${result.platformId})`);
                        dutchieCount++;
                    }
                    else if (result.menuUrl) {
                        await pool.query('UPDATE dispensaries SET menu_type = $1, menu_url = $2, updated_at = NOW() WHERE id = $3', ['dutchie', result.menuUrl, store.id]);
                        console.log(`${label} => DUTCHIE (URL: ${result.menuUrl.slice(0, 60)})`);
                        dutchieCount++;
                    }
                }
                else if (result.provider !== 'unknown') {
                    // Pass an explicit null when no URL was extracted so that
                    // COALESCE keeps whatever menu_url is already stored.
                    await pool.query('UPDATE dispensaries SET menu_type = $1, menu_url = COALESCE($2, menu_url), updated_at = NOW() WHERE id = $3', [result.provider, result.menuUrl ?? null, store.id]);
                    console.log(`${label} => ${result.provider.toUpperCase()}`);
                    otherCount++;
                }
                else {
                    console.log(`${label} => no menu found`);
                }
            }
            catch (err) {
                // One bad store must not stop the run; report a short reason.
                const errMsg = err?.name === 'AbortError'
                    ? 'timeout'
                    : String(err?.message ?? '').slice(0, 40) || 'error';
                console.log(`${label} => ERROR: ${errMsg}`);
                errorCount++;
            }
        }
        console.log('\n=== Summary ===');
        console.log(`Dutchie detected: ${dutchieCount}`);
        console.log(`Other providers: ${otherCount}`);
        console.log(`Errors: ${errorCount}`);
    }
    finally {
        // Close the pool even when the SELECT above (or anything else outside
        // the per-store try) throws.
        await pool.end();
    }
}
|
|
// Run the script. A fatal error is printed and the process is marked as
// failed so callers checking the exit status do not mistake it for success.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
|