feat: Add stale process monitor, users route, landing page, archive old scripts

- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-05 04:07:31 -07:00
parent d2d44d2aeb
commit d91c55a344
3115 changed files with 5755 additions and 719 deletions

View File

@@ -0,0 +1,169 @@
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { Pool } from 'pg';
// Register the stealth plugin on the shared puppeteer-extra instance at module
// load, so it is in place before any browser is launched below.
const stealthPlugin = StealthPlugin();
puppeteer.use(stealthPlugin);

// Connection string used when the DATABASE_URL environment variable is unset or empty.
// NOTE(review): this embeds credentials in source — presumably a local/dev database; confirm.
const FALLBACK_DATABASE_URL = 'postgresql://dutchie:dutchie_local_pass@postgres:5432/dutchie_menus';

// Shared PostgreSQL connection pool for this script.
const connectionString = process.env.DATABASE_URL || FALLBACK_DATABASE_URL;
const pool = new Pool({ connectionString });

// User-Agent string applied to the scraping page via page.setUserAgent().
const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
/**
 * Scrapes https://curaleaf.com/stores/ and returns the store links that look
 * like Arizona locations.
 *
 * A link is kept when its href contains `/stores/curaleaf` and either the href
 * or the link text carries an Arizona marker (`-az-`, `arizona`,
 * `dispensary-peoria`, or `, az`). The slug is the first path segment after
 * `/stores/`; the URL is the href with query string and fragment removed.
 *
 * @returns One entry per distinct slug. When several links share a slug, the
 *          first one is kept, except that an entry with empty link text is
 *          replaced by a later one that has text.
 * @throws Propagates any browser launch or navigation error; the browser is
 *         closed in all cases.
 */
async function scrapeArizonaStores(): Promise<Array<{ name: string; slug: string; url: string }>> {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-blink-features=AutomationControlled'
    ]
  });
  try {
    const page = await browser.newPage();
    await page.setUserAgent(USER_AGENT);
    console.log('Navigating to Curaleaf stores page...');
    await page.goto('https://curaleaf.com/stores/', {
      waitUntil: 'networkidle2',
      timeout: 30000
    });
    // Fixed 3s pause after network idle before reading the DOM. A plain timer
    // is used instead of page.waitForTimeout(), which is deprecated and was
    // removed in later Puppeteer releases.
    await new Promise<void>(resolve => setTimeout(resolve, 3000));

    // This callback runs inside the page, so it must be self-contained and
    // cannot reference anything from this module's scope.
    const stores = await page.evaluate(() => {
      const results: Array<{ name: string; slug: string; url: string }> = [];
      const links = Array.from(document.querySelectorAll('a[href*="/stores/"]'));
      for (const link of links) {
        const href = (link as HTMLAnchorElement).href;
        const text = link.textContent?.trim() || '';
        const hrefLower = href.toLowerCase();
        const textLower = text.toLowerCase();
        const looksArizona =
          hrefLower.includes('-az-') ||
          hrefLower.includes('arizona') ||
          hrefLower.includes('dispensary-peoria') ||
          textLower.includes('arizona') ||
          textLower.includes(', az');
        if (!href.includes('/stores/curaleaf') || !looksArizona) {
          continue;
        }
        const match = href.match(/\/stores\/([^\/\?#]+)/);
        if (match) {
          results.push({
            name: text,
            slug: match[1],
            url: href.split('?')[0].split('#')[0]
          });
        }
      }
      return results;
    });

    // De-duplicate by slug. Keep the first link seen, but let a link with
    // visible text replace one without, so an empty-text anchor (e.g. one
    // wrapping an image) does not hide the store's name.
    const bySlug = new Map<string, { name: string; slug: string; url: string }>();
    for (const store of stores) {
      const existing = bySlug.get(store.slug);
      if (!existing || (!existing.name && store.name)) {
        bySlug.set(store.slug, store);
      }
    }
    const uniqueStores = Array.from(bySlug.values());

    // Report the de-duplicated count; the raw link count overstates the number of stores.
    console.log(`\nFound ${uniqueStores.length} Arizona stores\n`);
    return uniqueStores;
  } finally {
    await browser.close();
  }
}
/**
 * Compares the Curaleaf stores in the `stores` table with the Arizona stores
 * scraped from the website, logs the differences, and writes corrections back.
 *
 * For every DB row whose name starts with "Curaleaf":
 *  - if a scraped store has the same slug and a different URL, the row's
 *    `dutchie_url` is queued for update;
 *  - if no scraped store has that slug, a name-based fuzzy match is attempted
 *    and, when found, both `slug` and `dutchie_url` are queued for update.
 * Scraped stores with no DB row are only logged.
 *
 * All queued updates are applied in a single transaction, so a failure part
 * way through leaves the table unchanged.
 *
 * @throws Propagates scraping and database errors. The client is released and
 *         the pool is ended in all cases.
 */
async function compareAndUpdate() {
  try {
    const client = await pool.connect();
    try {
      console.log('Scraping Curaleaf website...\n');
      const scrapedStores = await scrapeArizonaStores();
      console.log('\nQuerying database...\n');
      const result = await client.query(
        "SELECT id, name, slug, dutchie_url FROM stores WHERE name LIKE 'Curaleaf%' ORDER BY name"
      );
      const dbStores = result.rows;
      console.log('\n=== COMPARISON ===\n');
      const scrapedMap = new Map(scrapedStores.map(s => [s.slug, s]));
      // Slugs already owned by a DB row; used to keep fuzzy matching from
      // reassigning them and for the "on website but not in DB" check.
      const dbSlugs = new Set(dbStores.map(db => db.slug));
      // Slugs already proposed as a fuzzy match for an earlier row in this run.
      const claimedSlugs = new Set<string>();
      const updates: Array<{ id: string | number; url: string; slug?: string }> = [];

      for (const dbStore of dbStores) {
        const scraped = scrapedMap.get(dbStore.slug);
        if (scraped) {
          if (dbStore.dutchie_url !== scraped.url) {
            console.log(`⚠️ URL mismatch for "${dbStore.name}"`);
            console.log(` DB: ${dbStore.dutchie_url}`);
            console.log(` Web: ${scraped.url}`);
            updates.push({ id: dbStore.id, url: scraped.url });
          } else {
            console.log(`✅ "${dbStore.name}" - correct`);
          }
          continue;
        }

        console.log(`⚠️ "${dbStore.name}" (${dbStore.slug}) - NOT FOUND on website`);
        // Try to find by name matching. The brand prefix is stripped so only
        // the location part of the name is compared.
        const storeName = String(dbStore.name)
          .toLowerCase()
          .replace('curaleaf - ', '')
          .replace('curaleaf-', '')
          .trim();
        // An empty or brand-only name would be a substring of every scraped
        // slug (they all contain "curaleaf"), so it cannot identify a store.
        const isUsableName = storeName !== '' && storeName !== 'curaleaf';
        const possibleMatch = isUsableName
          ? scrapedStores.find(s =>
              // Skip slugs that already belong to a DB row or were already
              // proposed for another row, so no two rows end up sharing a slug.
              !dbSlugs.has(s.slug) &&
              !claimedSlugs.has(s.slug) &&
              (s.name.toLowerCase().includes(storeName) ||
                s.slug.toLowerCase().includes(storeName))
            )
          : undefined;
        if (possibleMatch) {
          console.log(` → Possible match: ${possibleMatch.slug}`);
          console.log(` → URL: ${possibleMatch.url}`);
          claimedSlugs.add(possibleMatch.slug);
          updates.push({
            id: dbStore.id,
            slug: possibleMatch.slug,
            url: possibleMatch.url
          });
        }
      }

      // Check for stores on website but not in DB
      for (const scraped of scrapedStores) {
        if (!dbSlugs.has(scraped.slug)) {
          console.log(`\n "${scraped.name}" (${scraped.slug}) - ON WEBSITE but not in DB`);
          console.log(` URL: ${scraped.url}`);
        }
      }

      if (updates.length > 0) {
        console.log(`\n\n=== APPLYING ${updates.length} UPDATES ===\n`);
        // Apply every update atomically: either all rows change or none do.
        await client.query('BEGIN');
        try {
          for (const update of updates) {
            if (update.slug) {
              await client.query(
                'UPDATE stores SET slug = $1, dutchie_url = $2 WHERE id = $3 RETURNING name',
                [update.slug, update.url, update.id]
              );
              console.log(`✅ Updated store ${update.id} with new slug: ${update.slug}`);
            } else {
              await client.query(
                'UPDATE stores SET dutchie_url = $1 WHERE id = $2 RETURNING name',
                [update.url, update.id]
              );
              console.log(`✅ Updated store ${update.id} with new URL`);
            }
          }
          await client.query('COMMIT');
        } catch (err) {
          await client.query('ROLLBACK');
          console.error('❌ Update failed - all changes in this run were rolled back');
          throw err;
        }
        console.log(`\n🎉 Successfully updated ${updates.length} stores!`);
      } else {
        console.log('\n✅ All stores are up to date!');
      }
    } finally {
      client.release();
    }
  } finally {
    // End the pool even when acquiring a client failed.
    await pool.end();
  }
}
// Script entry point: run the comparison, and on failure log the error and
// mark the process as failed so shells, cron jobs and CI see a non-zero exit code.
compareAndUpdate().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});