feat: Add stale process monitor, users route, landing page, archive old scripts
- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
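As a rough illustration of how the new stale-process endpoint might be consumed, here is a minimal TypeScript sketch. The base URL, port, and response fields (`pid`, `command`, `startedAt`) are assumptions for illustration only; this commit does not define them.

```ts
// Hypothetical client for GET /api/stale-processes.
// Field names and base URL are assumed, not taken from this commit.
interface StaleProcess {
  pid: number;        // assumed field
  command: string;    // assumed field
  startedAt: string;  // assumed field (ISO timestamp)
}

async function fetchStaleProcesses(baseUrl = 'http://localhost:3000'): Promise<StaleProcess[]> {
  const res = await fetch(`${baseUrl}/api/stale-processes`);
  if (!res.ok) {
    throw new Error(`GET /api/stale-processes failed: ${res.status}`);
  }
  return (await res.json()) as StaleProcess[];
}

fetchStaleProcesses()
  .then(procs => console.log(`Found ${procs.length} stale processes`))
  .catch(console.error);
```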
backend/archive/verify-curaleaf.ts  169 lines  Normal file
@@ -0,0 +1,169 @@
// Archived one-off script: verify Curaleaf Arizona store URLs in the database
// against curaleaf.com and apply slug/URL corrections where they differ.
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { Pool } from 'pg';

puppeteer.use(StealthPlugin());

const pool = new Pool({
  connectionString: process.env.DATABASE_URL || 'postgresql://dutchie:dutchie_local_pass@postgres:5432/dutchie_menus'
});

const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

// Scrape curaleaf.com/stores and collect links that look like Arizona store pages.
async function scrapeArizonaStores() {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-blink-features=AutomationControlled'
    ]
  });

  try {
    const page = await browser.newPage();
    await page.setUserAgent(USER_AGENT);

    console.log('Navigating to Curaleaf stores page...');
    await page.goto('https://curaleaf.com/stores/', {
      waitUntil: 'networkidle2',
      timeout: 30000
    });

    await page.waitForTimeout(3000);

    const stores = await page.evaluate(() => {
      const results: Array<{name: string; slug: string; url: string}> = [];
      const links = Array.from(document.querySelectorAll('a[href*="/stores/"]'));

      for (const link of links) {
        const href = (link as HTMLAnchorElement).href;
        const text = link.textContent?.trim() || '';

        if (href.includes('/stores/curaleaf') &&
            (href.toLowerCase().includes('-az-') ||
             href.toLowerCase().includes('arizona') ||
             href.toLowerCase().includes('dispensary-peoria') ||
             text.toLowerCase().includes('arizona') ||
             text.toLowerCase().includes(', az'))) {

          const match = href.match(/\/stores\/([^\/\?#]+)/);
          if (match) {
            results.push({
              name: text,
              slug: match[1],
              url: href.split('?')[0].split('#')[0]
            });
          }
        }
      }

      return results;
    });

    console.log(`\nFound ${stores.length} Arizona stores\n`);

    // De-duplicate by slug (the same store can be linked more than once).
    const uniqueStores = Array.from(
      new Map(stores.map(s => [s.slug, s])).values()
    );

    return uniqueStores;

  } finally {
    await browser.close();
  }
}

// Compare scraped store URLs against the stores table and update any mismatches.
async function compareAndUpdate() {
  const client = await pool.connect();

  try {
    console.log('Scraping Curaleaf website...\n');
    const scrapedStores = await scrapeArizonaStores();

    console.log('\nQuerying database...\n');
    const result = await client.query(
      "SELECT id, name, slug, dutchie_url FROM stores WHERE name LIKE 'Curaleaf%' ORDER BY name"
    );

    const dbStores = result.rows;

    console.log('\n=== COMPARISON ===\n');

    const scrapedMap = new Map(scrapedStores.map(s => [s.slug, s]));
    const updates: Array<{ id: number; url: string; slug?: string }> = [];

    for (const dbStore of dbStores) {
      const scraped = scrapedMap.get(dbStore.slug);

      if (scraped) {
        if (dbStore.dutchie_url !== scraped.url) {
          console.log(`⚠️  URL mismatch for "${dbStore.name}"`);
          console.log(`   DB:  ${dbStore.dutchie_url}`);
          console.log(`   Web: ${scraped.url}`);
          updates.push({ id: dbStore.id, url: scraped.url });
        } else {
          console.log(`✅ "${dbStore.name}" - correct`);
        }
      } else {
        console.log(`⚠️  "${dbStore.name}" (${dbStore.slug}) - NOT FOUND on website`);

        // Try to find by name matching
        const possibleMatch = scrapedStores.find(s => {
          const storeName = dbStore.name.toLowerCase().replace('curaleaf - ', '').replace('curaleaf-', '');
          return s.name.toLowerCase().includes(storeName) ||
                 s.slug.toLowerCase().includes(storeName);
        });

        if (possibleMatch) {
          console.log(`   → Possible match: ${possibleMatch.slug}`);
          console.log(`   → URL: ${possibleMatch.url}`);
          updates.push({
            id: dbStore.id,
            slug: possibleMatch.slug,
            url: possibleMatch.url
          });
        }
      }
    }

    // Check for stores on website but not in DB
    for (const scraped of scrapedStores) {
      const inDb = dbStores.find(db => db.slug === scraped.slug);
      if (!inDb) {
        console.log(`\n➕ "${scraped.name}" (${scraped.slug}) - ON WEBSITE but not in DB`);
        console.log(`   URL: ${scraped.url}`);
      }
    }

    if (updates.length > 0) {
      console.log(`\n\n=== APPLYING ${updates.length} UPDATES ===\n`);

      for (const update of updates) {
        if (update.slug) {
          await client.query(
            'UPDATE stores SET slug = $1, dutchie_url = $2 WHERE id = $3 RETURNING name',
            [update.slug, update.url, update.id]
          );
          console.log(`✅ Updated store ${update.id} with new slug: ${update.slug}`);
        } else {
          await client.query(
            'UPDATE stores SET dutchie_url = $1 WHERE id = $2 RETURNING name',
            [update.url, update.id]
          );
          console.log(`✅ Updated store ${update.id} with new URL`);
        }
      }

      console.log(`\n🎉 Successfully updated ${updates.length} stores!`);
    } else {
      console.log('\n✅ All stores are up to date!');
    }

  } finally {
    client.release();
    await pool.end();
  }
}

compareAndUpdate().catch(console.error);