Files
cannaiq/backend/archive/verify-curaleaf.ts
Kelly d91c55a344 feat: Add stale process monitor, users route, landing page, archive old scripts
- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-05 04:07:31 -07:00

170 lines
5.2 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { Pool } from 'pg';
// Register the stealth plugin on the shared puppeteer-extra instance at module
// load, i.e. before scrapeArizonaStores() launches a browser.
puppeteer.use(StealthPlugin());
// Module-wide PostgreSQL connection pool. DATABASE_URL is used when it is set to
// a non-empty value; otherwise the hard-coded connection string below is used.
// NOTE(review): the fallback embeds a username/password in source — presumably a
// local docker-compose default; confirm it is never a real credential.
const pool = new Pool({
connectionString: process.env.DATABASE_URL || 'postgresql://dutchie:dutchie_local_pass@postgres:5432/dutchie_menus'
});
// User-agent string (desktop Chrome 120 on Windows) that the scraper applies to
// its page via page.setUserAgent().
const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
/**
 * Scrapes the Curaleaf store locator and returns the Arizona stores it links to.
 *
 * Launches a headless browser, loads https://curaleaf.com/stores/, collects every
 * anchor whose href contains "/stores/curaleaf" and whose href or link text looks
 * Arizona-related, and de-duplicates the result by slug.
 *
 * @returns One entry per unique slug (when several links share a slug, the last
 *   one seen wins). `name` is the trimmed link text, `slug` is the path segment
 *   following `/stores/`, and `url` is the href without query string or fragment.
 * @throws Rejects with whatever error Puppeteer raises while launching or
 *   navigating; the browser is closed before the error propagates.
 */
async function scrapeArizonaStores(): Promise<Array<{ name: string; slug: string; url: string }>> {
  // NOTE(review): `headless: 'new'` is an opt-in value that only some Puppeteer
  // releases accept — confirm against the installed version before upgrading.
  const browser = await puppeteer.launch({
    headless: 'new',
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-blink-features=AutomationControlled'
    ]
  });

  try {
    const page = await browser.newPage();
    await page.setUserAgent(USER_AGENT);

    console.log('Navigating to Curaleaf stores page...');
    await page.goto('https://curaleaf.com/stores/', {
      waitUntil: 'networkidle2',
      timeout: 30000
    });

    // Give client-side rendering a moment to settle after the network goes idle.
    // page.waitForTimeout() is deprecated and no longer exists in current
    // Puppeteer releases, so use a plain timer that works on every version.
    await new Promise<void>(resolve => setTimeout(resolve, 3000));

    // The callback below executes inside the browser page, so it must be
    // self-contained and cannot reference anything from this module's scope.
    const stores = await page.evaluate(() => {
      const results: Array<{ name: string; slug: string; url: string }> = [];
      const links = Array.from(document.querySelectorAll('a[href*="/stores/"]'));

      for (const link of links) {
        const href = (link as HTMLAnchorElement).href;
        const text = link.textContent?.trim() || '';

        // Only Curaleaf-branded store pages are of interest.
        if (!href.includes('/stores/curaleaf')) {
          continue;
        }

        const lowerHref = href.toLowerCase();
        const lowerText = text.toLowerCase();
        const looksLikeArizona =
          lowerHref.includes('-az-') ||
          lowerHref.includes('arizona') ||
          lowerHref.includes('dispensary-peoria') ||
          lowerText.includes('arizona') ||
          lowerText.includes(', az');
        if (!looksLikeArizona) {
          continue;
        }

        // The slug is the first path segment after /stores/.
        const match = href.match(/\/stores\/([^\/\?#]+)/);
        if (match) {
          results.push({
            name: text,
            slug: match[1],
            url: href.split('?')[0].split('#')[0]
          });
        }
      }

      return results;
    });

    // A store is usually linked more than once on the page; keep one per slug.
    const uniqueStores = Array.from(
      new Map(stores.map(s => [s.slug, s])).values()
    );

    // Report the de-duplicated count so the log matches what is returned.
    console.log(`\nFound ${uniqueStores.length} Arizona stores\n`);

    return uniqueStores;
  } finally {
    await browser.close();
  }
}
/** Row shape selected from the `stores` table by compareAndUpdate(). */
type CuraleafDbStoreRow = {
  // NOTE(review): the id column type is not visible here; it is only passed
  // through to the UPDATE parameters and log output.
  id: number | string;
  name: string;
  slug: string | null;
  dutchie_url: string | null;
};

/** Store entry as returned by scrapeArizonaStores(). */
type CuraleafScrapedStore = { name: string; slug: string; url: string };

/** A pending change for one `stores` row; `slug` is set only when it must change too. */
type CuraleafStoreUpdate = { id: CuraleafDbStoreRow['id']; url: string; slug?: string };

/**
 * Returns the scraped stores whose name or slug contains the DB store's
 * location name (the DB name with its "curaleaf - " / "curaleaf-" prefix
 * removed), ignoring stores whose slug is already claimed by another row.
 */
function findCuraleafNameMatches(
  dbName: string,
  scrapedStores: readonly CuraleafScrapedStore[],
  claimedSlugs: ReadonlySet<string>
): CuraleafScrapedStore[] {
  const needle = dbName
    .toLowerCase()
    .replace('curaleaf - ', '')
    .replace('curaleaf-', '')
    .trim();

  // An empty needle would match every store (includes('') is always true), and
  // the bare brand name would match any slug that contains it. Neither
  // identifies a specific location, so do not guess.
  if (needle === '' || needle === 'curaleaf') {
    return [];
  }

  return scrapedStores.filter(
    s =>
      !claimedSlugs.has(s.slug) &&
      (s.name.toLowerCase().includes(needle) || s.slug.toLowerCase().includes(needle))
  );
}

/**
 * Compares the Curaleaf stores in the database with the stores scraped from
 * the Curaleaf website and writes corrections back to the `stores` table.
 *
 * - Rows whose slug exists on the website get `dutchie_url` updated when it differs.
 * - Rows whose slug is not on the website are matched by name; the row's slug
 *   and URL are updated only when exactly one unclaimed website store matches.
 * - Website stores with no DB row are reported but not inserted.
 *
 * All updates are applied in a single transaction, so a failure part-way
 * through leaves the table unchanged. The pooled client is released and the
 * pool is closed before the function settles.
 *
 * @throws Rejects with the scraping or database error that interrupted the run.
 */
async function compareAndUpdate(): Promise<void> {
  const client = await pool.connect();

  try {
    console.log('Scraping Curaleaf website...\n');
    const scrapedStores: CuraleafScrapedStore[] = await scrapeArizonaStores();

    console.log('\nQuerying database...\n');
    const result = await client.query(
      "SELECT id, name, slug, dutchie_url FROM stores WHERE name LIKE 'Curaleaf%' ORDER BY name"
    );
    const dbStores: CuraleafDbStoreRow[] = result.rows;

    console.log('\n=== COMPARISON ===\n');

    const scrapedMap = new Map<string, CuraleafScrapedStore>();
    for (const store of scrapedStores) {
      scrapedMap.set(store.slug, store);
    }

    // Slugs that already belong to a DB row. A website store with one of these
    // slugs is matched exactly to that row and must not be handed to another.
    const dbSlugs = new Set<string>();
    for (const row of dbStores) {
      if (row.slug != null) {
        dbSlugs.add(row.slug);
      }
    }
    // Grows as name-based matches are accepted, so that two rows can never be
    // pointed at the same website store within one run.
    const claimedSlugs = new Set<string>(dbSlugs);

    const updates: CuraleafStoreUpdate[] = [];

    for (const dbStore of dbStores) {
      const scraped = dbStore.slug == null ? undefined : scrapedMap.get(dbStore.slug);

      if (scraped) {
        if (dbStore.dutchie_url !== scraped.url) {
          console.log(`⚠️ URL mismatch for "${dbStore.name}"`);
          console.log(` DB: ${dbStore.dutchie_url}`);
          console.log(` Web: ${scraped.url}`);
          updates.push({ id: dbStore.id, url: scraped.url });
        } else {
          console.log(`✅ "${dbStore.name}" - correct`);
        }
        continue;
      }

      console.log(`⚠️ "${dbStore.name}" (${dbStore.slug}) - NOT FOUND on website`);

      // Fall back to name matching, but only act on an unambiguous result: this
      // rewrites the row's slug, so a wrong guess would corrupt the record.
      const [possibleMatch, ...otherCandidates] = findCuraleafNameMatches(
        dbStore.name,
        scrapedStores,
        claimedSlugs
      );

      if (possibleMatch && otherCandidates.length === 0) {
        console.log(` → Possible match: ${possibleMatch.slug}`);
        console.log(` → URL: ${possibleMatch.url}`);
        claimedSlugs.add(possibleMatch.slug);
        updates.push({
          id: dbStore.id,
          slug: possibleMatch.slug,
          url: possibleMatch.url
        });
      } else if (possibleMatch) {
        const candidateSlugs = [possibleMatch, ...otherCandidates].map(c => c.slug).join(', ');
        console.log(` → Ambiguous match (${candidateSlugs}) - skipping automatic update`);
      }
    }

    // Check for stores on website but not in DB
    for (const scraped of scrapedStores) {
      if (!dbSlugs.has(scraped.slug)) {
        console.log(`\n "${scraped.name}" (${scraped.slug}) - ON WEBSITE but not in DB`);
        console.log(` URL: ${scraped.url}`);
      }
    }

    if (updates.length > 0) {
      console.log(`\n\n=== APPLYING ${updates.length} UPDATES ===\n`);

      // Apply everything atomically so a mid-run failure cannot leave the
      // table half-updated.
      await client.query('BEGIN');
      try {
        for (const update of updates) {
          if (update.slug !== undefined) {
            await client.query(
              'UPDATE stores SET slug = $1, dutchie_url = $2 WHERE id = $3 RETURNING name',
              [update.slug, update.url, update.id]
            );
            console.log(`✅ Updated store ${update.id} with new slug: ${update.slug}`);
          } else {
            await client.query(
              'UPDATE stores SET dutchie_url = $1 WHERE id = $2 RETURNING name',
              [update.url, update.id]
            );
            console.log(`✅ Updated store ${update.id} with new URL`);
          }
        }
        await client.query('COMMIT');
      } catch (error: unknown) {
        await client.query('ROLLBACK');
        console.error('Update failed - all changes from this run were rolled back');
        throw error;
      }

      console.log(`\n🎉 Successfully updated ${updates.length} stores!`);
    } else {
      console.log('\n✅ All stores are up to date!');
    }
  } finally {
    client.release();
    await pool.end();
  }
}
// Script entry point: run the comparison and report any failure.
compareAndUpdate().catch((error: unknown) => {
  console.error(error);
  // Without this the process would exit with status 0 after a failure, hiding
  // the error from whatever invoked the script (shell, cron, CI).
  process.exitCode = 1;
});