Files
cannaiq/backend/archive/verify-curaleaf-urls.js
Kelly d91c55a344 feat: Add stale process monitor, users route, landing page, archive old scripts
- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-05 04:07:31 -07:00

170 lines
4.9 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Third-party dependencies: the puppeteer-extra browser driver, its stealth
// plugin, and the node-postgres connection pool class.
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const { Pool } = require('pg');
// Register the stealth plugin on the shared puppeteer instance before any
// browser is launched by the functions below.
puppeteer.use(StealthPlugin());
// Connection string for the PostgreSQL database this script reads store rows
// from (host localhost, port 54320, database "dutchie_menus").
// NOTE(review): the credentials are embedded in source — confirm this is only
// ever a local development database, or move them to configuration.
const DATABASE_CONNECTION_STRING =
  'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';

// Shared connection pool used by compareWithDatabase() and closed when the run finishes.
const pool = new Pool({ connectionString: DATABASE_CONNECTION_STRING });
/**
 * Scrape the Curaleaf store locator and return the Arizona stores it links to.
 *
 * Launches a headless browser, loads https://curaleaf.com/stores/, collects
 * every anchor whose href contains "/stores/", keeps the ones that look like
 * Arizona Curaleaf stores (by URL or link text), de-duplicates them by slug
 * and prints the resulting list.
 *
 * @param {object} [options]
 * @param {number} [options.settleMs=3000] - Extra milliseconds to wait after
 *   navigation so dynamically rendered links have a chance to appear.
 * @returns {Promise<Array<{name: string, slug: string, url: string}>>} One
 *   entry per unique store slug. When a slug appears on several links, the
 *   name/url of the last link wins.
 * @throws Propagates any launch/navigation error; the browser is always closed.
 */
async function scrapeArizonaStores({ settleMs = 3000 } = {}) {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();

    // Set a desktop user agent
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    console.log('Navigating to Curaleaf stores page...');
    await page.goto('https://curaleaf.com/stores/', {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });

    // Give dynamic content a moment to render. A plain timer is used instead
    // of page.waitForTimeout(), which is deprecated in Puppeteer and absent
    // from newer releases.
    await new Promise((resolve) => setTimeout(resolve, settleMs));

    // The callback is executed inside the page, so it must be self-contained
    // and may only return serializable data.
    const stores = await page.evaluate(() => {
      const results = [];
      const links = Array.from(document.querySelectorAll('a[href*="/stores/"]'));

      for (const link of links) {
        const href = link.href;
        const text = link.textContent.trim();
        const hrefLower = href.toLowerCase();
        const textLower = text.toLowerCase();

        // An Arizona store is a Curaleaf store link whose URL or label mentions AZ/Arizona.
        const isArizona =
          hrefLower.includes('-az-') ||
          hrefLower.includes('arizona') ||
          textLower.includes('arizona') ||
          textLower.includes(', az');
        if (!href.includes('/stores/curaleaf') || !isArizona) {
          continue;
        }

        // Extract slug from URL
        const match = href.match(/\/stores\/([^\/\?#]+)/);
        if (match) {
          results.push({
            name: text,
            slug: match[1],
            url: href.split('?')[0].split('#')[0], // Remove query params and hash
          });
        }
      }

      return results;
    });

    // Remove duplicates based on slug (the same store is usually linked more than once).
    const uniqueStores = Array.from(
      new Map(stores.map((s) => [s.slug, s])).values()
    );

    // Report the de-duplicated count so the headline matches the list below it.
    console.log(`\nFound ${uniqueStores.length} Arizona stores on Curaleaf website:\n`);

    uniqueStores.forEach((store, i) => {
      console.log(`${i + 1}. ${store.name}`);
      console.log(` Slug: ${store.slug}`);
      console.log(` URL: ${store.url}\n`);
    });

    return uniqueStores;
  } finally {
    await browser.close();
  }
}
/**
 * Escape a value for use inside a single-quoted SQL string literal by
 * doubling every embedded single quote.
 */
function escapeSqlLiteral(value) {
  return String(value).replace(/'/g, "''");
}

/**
 * Find the first scraped store whose name contains the database store's name
 * (lower-cased, with the first "curaleaf - " occurrence removed).
 * Returns undefined when the database name gives nothing usable to search for.
 */
function findScrapedStoreByName(scrapedStores, dbName) {
  if (typeof dbName !== 'string') {
    return undefined;
  }
  const needle = dbName.toLowerCase().replace('curaleaf - ', '');
  // An empty needle is contained in every string and would wrongly "match"
  // the first scraped store.
  if (needle.trim() === '') {
    return undefined;
  }
  return scrapedStores.find((s) => s.name.toLowerCase().includes(needle));
}

/**
 * Compare scraped Curaleaf stores with the matching rows of the `stores`
 * table and print a report. This is a dry run: suggested UPDATE statements
 * are only printed, never executed.
 *
 * Side effects: writes the report to stdout, releases the checked-out client
 * and ends the shared `pool` once the comparison finishes (or fails).
 *
 * @param {Array<{name: string, slug: string, url: string}>} scrapedStores -
 *   Stores found on the website, as returned by scrapeArizonaStores().
 * @returns {Promise<void>}
 * @throws Propagates connection and query errors from the pool/client.
 */
async function compareWithDatabase(scrapedStores) {
  const client = await pool.connect();

  try {
    // Get current stores from database.
    // AND binds tighter than OR, so this selects
    // (slug LIKE 'curaleaf%' AND slug LIKE '%az%') OR slug LIKE 'curaleaf-dispensary%'.
    const result = await client.query(
      "SELECT id, name, slug, dutchie_url FROM stores WHERE slug LIKE 'curaleaf%' AND slug LIKE '%az%' OR slug LIKE 'curaleaf-dispensary%'"
    );
    const dbStores = result.rows;

    console.log('\n=== COMPARISON ===\n');

    // Create maps for easy lookup
    const scrapedMap = new Map(scrapedStores.map((s) => [s.slug, s]));
    const dbMap = new Map(dbStores.map((s) => [s.slug, s]));

    // Find stores that need updating
    const updates = [];

    for (const dbStore of dbStores) {
      const scraped = scrapedMap.get(dbStore.slug);

      if (!scraped) {
        // Store in DB but not found on website
        console.log(`⚠️ "${dbStore.name}" (${dbStore.slug}) - NOT FOUND on website`);

        // Try to find by matching name
        const matchByName = findScrapedStoreByName(scrapedStores, dbStore.name);
        if (matchByName) {
          console.log(` → Possible match: ${matchByName.slug}`);
          updates.push({
            id: dbStore.id,
            oldSlug: dbStore.slug,
            newSlug: matchByName.slug,
            newUrl: matchByName.url,
            name: dbStore.name,
          });
        }
      } else if (dbStore.dutchie_url !== scraped.url) {
        // Slug matches but the stored URL differs; reported only, no update is suggested.
        console.log(`✏️ "${dbStore.name}" - URL mismatch`);
        console.log(` DB: ${dbStore.dutchie_url}`);
        console.log(` Web: ${scraped.url}`);
      } else {
        console.log(`✅ "${dbStore.name}" - correct`);
      }
    }

    // Find stores on website but not in DB
    for (const scraped of scrapedStores) {
      if (!dbMap.has(scraped.slug)) {
        console.log(` "${scraped.name}" (${scraped.slug}) - ON WEBSITE but not in DB`);
      }
    }

    if (updates.length > 0) {
      console.log(`\n\nFound ${updates.length} stores that need updating. Apply updates? (This is a dry run, updates not applied)`);
      updates.forEach((u) => {
        // Values come from a scraped web page, so quotes are escaped before
        // they are embedded in SQL someone may copy and run.
        console.log(`\nUPDATE stores SET slug='${escapeSqlLiteral(u.newSlug)}', dutchie_url='${escapeSqlLiteral(u.newUrl)}' WHERE id=${u.id};`);
      });
    }
  } finally {
    client.release();
    // Awaited so a failure while closing the pool is not left as a floating promise.
    await pool.end();
  }
}
/**
 * Run the whole verification: scrape the Arizona stores from the website,
 * then compare them with the database.
 *
 * Errors are logged rather than rethrown; on failure the process exit code is
 * set to 1 and the connection pool is closed on a best-effort basis.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const scrapedStores = await scrapeArizonaStores();
    await compareWithDatabase(scrapedStores);
  } catch (error) {
    console.error('Error:', error);
    // Signal the failure to the calling shell/CI instead of exiting with 0.
    process.exitCode = 1;

    try {
      await pool.end();
    } catch (endError) {
      // Deliberately ignored: compareWithDatabase() ends the pool in its own
      // `finally`, and ending an already-ended pool is rejected. The original
      // error has been reported above.
    }
  }
}
// Script entry point: start the scrape-and-compare run when the file is executed.
main();