const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const { Pool } = require('pg');

puppeteer.use(StealthPlugin());

// The connection string can be overridden with DATABASE_URL so credentials do
// not have to live in source; the fallback is the original hard-coded value.
const pool = new Pool({
  connectionString:
    process.env.DATABASE_URL ||
    'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus',
});

/**
 * Resolves after the given number of milliseconds.
 * Stand-in for `page.waitForTimeout`, which newer Puppeteer releases removed.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Renders a value as a single-quoted SQL string literal (doubling embedded
 * single quotes) for the dry-run statements that are printed, never executed.
 *
 * @param {*} value - Value to render; coerced with String().
 * @returns {string} Quoted literal.
 */
function sqlLiteral(value) {
  return `'${String(value).replace(/'/g, "''")}'`;
}

/**
 * Loads the Curaleaf store listing page in a headless browser and collects
 * links that look like Arizona stores (by URL or link text).
 *
 * @returns {Promise<Array<{name: string, slug: string, url: string}>>}
 *   Stores de-duplicated by slug (the last occurrence of a slug wins).
 */
async function scrapeArizonaStores() {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();

    // Set a desktop user agent
    await page.setUserAgent(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    );

    console.log('Navigating to Curaleaf stores page...');
    await page.goto('https://curaleaf.com/stores/', {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });

    // Wait a bit for any dynamic content
    await delay(3000);

    // This callback runs inside the page, so it must not reference anything
    // from the Node-side scope.
    const stores = await page.evaluate(() => {
      const results = [];
      const links = Array.from(document.querySelectorAll('a[href*="/stores/"]'));

      for (const link of links) {
        const href = link.href;
        const text = link.textContent.trim();
        const hrefLower = href.toLowerCase();
        const textLower = text.toLowerCase();

        // Arizona is detected from either the URL or the visible link text.
        const looksLikeArizona =
          hrefLower.includes('-az-') ||
          hrefLower.includes('arizona') ||
          textLower.includes('arizona') ||
          textLower.includes(', az');

        if (!href.includes('/stores/curaleaf') || !looksLikeArizona) {
          continue;
        }

        // Extract slug from URL
        const match = href.match(/\/stores\/([^\/\?#]+)/);
        if (match) {
          results.push({
            name: text,
            slug: match[1],
            url: href.split('?')[0].split('#')[0], // Remove query params and hash
          });
        }
      }

      return results;
    });

    // Remove duplicates based on slug before reporting, so the announced
    // count matches the list printed below it.
    const uniqueStores = Array.from(new Map(stores.map((s) => [s.slug, s])).values());

    console.log(`\nFound ${uniqueStores.length} Arizona stores on Curaleaf website:\n`);

    uniqueStores.forEach((store, i) => {
      console.log(`${i + 1}. ${store.name}`);
      console.log(` Slug: ${store.slug}`);
      console.log(` URL: ${store.url}\n`);
    });

    return uniqueStores;
  } finally {
    await browser.close();
  }
}

/**
 * Compares scraped stores against rows of the `stores` table and prints a
 * report: rows missing from the website (with a possible match by name),
 * URL mismatches, and scraped stores that have no row. This is a dry run:
 * suggested UPDATE statements are printed, nothing is written.
 *
 * The shared pool is not closed here; the caller owns its lifetime.
 *
 * @param {Array<{name: string, slug: string, url: string}>} scrapedStores
 *   Stores returned by scrapeArizonaStores().
 * @returns {Promise<void>}
 */
async function compareWithDatabase(scrapedStores) {
  const client = await pool.connect();

  try {
    // Get current stores from database. The parentheses spell out the
    // precedence SQL already applies (AND binds tighter than OR).
    const result = await client.query(
      "SELECT id, name, slug, dutchie_url FROM stores WHERE (slug LIKE 'curaleaf%' AND slug LIKE '%az%') OR slug LIKE 'curaleaf-dispensary%'"
    );
    const dbStores = result.rows;

    console.log('\n=== COMPARISON ===\n');

    // Lookup structures keyed by slug
    const scrapedMap = new Map(scrapedStores.map((s) => [s.slug, s]));
    const dbSlugs = new Set(dbStores.map((s) => s.slug));

    // Find stores that need updating
    const updates = [];

    for (const dbStore of dbStores) {
      const scraped = scrapedMap.get(dbStore.slug);

      if (scraped) {
        // Check if URL matches
        if (dbStore.dutchie_url !== scraped.url) {
          console.log(`✏️ "${dbStore.name}" - URL mismatch`);
          console.log(` DB: ${dbStore.dutchie_url}`);
          console.log(` Web: ${scraped.url}`);
        } else {
          console.log(`✅ "${dbStore.name}" - correct`);
        }
        continue;
      }

      // Store in DB but not found on website
      console.log(`⚠️ "${dbStore.name}" (${dbStore.slug}) - NOT FOUND on website`);

      // Try to find by matching name. An empty needle is skipped because
      // String.prototype.includes('') is always true and would "match" the
      // first scraped store.
      const needle = (dbStore.name || '').toLowerCase().replace('curaleaf - ', '').trim();
      const matchByName = needle
        ? scrapedStores.find((s) => s.name.toLowerCase().includes(needle))
        : undefined;

      if (matchByName) {
        console.log(` → Possible match: ${matchByName.slug}`);
        updates.push({
          id: dbStore.id,
          oldSlug: dbStore.slug,
          newSlug: matchByName.slug,
          newUrl: matchByName.url,
          name: dbStore.name,
        });
      }
    }

    // Find stores on website but not in DB
    for (const scraped of scrapedStores) {
      if (!dbSlugs.has(scraped.slug)) {
        console.log(`➕ "${scraped.name}" (${scraped.slug}) - ON WEBSITE but not in DB`);
      }
    }

    if (updates.length > 0) {
      console.log(
        `\n\nFound ${updates.length} stores that need updating. Apply updates? (This is a dry run, updates not applied)`
      );
      updates.forEach((u) => {
        console.log(
          `\nUPDATE stores SET slug=${sqlLiteral(u.newSlug)}, dutchie_url=${sqlLiteral(u.newUrl)} WHERE id=${u.id};`
        );
      });
    }
  } finally {
    client.release();
  }
}

/**
 * Script entry point: scrape, compare, and always shut the pool down once.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const scrapedStores = await scrapeArizonaStores();
    await compareWithDatabase(scrapedStores);
  } catch (error) {
    console.error('Error:', error);
    // Report failure to the shell instead of exiting 0.
    process.exitCode = 1;
  } finally {
    // Ending the pool in one place avoids calling end() twice, which
    // node-postgres rejects.
    await pool.end();
  }
}

main().catch((error) => {
  console.error('Error:', error);
  process.exitCode = 1;
});