Files
cannaiq/backend/verify-curaleaf-urls.js
2025-11-28 19:45:44 -07:00

170 lines
4.9 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const { Pool } = require('pg');
puppeteer.use(StealthPlugin());
// Shared PostgreSQL connection pool for this script.
// The connection string can be supplied through the DATABASE_URL environment
// variable so credentials do not have to live in source control; when it is
// unset the original local-development database is used.
const pool = new Pool({
  connectionString:
    process.env.DATABASE_URL ||
    'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus',
});
/**
 * Scrape the Curaleaf store locator and collect the Arizona store links.
 *
 * Launches a headless browser, loads https://curaleaf.com/stores/, gathers every
 * `/stores/curaleaf…` link whose URL or link text mentions Arizona, removes
 * duplicate slugs, prints the result, and returns it. The browser is always
 * closed, even when navigation or evaluation fails.
 *
 * @returns {Promise<Array<{name: string, slug: string, url: string}>>}
 *   One entry per unique store slug, in first-seen order.
 * @throws Propagates any browser launch, navigation, or evaluation error.
 */
async function scrapeArizonaStores() {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();

    // Set a desktop user agent
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    console.log('Navigating to Curaleaf stores page...');
    await page.goto('https://curaleaf.com/stores/', {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });

    // Give dynamically rendered content a moment to settle. A plain timer is
    // used because page.waitForTimeout() is deprecated and no longer exists
    // in newer Puppeteer releases.
    await new Promise((resolve) => setTimeout(resolve, 3000));

    // This callback runs inside the page, so it must be self-contained and
    // cannot reference anything from the Node.js scope.
    const stores = await page.evaluate(() => {
      const results = [];
      const links = Array.from(document.querySelectorAll('a[href*="/stores/"]'));

      for (const link of links) {
        const href = link.href;
        const text = link.textContent.trim();
        const hrefLower = href.toLowerCase();
        const textLower = text.toLowerCase();

        // A link counts as an Arizona store when either its URL or its text says so.
        const isArizona =
          hrefLower.includes('-az-') ||
          hrefLower.includes('arizona') ||
          textLower.includes('arizona') ||
          textLower.includes(', az');

        if (!href.includes('/stores/curaleaf') || !isArizona) {
          continue;
        }

        // Extract slug from URL
        const match = href.match(/\/stores\/([^\/\?#]+)/);
        if (match) {
          results.push({
            name: text,
            slug: match[1],
            url: href.split('?')[0].split('#')[0], // Remove query params and hash
          });
        }
      }

      return results;
    });

    // Remove duplicates based on slug. A later link for the same slug replaces
    // an earlier one, except that a link with empty text never overwrites an
    // entry that already has a name.
    const storesBySlug = new Map();
    for (const store of stores) {
      const existing = storesBySlug.get(store.slug);
      if (!existing || store.name !== '' || existing.name === '') {
        storesBySlug.set(store.slug, store);
      }
    }
    const uniqueStores = Array.from(storesBySlug.values());

    // Report the de-duplicated count so it agrees with the numbered list below.
    console.log(`\nFound ${uniqueStores.length} Arizona stores on Curaleaf website:\n`);

    uniqueStores.forEach((store, i) => {
      console.log(`${i + 1}. ${store.name}`);
      console.log(` Slug: ${store.slug}`);
      console.log(` URL: ${store.url}\n`);
    });

    return uniqueStores;
  } finally {
    await browser.close();
  }
}
/**
 * Compare scraped Curaleaf stores with the rows in the `stores` table and
 * print a report to the console.
 *
 * For each matching database row the report says whether the slug was found on
 * the website and whether `dutchie_url` equals the scraped URL. Rows whose slug
 * is missing are matched by name as a fallback, and a suggested UPDATE
 * statement is printed for each such match. Scraped stores that have no row
 * are listed as well. This is a dry run: only a SELECT is executed.
 *
 * Side effect: releases the client and ends the shared `pool` before returning.
 *
 * @param {Array<{name: string, slug: string, url: string}>} scrapedStores
 *   Stores as returned by scrapeArizonaStores().
 * @returns {Promise<void>}
 * @throws Propagates connection and query errors.
 */
async function compareWithDatabase(scrapedStores) {
  // Quote a value as a SQL string literal, doubling embedded single quotes so
  // the printed statements stay valid when scraped text contains an apostrophe.
  const sqlLiteral = (value) => `'${String(value).replace(/'/g, "''")}'`;

  const client = await pool.connect();

  try {
    // Get current stores from database.
    // AND binds tighter than OR, so this reads as
    // (curaleaf% AND %az%) OR curaleaf-dispensary%.
    const result = await client.query(
      "SELECT id, name, slug, dutchie_url FROM stores WHERE slug LIKE 'curaleaf%' AND slug LIKE '%az%' OR slug LIKE 'curaleaf-dispensary%'"
    );
    const dbStores = result.rows;

    console.log('\n=== COMPARISON ===\n');

    // Create maps for easy lookup
    const scrapedMap = new Map(scrapedStores.map((s) => [s.slug, s]));
    const dbMap = new Map(dbStores.map((s) => [s.slug, s]));

    // Find stores that need updating
    const updates = [];

    for (const dbStore of dbStores) {
      const scraped = scrapedMap.get(dbStore.slug);

      if (scraped) {
        // Check if URL matches
        if (dbStore.dutchie_url !== scraped.url) {
          console.log(`✏️ "${dbStore.name}" - URL mismatch`);
          console.log(` DB: ${dbStore.dutchie_url}`);
          console.log(` Web: ${scraped.url}`);
        } else {
          console.log(`✅ "${dbStore.name}" - correct`);
        }
        continue;
      }

      // Store in DB but not found on website
      console.log(`⚠️ "${dbStore.name}" (${dbStore.slug}) - NOT FOUND on website`);

      // Try to find by matching name. An empty needle is skipped because
      // String.prototype.includes('') is true for every string and would
      // wrongly "match" the first scraped store.
      const needle = (dbStore.name ?? '')
        .toLowerCase()
        .replace('curaleaf - ', '')
        .trim();
      const matchByName =
        needle === ''
          ? undefined
          : scrapedStores.find((s) => s.name.toLowerCase().includes(needle));

      if (matchByName) {
        console.log(` → Possible match: ${matchByName.slug}`);
        updates.push({
          id: dbStore.id,
          oldSlug: dbStore.slug,
          newSlug: matchByName.slug,
          newUrl: matchByName.url,
          name: dbStore.name,
        });
      }
    }

    // Find stores on website but not in DB
    for (const scraped of scrapedStores) {
      if (!dbMap.has(scraped.slug)) {
        console.log(` "${scraped.name}" (${scraped.slug}) - ON WEBSITE but not in DB`);
      }
    }

    if (updates.length > 0) {
      console.log(`\n\nFound ${updates.length} stores that need updating. Apply updates? (This is a dry run, updates not applied)`);
      updates.forEach((u) => {
        console.log(`\nUPDATE stores SET slug=${sqlLiteral(u.newSlug)}, dutchie_url=${sqlLiteral(u.newUrl)} WHERE id=${u.id};`);
      });
    }
  } finally {
    client.release();
    // Await shutdown so the promise is not left floating.
    await pool.end();
  }
}
/**
 * Script entry point: scrape the Curaleaf website, then compare the result
 * with the database.
 *
 * Any error is logged and turned into a non-zero exit code. The shared pool is
 * closed exactly once: pg rejects a second pool.end() call, so shutdown is
 * skipped when the pool is already ending (for example because
 * compareWithDatabase() closed it).
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const scrapedStores = await scrapeArizonaStores();
    await compareWithDatabase(scrapedStores);
  } catch (error) {
    console.error('Error:', error);
    process.exitCode = 1;
  } finally {
    if (!pool.ending) {
      try {
        await pool.end();
      } catch (endError) {
        console.error('Error closing database pool:', endError);
      }
    }
  }
}

main();