feat: Add stale process monitor, users route, landing page, archive old scripts

- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-05 04:07:31 -07:00
parent d2d44d2aeb
commit d91c55a344
3115 changed files with 5755 additions and 719 deletions

View File

@@ -0,0 +1,184 @@
import { chromium } from 'playwright-extra';
import stealth from 'puppeteer-extra-plugin-stealth';
import { pool } from './src/db/migrate';
// Register the stealth plugin on the playwright-extra chromium launcher. This runs
// at module load, i.e. before scrapeLeaflyMap() launches a browser further down.
chromium.use(stealth());
/**
 * Shape of a single dispensary record.
 *
 * The field names match the columns that `scrapeLeaflyMap` writes to the
 * `stores` table. Only `name` and `state` are required; every other field may
 * be absent.
 */
interface MapDispensary {
  // Required identity fields.
  name: string;
  state: string;

  // Optional postal address parts.
  address?: string;
  city?: string;
  zip?: string;

  // Optional map coordinates.
  latitude?: number;
  longitude?: number;

  // Optional contact details.
  phone?: string;
  website?: string;
}
/** Finds an existing store by case-insensitive name within a state. */
const FIND_STORE_SQL = 'SELECT id FROM stores WHERE LOWER(name) = LOWER($1) AND state = $2';

/**
 * Refreshes the address/contact/coordinate columns of one store.
 * COALESCE keeps the stored value whenever the corresponding parameter is NULL.
 */
const UPDATE_STORE_SQL = `
UPDATE stores SET
address = COALESCE($1, address),
city = COALESCE($2, city),
zip = COALESCE($3, zip),
phone = COALESCE($4, phone),
website = COALESCE($5, website),
latitude = COALESCE($6, latitude),
longitude = COALESCE($7, longitude),
updated_at = CURRENT_TIMESTAMP
WHERE id = $8
`;

/** Inserts a new, active store row with both timestamps set to now. */
const INSERT_STORE_SQL = `
INSERT INTO stores (
name, address, city, state, zip, phone, website,
latitude, longitude, active, created_at, updated_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, true, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
`;

/**
 * Scrapes the Leafly Arizona dispensaries page and upserts what it finds into
 * the `stores` table.
 *
 * Steps:
 *  1. Launch a visible (non-headless) Chromium window and load the page.
 *  2. Inside the page, probe JSON-LD scripts, `window` keys, `__NEXT_DATA__`
 *     and marker-like elements. Only `__NEXT_DATA__.props.pageProps`
 *     (`stores`, `dispensaries`, `locations` or `initialData`, in that order)
 *     is actually returned; the other probes only log what they see.
 *  3. If the returned value is a non-empty array, each entry that has a name
 *     either updates the matching row (case-insensitive name + state) or is
 *     inserted as a new row.
 *
 * The browser and the shared `pool` are closed before the function settles, so
 * this is a one-shot entry point: the pool cannot be used afterwards.
 *
 * @returns A promise that resolves once scraping and cleanup have finished.
 * @throws Rethrows any navigation/extraction error after logging it. Errors
 *   while saving an individual dispensary are logged and that entry is skipped.
 */
async function scrapeLeaflyMap(): Promise<void> {
  console.log('🗺️ Scraping dispensaries from Leafly Arizona map...\n');
  const browser = await chromium.launch({
    headless: false, // Show browser to see what's happening
  });
  try {
    // Context/page creation lives inside the try so that a failure here still
    // reaches the cleanup in `finally` instead of leaking the launched browser.
    const context = await browser.newContext({
      viewport: { width: 1920, height: 1080 },
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    });
    const page = await context.newPage();

    console.log('📄 Loading Leafly Arizona page...');
    await page.goto('https://www.leafly.com/dispensaries/arizona', {
      waitUntil: 'networkidle',
      timeout: 60000
    });
    // Wait for page to fully load
    await page.waitForTimeout(5000);

    // Try to extract data from window object or JSON-LD.
    // NOTE: this callback executes inside the page, so it cannot reference
    // variables from this file, and its console.* output goes to the browser's
    // console rather than to this process's stdout.
    const mapData = await page.evaluate(() => {
      const dispensaries: any[] = [];

      // Method 1: Check for JSON-LD structured data (diagnostic logging only)
      const scripts = document.querySelectorAll('script[type="application/ld+json"]');
      scripts.forEach(script => {
        try {
          const data = JSON.parse(script.textContent || '');
          if (data && typeof data === 'object') {
            console.log('Found JSON-LD:', Object.keys(data));
          }
        } catch (e) {
          // Skip invalid JSON
        }
      });

      // Method 2: Check window object for data (diagnostic logging only)
      const windowKeys = Object.keys(window).filter(key =>
        key.toLowerCase().includes('store') ||
        key.toLowerCase().includes('dispensar') ||
        key.toLowerCase().includes('location') ||
        key.toLowerCase().includes('map') ||
        key.toLowerCase().includes('data')
      );
      console.log('Interesting window keys:', windowKeys);

      // Method 3: Check for __NEXT_DATA__ (Next.js apps often use this)
      const nextData = document.getElementById('__NEXT_DATA__');
      if (nextData) {
        try {
          const data = JSON.parse(nextData.textContent || '');
          console.log('Found __NEXT_DATA__:', Object.keys(data));
          // Navigate through the data to find dispensaries
          const pageProps = data?.props?.pageProps;
          if (pageProps) {
            console.log('PageProps keys:', Object.keys(pageProps));
            // Common patterns for store data; the first match wins
            if (pageProps.stores) {
              console.log('Found stores array:', pageProps.stores.length);
              return pageProps.stores;
            }
            if (pageProps.dispensaries) {
              console.log('Found dispensaries array:', pageProps.dispensaries.length);
              return pageProps.dispensaries;
            }
            if (pageProps.locations) {
              console.log('Found locations array:', pageProps.locations.length);
              return pageProps.locations;
            }
            if (pageProps.initialData) {
              // May be a plain object rather than an array; the caller only
              // processes the result when it is an array.
              console.log('Found initialData:', Object.keys(pageProps.initialData));
              return pageProps.initialData;
            }
          }
        } catch (e) {
          console.error('Error parsing __NEXT_DATA__:', e);
        }
      }

      // Method 4: Check for map markers (diagnostic logging only)
      const markers = document.querySelectorAll('[class*="marker"], [class*="pin"], [data-marker]');
      console.log('Found map markers:', markers.length);

      // Nothing above produced data: this is always the empty array.
      return dispensaries;
    });

    console.log('\n📊 Map data extracted:');
    console.log(JSON.stringify(mapData, null, 2));

    // If we found structured data, process it
    if (Array.isArray(mapData) && mapData.length > 0) {
      console.log(`\n✅ Found ${mapData.length} dispensaries from map data`);
      let savedCount = 0;
      let updatedCount = 0;

      for (const dispensary of mapData) {
        try {
          // The payload's field names are not known up front, so try the
          // common aliases for each value.
          const name = dispensary.name || dispensary.storeName || dispensary.title;
          const address = dispensary.address || dispensary.streetAddress;
          const city = dispensary.city || dispensary.locality;
          const state = dispensary.state || dispensary.region || 'AZ';
          const zip = dispensary.zip || dispensary.postalCode;
          // `??` rather than `||`: 0 is a valid coordinate and must not be
          // treated as "missing".
          const lat = dispensary.latitude ?? dispensary.lat;
          const lng = dispensary.longitude ?? dispensary.lng ?? dispensary.lon;
          const phone = dispensary.phone || dispensary.telephone;
          const website = dispensary.website || dispensary.url;

          if (!name) continue;

          // Check if exists
          const existing = await pool.query(FIND_STORE_SQL, [name, state]);

          if (existing.rows.length > 0) {
            await pool.query(
              UPDATE_STORE_SQL,
              [address, city, zip, phone, website, lat, lng, existing.rows[0].id]
            );
            updatedCount++;
          } else {
            await pool.query(
              INSERT_STORE_SQL,
              [name, address, city, state, zip, phone, website, lat, lng]
            );
            savedCount++;
          }
        } catch (error) {
          // One bad entry must not abort the whole run.
          console.error(`Error saving dispensary: ${error}`);
        }
      }

      console.log(`\n✅ Saved ${savedCount} new dispensaries`);
      console.log(`✅ Updated ${updatedCount} existing dispensaries`);
    }
  } catch (error) {
    console.error(`Error: ${error}`);
    throw error;
  } finally {
    // Close the pool even if closing the browser fails.
    try {
      await browser.close();
    } finally {
      await pool.end();
    }
  }
}
// Script entry point. scrapeLeaflyMap() rethrows failures, so handle the
// rejection here instead of leaving a floating promise: report it with its
// stack trace and mark the process as failed.
scrapeLeaflyMap().catch((error: unknown) => {
  console.error('scrapeLeaflyMap failed:', error);
  process.exitCode = 1;
});