feat: Add stale process monitor, users route, landing page, archive old scripts
- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
184
backend/archive/scrape-leafly-map.ts
Normal file
184
backend/archive/scrape-leafly-map.ts
Normal file
@@ -0,0 +1,184 @@
|
||||
import { chromium } from 'playwright-extra';
|
||||
import stealth from 'puppeteer-extra-plugin-stealth';
|
||||
import { pool } from './src/db/migrate';
|
||||
|
||||
// Register the stealth plugin on the playwright-extra chromium launcher at module
// load, i.e. before scrapeLeaflyMap() launches a browser.
// NOTE(review): presumably this is meant to reduce bot detection by Leafly — confirm.
chromium.use(stealth());
|
||||
|
||||
/**
 * Shape of a single dispensary entry.
 *
 * Only `name` and `state` are required; every other field is optional.
 * NOTE(review): nothing in the visible part of this file references this
 * interface — presumably it is the intended target shape for scraped records.
 */
interface MapDispensary {
  // Required identification.
  name: string;
  state: string;

  // Postal address parts.
  address?: string;
  city?: string;
  zip?: string;

  // Geographic coordinates.
  latitude?: number;
  longitude?: number;

  // Contact details.
  phone?: string;
  website?: string;
}
|
||||
|
||||
/**
 * Values destined for one row of the `stores` table.
 *
 * The scraped payload is not validated, so every value is `unknown`; `null`
 * means "nothing usable was scraped for this column".
 */
type StoreColumns = Record<
  | 'name'
  | 'address'
  | 'city'
  | 'state'
  | 'zip'
  | 'phone'
  | 'website'
  | 'latitude'
  | 'longitude',
  unknown
>;

/**
 * Maps one raw scraped entry onto `stores` columns, trying the alternative
 * property names the payload might use.
 *
 * @param raw - One element of the array extracted from the page.
 * @returns The column values, or `null` when the entry is not an object or has no name.
 */
function toStoreColumns(raw: unknown): StoreColumns | null {
  // Non-object entries carry no fields; skip them instead of letting a
  // property access throw.
  if (typeof raw !== 'object' || raw === null) return null;
  const entry = raw as Record<string, unknown>;

  const name = entry.name || entry.storeName || entry.title;
  if (!name) return null;

  // Every optional column ends in `|| null` so that an empty string (or a 0
  // coordinate, which cannot be a real US location) is stored as NULL. This
  // matters for the UPDATE below: COALESCE only keeps the existing value when
  // the new one is NULL, so '' would otherwise overwrite good data.
  return {
    name,
    address: entry.address || entry.streetAddress || null,
    city: entry.city || entry.locality || null,
    state: entry.state || entry.region || 'AZ',
    zip: entry.zip || entry.postalCode || null,
    phone: entry.phone || entry.telephone || null,
    website: entry.website || entry.url || null,
    latitude: entry.latitude || entry.lat || null,
    longitude: entry.longitude || entry.lng || entry.lon || null,
  };
}

/**
 * Inserts the store, or updates the existing row that has the same
 * case-insensitive name and the same state.
 *
 * @param store - Column values produced by `toStoreColumns`.
 * @returns `'updated'` when a matching row existed, otherwise `'inserted'`.
 * @throws Whatever `pool.query` rejects with.
 */
async function upsertStore(store: StoreColumns): Promise<'inserted' | 'updated'> {
  const existing = await pool.query(
    'SELECT id FROM stores WHERE LOWER(name) = LOWER($1) AND state = $2',
    [store.name, store.state]
  );

  if (existing.rows.length > 0) {
    // COALESCE keeps the stored value wherever nothing new was scraped.
    await pool.query(`
      UPDATE stores SET
        address = COALESCE($1, address),
        city = COALESCE($2, city),
        zip = COALESCE($3, zip),
        phone = COALESCE($4, phone),
        website = COALESCE($5, website),
        latitude = COALESCE($6, latitude),
        longitude = COALESCE($7, longitude),
        updated_at = CURRENT_TIMESTAMP
      WHERE id = $8
    `, [
      store.address,
      store.city,
      store.zip,
      store.phone,
      store.website,
      store.latitude,
      store.longitude,
      existing.rows[0].id,
    ]);
    return 'updated';
  }

  await pool.query(`
    INSERT INTO stores (
      name, address, city, state, zip, phone, website,
      latitude, longitude, active, created_at, updated_at
    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, true, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
  `, [
    store.name,
    store.address,
    store.city,
    store.state,
    store.zip,
    store.phone,
    store.website,
    store.latitude,
    store.longitude,
  ]);
  return 'inserted';
}

/**
 * Opens the Leafly Arizona dispensary listing in a visible Chromium window,
 * looks for embedded dispensary data (primarily the `__NEXT_DATA__` payload)
 * and upserts every named entry into the `stores` table.
 *
 * The browser is closed and the database pool is ended before the returned
 * promise settles, whether the run succeeded or not.
 *
 * @throws Rethrows any launch, navigation or extraction error after logging it.
 *   Failures while saving a single dispensary are logged and skipped instead.
 */
async function scrapeLeaflyMap(): Promise<void> {
  console.log('🗺️ Scraping dispensaries from Leafly Arizona map...\n');

  try {
    const browser = await chromium.launch({
      headless: false, // Show browser to see what's happening
    });

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      });
      const page = await context.newPage();

      console.log('📄 Loading Leafly Arizona page...');
      await page.goto('https://www.leafly.com/dispensaries/arizona', {
        waitUntil: 'networkidle',
        timeout: 60000,
      });

      // Wait for page to fully load
      await page.waitForTimeout(5000);

      // Try to extract data from window object or JSON-LD.
      // The callback runs inside the page: it must be self-contained, and its
      // console.* output goes to the browser's console, not to this terminal.
      const mapData: unknown = await page.evaluate(() => {
        // Method 1: Check for JSON-LD structured data
        document.querySelectorAll('script[type="application/ld+json"]').forEach((script) => {
          try {
            const data = JSON.parse(script.textContent || '');
            if (data && typeof data === 'object') {
              console.log('Found JSON-LD:', Object.keys(data));
            }
          } catch {
            // Skip invalid JSON
          }
        });

        // Method 2: Check window object for data
        const fragments = ['store', 'dispensar', 'location', 'map', 'data'];
        const windowKeys = Object.keys(window).filter((key) => {
          const lowered = key.toLowerCase();
          return fragments.some((fragment) => lowered.includes(fragment));
        });
        console.log('Interesting window keys:', windowKeys);

        // Method 3: Check for __NEXT_DATA__ (Next.js apps often use this)
        const nextData = document.getElementById('__NEXT_DATA__');
        if (nextData) {
          try {
            const data = JSON.parse(nextData.textContent || '');
            console.log('Found __NEXT_DATA__:', Object.keys(data));

            // Navigate through the data to find dispensaries
            const pageProps = data?.props?.pageProps;
            if (pageProps) {
              console.log('PageProps keys:', Object.keys(pageProps));

              // Common patterns for store data; the first one present wins.
              for (const key of ['stores', 'dispensaries', 'locations']) {
                if (pageProps[key]) {
                  console.log(`Found ${key} array:`, pageProps[key].length);
                  return pageProps[key];
                }
              }
              if (pageProps.initialData) {
                console.log('Found initialData:', Object.keys(pageProps.initialData));
                return pageProps.initialData;
              }
            }
          } catch (e) {
            console.error('Error parsing __NEXT_DATA__:', e);
          }
        }

        // Method 4: Check for map markers
        const markers = document.querySelectorAll('[class*="marker"], [class*="pin"], [data-marker]');
        console.log('Found map markers:', markers.length);

        // Nothing structured was found.
        return [];
      });

      console.log('\n📊 Map data extracted:');
      console.log(JSON.stringify(mapData, null, 2));

      // If we found structured data, process it
      if (Array.isArray(mapData) && mapData.length > 0) {
        console.log(`\n✅ Found ${mapData.length} dispensaries from map data`);

        let savedCount = 0;
        let updatedCount = 0;

        for (const entry of mapData) {
          const store = toStoreColumns(entry);
          if (!store) continue;

          try {
            if ((await upsertStore(store)) === 'updated') {
              updatedCount++;
            } else {
              savedCount++;
            }
          } catch (error) {
            // Best effort: one bad row must not abort the remaining ones.
            console.error(`Error saving dispensary: ${error}`);
          }
        }

        console.log(`\n✅ Saved ${savedCount} new dispensaries`);
        console.log(`✅ Updated ${updatedCount} existing dispensaries`);
      }
    } finally {
      // Nested so that a failing close() still reaches pool.end() below.
      await browser.close();
    }
  } catch (error) {
    console.error(`Error: ${error}`);
    throw error;
  } finally {
    await pool.end();
  }
}
|
||||
|
||||
// Script entry point. scrapeLeaflyMap() logs its own failure before rethrowing,
// so the handler only marks the process as failed instead of leaving the
// rejection unhandled (which older Node versions report with exit code 0).
scrapeLeaflyMap().catch(() => {
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user