feat: Add stale process monitor, users route, landing page, archive old scripts
- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
108
backend/archive/scrape-azdhs-better.ts
Normal file
108
backend/archive/scrape-azdhs-better.ts
Normal file
@@ -0,0 +1,108 @@
|
||||
import { chromium } from 'playwright-extra';
|
||||
import stealth from 'puppeteer-extra-plugin-stealth';
|
||||
import { pool } from './src/db/migrate';
|
||||
|
||||
// Register the stealth plugin on the playwright-extra Chromium launcher at module
// load, before any browser is launched below.
chromium.use(stealth());
|
||||
|
||||
/**
 * Exploratory scrape of the AZDHS "AZ Care Check" facility page.
 *
 * Opens the page in a visible (non-headless) Chromium window, records every JSON
 * response whose URL contains "dispensar", "facility" or "location", waits a
 * fixed 20 seconds for the page to render, clicks a "View All"/"Show All"
 * control if one is visible, and then collects DOM elements whose text contains
 * something shaped like a capitalised multi-word name. Findings are only
 * printed to the console.
 *
 * The imported `pool` is never queried here; it is only closed on the way out.
 *
 * @returns A promise that resolves once the browser and the pool are closed.
 * @throws Rethrows any launch, navigation, extraction or browser-close error
 *   after logging it.
 */
async function scrapeAZDHSBetter(): Promise<void> {
  type CapturedResponse = { url: string; data: unknown };
  type CandidateElement = { rawText: string; element: string };

  console.log('🏛️ Scraping AZDHS official map (improved approach)...\n');

  try {
    // Launch inside the outer try so that a failed launch still ends the pool.
    const browser = await chromium.launch({
      headless: false,
    });

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      });

      const page = await context.newPage();

      // Capture API requests
      const apiData: CapturedResponse[] = [];
      page.on('response', async (response) => {
        const url = response.url();
        const looksRelevant =
          url.includes('dispensar') || url.includes('facility') || url.includes('location');
        if (!looksRelevant) {
          return;
        }

        try {
          const json: unknown = await response.json();
          console.log(`📡 Captured API response from: ${url.substring(0, 100)}...`);
          apiData.push({ url, data: json });
        } catch {
          // Not JSON (or the body was unavailable): skipping is deliberate,
          // since most matching responses are not API payloads.
        }
      });

      console.log('📄 Loading AZDHS page (waiting up to 60s for JavaScript)...');

      await page.goto('https://azcarecheck.azdhs.gov/s/?facilityId=001t000000L0TApAAN', {
        waitUntil: 'domcontentloaded',
        timeout: 60000
      });

      // Wait longer for JavaScript to execute
      console.log('⏳ Waiting 20 seconds for Salesforce to fully load...');
      await page.waitForTimeout(20000);

      // Try to find and click "View All" or expand the map
      console.log('🔍 Looking for buttons to expand results...');

      const viewAllButton = page
        .locator('button:has-text("View All"), button:has-text("Show All"), a:has-text("View All")')
        .first();
      // A failing visibility probe is treated the same as "not visible".
      if (await viewAllButton.isVisible().catch(() => false)) {
        console.log(' ✅ Found View All button, clicking...');
        await viewAllButton.click();
        await page.waitForTimeout(5000);
      }

      // Try extracting data directly from page
      console.log('\n📦 Extracting dispensary data from page...');

      // This callback runs inside the page, so it may only use what it declares itself.
      const dispensaries = await page.evaluate(() => {
        const results: CandidateElement[] = [];
        // Two to six consecutive capitalised words, e.g. "Green Leaf Dispensary".
        const namePattern = /([A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,5})/;

        // Look for various data patterns
        const elements = document.querySelectorAll(
          '[data-facility], [data-location], article, .facility, .location, .dispensary'
        );

        elements.forEach((el) => {
          const text = el.textContent || '';

          // Only consider blocks between 21 and 499 characters long.
          if (text.length > 20 && text.length < 500 && namePattern.test(text)) {
            results.push({
              rawText: text.substring(0, 200),
              element: el.className,
            });
          }
        });

        return results;
      });

      console.log(`\n📊 Found ${dispensaries.length} potential dispensary elements`);
      console.log(`📊 Captured ${apiData.length} API responses`);

      if (apiData.length > 0) {
        console.log('\n🎯 Analyzing API data...');
        console.log(JSON.stringify(apiData[0], null, 2).substring(0, 1000));
      }

      if (dispensaries.length > 0) {
        console.log('\n📋 Sample dispensary elements:');
        console.log(dispensaries.slice(0, 3));
      }
    } finally {
      await browser.close();
    }
  } catch (error) {
    console.error(`❌ Error: ${error}`);
    throw error;
  } finally {
    // Runs even when launching or closing the browser failed.
    await pool.end();
  }
}
|
||||
|
||||
// Entry point. Report any failure and exit non-zero rather than leaving the
// returned promise's rejection unhandled.
scrapeAZDHSBetter().catch((error: unknown) => {
  console.error('AZDHS scrape failed:', error);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user