- Add backend stale process monitoring API (/api/stale-processes) - Add users management route - Add frontend landing page and stale process monitor UI on /scraper-tools - Move old development scripts to backend/archive/ - Update frontend build with new features 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
109 lines
3.4 KiB
TypeScript
109 lines
3.4 KiB
TypeScript
import { chromium } from 'playwright-extra';
|
|
import stealth from 'puppeteer-extra-plugin-stealth';
|
|
import { pool } from './src/db/migrate';
|
|
|
|
// Register the stealth plugin on the playwright-extra `chromium` object at
// module load, so it is already in place when a browser is launched further down.
chromium.use(stealth());
|
|
|
|
/**
 * Exploratory scrape of the AZDHS Care Check page.
 *
 * Opens the page in a visible (non-headless) Chromium window, records every
 * JSON response whose URL contains "dispensar", "facility" or "location",
 * optionally clicks a "View All"/"Show All" control, and then collects DOM
 * elements whose text looks like it could describe a dispensary. Findings are
 * only printed to the console; nothing is returned.
 *
 * The browser (if it was launched) and the imported `pool` are always released
 * before the function settles, whether it succeeds or fails.
 *
 * @returns A promise that resolves once the scrape and cleanup have finished.
 * @throws Re-throws any error raised while launching, loading or inspecting
 *   the page, after logging it.
 */
async function scrapeAZDHSBetter(): Promise<void> {
  console.log('🏛️ Scraping AZDHS official map (improved approach)...\n');

  // Declared outside the `try` so `finally` can close it, but assigned inside
  // so that a failure during launch/context/page setup is still logged and
  // still triggers cleanup.
  let browser: Awaited<ReturnType<typeof chromium.launch>> | undefined;

  try {
    browser = await chromium.launch({
      headless: false,
    });

    const context = await browser.newContext({
      viewport: { width: 1920, height: 1080 },
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    });

    const page = await context.newPage();

    // Capture API requests
    const apiData: { url: string; data: unknown }[] = [];
    page.on('response', async (response) => {
      const url = response.url();
      const looksRelevant =
        url.includes('dispensar') || url.includes('facility') || url.includes('location');
      if (!looksRelevant) {
        return;
      }

      try {
        const json: unknown = await response.json();
        console.log(`📡 Captured API response from: ${url.substring(0, 100)}...`);
        apiData.push({ url, data: json });
      } catch {
        // Deliberately ignored: the response body was not JSON (or could not
        // be read), so there is nothing to capture for this URL.
      }
    });

    console.log('📄 Loading AZDHS page (waiting up to 60s for JavaScript)...');

    await page.goto('https://azcarecheck.azdhs.gov/s/?facilityId=001t000000L0TApAAN', {
      waitUntil: 'domcontentloaded',
      timeout: 60000,
    });

    // Navigation only waits for DOMContentLoaded, so give the client-side
    // JavaScript a fixed amount of extra time to run.
    console.log('⏳ Waiting 20 seconds for Salesforce to fully load...');
    await page.waitForTimeout(20000);

    // Try to find and click "View All" or expand the map
    console.log('🔍 Looking for buttons to expand results...');

    const viewAllButton = page
      .locator('button:has-text("View All"), button:has-text("Show All"), a:has-text("View All")')
      .first();
    // A failing visibility check is treated the same as "button not present".
    if (await viewAllButton.isVisible().catch(() => false)) {
      console.log(' ✅ Found View All button, clicking...');
      await viewAllButton.click();
      await page.waitForTimeout(5000);
    }

    // Try extracting data directly from page
    console.log('\n📦 Extracting dispensary data from page...');

    // This callback runs inside the page, so it must be self-contained and
    // cannot reference anything from the surrounding Node scope.
    const dispensaries = await page.evaluate(() => {
      const results: { rawText: string; element: string }[] = [];

      // At least two consecutive capitalised words, e.g. a business name.
      const namePattern = /([A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,5})/;

      // Look for various data patterns
      const elements = document.querySelectorAll(
        '[data-facility], [data-location], article, .facility, .location, .dispensary'
      );

      elements.forEach((el) => {
        const text = el.textContent || '';

        // Skip trivially short snippets and large containers; only mid-sized
        // text blocks are treated as candidate entries.
        if (text.length > 20 && text.length < 500 && namePattern.test(text)) {
          results.push({
            rawText: text.substring(0, 200),
            element: el.className,
          });
        }
      });

      return results;
    });

    console.log(`\n📊 Found ${dispensaries.length} potential dispensary elements`);
    console.log(`📊 Captured ${apiData.length} API responses`);

    if (apiData.length > 0) {
      console.log('\n🎯 Analyzing API data...');
      // Print only the first 1000 characters of the first captured payload.
      console.log(JSON.stringify(apiData[0], null, 2).substring(0, 1000));
    }

    if (dispensaries.length > 0) {
      console.log('\n📋 Sample dispensary elements:');
      console.log(dispensaries.slice(0, 3));
    }
  } catch (error) {
    console.error(`❌ Error: ${error}`);
    throw error;
  } finally {
    // Release the two resources independently: a failure while closing the
    // browser must not prevent the pool from being ended.
    try {
      await browser?.close();
    } finally {
      await pool.end();
    }
  }
}
|
|
|
|
// Entry point: run the scrape. The returned promise rejects on failure, so
// handle the rejection here instead of leaving a floating promise, and report
// the failure to the caller of the script through a non-zero exit code.
scrapeAZDHSBetter().catch((error: unknown) => {
  console.error('❌ Scrape failed:', error);
  process.exitCode = 1;
});
|