- Add backend stale process monitoring API (/api/stale-processes) - Add users management route - Add frontend landing page and stale process monitor UI on /scraper-tools - Move old development scripts to backend/archive/ - Update frontend build with new features 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
213 lines
7.4 KiB
TypeScript
213 lines
7.4 KiB
TypeScript
import { chromium } from 'playwright-extra';
|
|
import stealth from 'puppeteer-extra-plugin-stealth';
|
|
import { pool } from './src/db/migrate';
|
|
|
|
// Register the stealth plugin on playwright-extra's `chromium` launcher at module load,
// i.e. before any browser is launched from it further down in this script.
// NOTE(review): presumably this is here to make the automated browser harder to
// detect as a bot — confirm against the puppeteer-extra-plugin-stealth docs.
chromium.use(stealth());
|
|
|
|
/** One JSON response captured from the AZDHS site while the page was loading. */
interface CapturedResponse {
  url: string;
  data: unknown;
  status: number;
}

/** A raw record from the AZDHS API; its field names are not known up front. */
type DispensaryRecord = Record<string, unknown>;

/** What happened to a single record when it was written to the `stores` table. */
type SaveOutcome = 'saved' | 'updated' | 'skipped';

const AZDHS_HOST = 'azcarecheck.azdhs.gov';
const AZDHS_FACILITY_URL = 'https://azcarecheck.azdhs.gov/s/?facilityId=001t000000L0TApAAN';
const PAGE_LOAD_TIMEOUT_MS = 120000;
const CAPTURE_WAIT_MS = 60000;
// An array must be strictly longer than this to be considered the dispensary list.
const MIN_DISPENSARY_ARRAY_LENGTH = 50;
const RAW_RESPONSE_DUMP_PATH = '/tmp/azdhs-api-responses.json';
// A sample record must have at least one key containing one of these fragments.
const DISPENSARY_KEY_HINTS = ['name', 'address', 'facility'] as const;

// Candidate property names per logical field, tried in order (plain, snake_case
// and Salesforce-style `__c` variants).
const FIELD_KEYS = {
  name: ['Name', 'name', 'FacilityName', 'facility_name', 'Name__c', 'dispensaryName', 'BusinessName'],
  address: ['Address', 'address', 'Street', 'street', 'Address__c', 'StreetAddress', 'street_address'],
  city: ['City', 'city', 'City__c'],
  state: ['State', 'state', 'State__c'],
  zip: ['Zip', 'zip', 'ZipCode', 'zip_code', 'PostalCode', 'Zip__c'],
  phone: ['Phone', 'phone', 'PhoneNumber', 'phone_number', 'Phone__c'],
  email: ['Email', 'email', 'Email__c'],
  latitude: ['Latitude', 'latitude', 'lat', 'Latitude__c'],
  longitude: ['Longitude', 'longitude', 'lng', 'lon', 'Longitude__c'],
} as const;

/**
 * Depth-first search of a parsed JSON value for the first array that looks like
 * a dispensary list: more than MIN_DISPENSARY_ARRAY_LENGTH items whose first
 * element is an object with a name/address/facility-like key.
 *
 * @param node - Parsed JSON value to search.
 * @param path - Dotted path of `node` inside the response, used only for logging.
 * @returns The matching array, or an empty array when nothing matches.
 */
function findDispensaryArray(node: unknown, path = ''): DispensaryRecord[] {
  if (typeof node !== 'object' || node === null) return [];

  if (Array.isArray(node) && node.length > MIN_DISPENSARY_ARRAY_LENGTH) {
    const sample: unknown = node[0];
    if (typeof sample === 'object' && sample !== null) {
      const keys = Object.keys(sample);
      const looksLikeDispensary = keys.some((key) => {
        const lowered = key.toLowerCase();
        return DISPENSARY_KEY_HINTS.some((hint) => lowered.includes(hint));
      });
      if (looksLikeDispensary) {
        console.log(` ✅ Found potential dispensary array at ${path} with ${node.length} items`);
        console.log(` Sample keys: ${keys.slice(0, 10).join(', ')}`);
        // Only the first element was inspected; later elements are validated
        // (and skipped on failure) when they are saved.
        return node as DispensaryRecord[];
      }
    }
  }

  // Arrays that did not qualify are still descended into, element by element.
  for (const [key, value] of Object.entries(node)) {
    const nested = findDispensaryArray(value, `${path}.${key}`);
    if (nested.length > 0) return nested;
  }

  return [];
}

/**
 * Returns the first candidate property of `record` that holds a usable value.
 * `undefined`, `null` and the empty string count as missing; `0` and `false`
 * are kept, so a legitimate zero coordinate is not discarded.
 *
 * @returns The value found, or `null` (bound as SQL NULL) when none is present.
 */
function pickField(record: DispensaryRecord, candidates: readonly string[]): unknown {
  for (const key of candidates) {
    const value = record[key];
    if (value !== undefined && value !== null && value !== '') return value;
  }
  return null;
}

/**
 * Inserts one AZDHS record into `stores`, or updates the existing row matched
 * case-insensitively on name within the same state and `data_source = 'azdhs'`.
 * On update, COALESCE keeps the stored value wherever the record has none.
 *
 * @returns 'skipped' when the record has no usable name, otherwise 'updated' or 'saved'.
 * @throws Whatever `pool.query` rejects with; the caller decides how to count it.
 */
async function upsertDispensary(item: DispensaryRecord): Promise<SaveOutcome> {
  const name = pickField(item, FIELD_KEYS.name);
  if (typeof name !== 'string' || name.trim().length < 3) return 'skipped';

  const address = pickField(item, FIELD_KEYS.address);
  const city = pickField(item, FIELD_KEYS.city);
  const state = pickField(item, FIELD_KEYS.state) ?? 'AZ';
  const zip = pickField(item, FIELD_KEYS.zip);
  const phone = pickField(item, FIELD_KEYS.phone);
  const email = pickField(item, FIELD_KEYS.email);
  const lat = pickField(item, FIELD_KEYS.latitude);
  const lng = pickField(item, FIELD_KEYS.longitude);

  const existing = await pool.query(
    'SELECT id FROM stores WHERE LOWER(name) = LOWER($1) AND state = $2 AND data_source = $3',
    [name, state, 'azdhs']
  );

  if (existing.rows.length > 0) {
    await pool.query(`
      UPDATE stores SET
        address = COALESCE($1, address),
        city = COALESCE($2, city),
        zip = COALESCE($3, zip),
        phone = COALESCE($4, phone),
        email = COALESCE($5, email),
        latitude = COALESCE($6, latitude),
        longitude = COALESCE($7, longitude),
        updated_at = CURRENT_TIMESTAMP
      WHERE id = $8
    `, [address, city, zip, phone, email, lat, lng, existing.rows[0].id]);
    return 'updated';
  }

  const slug = name.toLowerCase().replace(/[^a-z0-9]+/g, '-');
  // Stored in the `dutchie_url` column even though it is an AZDHS lookup URL.
  const sourceUrl = `https://azcarecheck.azdhs.gov/s/?name=${encodeURIComponent(name)}`;

  await pool.query(`
    INSERT INTO stores (
      name, slug, dutchie_url, address, city, state, zip, phone, email,
      latitude, longitude, data_source, active, created_at, updated_at
    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, 'azdhs', true, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
  `, [name, slug, sourceUrl, address, city, state, zip, phone, email, lat, lng]);
  return 'saved';
}

/**
 * Opens the AZDHS care-check page in a visible Chromium window, records every
 * JSON response served from the AZDHS host, looks for a dispensary list in the
 * captured payloads, and upserts each entry into the `stores` table.
 *
 * When no list is found, the captured URLs are printed and the raw payloads are
 * written to RAW_RESPONSE_DUMP_PATH for manual inspection; the function then
 * resolves normally.
 *
 * The browser and the database pool are each released exactly once, on every
 * exit path.
 *
 * @throws Rethrows any navigation, scraping or summary-query error after logging it.
 */
async function scrapeAZDHSAPI(): Promise<void> {
  console.log('🏛️ Scraping AZDHS via API interception...\n');

  try {
    const browser = await chromium.launch({
      headless: false,
    });

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
      });
      const page = await context.newPage();

      // Capture ALL JSON API responses coming from the AZDHS host.
      const allResponses: CapturedResponse[] = [];

      page.on('response', async (response) => {
        const url = response.url();
        const contentType = response.headers()['content-type'] || '';
        if (!url.includes(AZDHS_HOST) || !contentType.includes('json')) return;

        try {
          const data: unknown = await response.json();
          console.log(`📡 Captured JSON from: ${url.substring(0, 80)}...`);
          allResponses.push({ url, data, status: response.status() });
        } catch {
          // Deliberately best-effort: a body that is not valid JSON, or that is
          // no longer available, is simply not captured.
        }
      });

      console.log('📄 Loading AZDHS page...');

      await page.goto(AZDHS_FACILITY_URL, {
        waitUntil: 'networkidle',
        timeout: PAGE_LOAD_TIMEOUT_MS,
      });

      console.log(`⏳ Waiting ${CAPTURE_WAIT_MS / 1000} seconds to capture all API calls...\n`);
      await page.waitForTimeout(CAPTURE_WAIT_MS);

      console.log(`\n📊 Captured ${allResponses.length} JSON API responses\n`);

      // Analyze responses to find dispensary data; the first response that
      // contains a plausible list wins.
      let dispensaryData: DispensaryRecord[] = [];

      for (const captured of allResponses) {
        const found = findDispensaryArray(captured.data);
        if (found.length === 0) continue;

        dispensaryData = found;
        console.log(`\n🎯 Found dispensary data! ${found.length} entries`);
        console.log(` URL: ${captured.url}\n`);

        // Show sample of first entry
        console.log('📋 Sample entry:');
        console.log(JSON.stringify(found[0], null, 2).substring(0, 500));
        break;
      }

      if (dispensaryData.length === 0) {
        console.log('❌ Could not find dispensary data in API responses\n');
        console.log('🔍 All captured URLs:');
        allResponses.forEach((captured, index) => {
          console.log(` ${index + 1}. ${captured.url}`);
        });

        // Save raw responses for manual inspection
        console.log(`\n💾 Saving raw API responses to ${RAW_RESPONSE_DUMP_PATH} for inspection...`);
        const fs = require('fs');
        fs.writeFileSync(RAW_RESPONSE_DUMP_PATH, JSON.stringify(allResponses, null, 2));

        // No explicit cleanup here: the enclosing `finally` blocks close the
        // browser and end the pool. Ending the pool here as well would make the
        // second `pool.end()` reject.
        return;
      }

      // Save to database
      console.log('\n💾 Saving AZDHS dispensaries to database...\n');

      const counts: Record<SaveOutcome, number> = { saved: 0, updated: 0, skipped: 0 };

      for (const item of dispensaryData) {
        try {
          const outcome = await upsertDispensary(item);
          counts[outcome] += 1;
        } catch (error) {
          // One bad record must not abort the whole import.
          console.error(`Error saving: ${error}`);
          counts.skipped += 1;
        }
      }

      console.log(`\n✅ Saved ${counts.saved} new AZDHS dispensaries`);
      console.log(`✅ Updated ${counts.updated} existing AZDHS dispensaries`);
      if (counts.skipped > 0) console.log(`⚠️ Skipped ${counts.skipped} entries`);

      // Show totals by source
      const totals = await pool.query(`
        SELECT data_source, COUNT(*) as count
        FROM stores
        WHERE state = 'AZ'
        GROUP BY data_source
        ORDER BY data_source
      `);

      console.log('\n📊 Arizona dispensaries by source:');
      console.table(totals.rows);
    } finally {
      await browser.close();
    }
  } catch (error) {
    console.error(`❌ Error: ${error}`);
    throw error;
  } finally {
    // Outermost so the pool is ended even when launching or closing the browser fails.
    await pool.end();
  }
}
|
|
|
|
// Script entry point. The returned promise is handled explicitly so that a failed
// run is reported and ends with a non-zero exit status instead of surfacing as an
// unhandled promise rejection.
scrapeAZDHSAPI().catch((error: unknown) => {
  console.error('❌ AZDHS scrape failed:', error);
  process.exitCode = 1;
});
|