Files
cannaiq/backend/archive/parse-azdhs-copied-data.ts
Kelly d91c55a344 feat: Add stale process monitor, users route, landing page, archive old scripts
- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-05 04:07:31 -07:00

205 lines
6.5 KiB
TypeScript

import { pool } from './src/db/migrate';
import * as fs from 'fs';
async function parseAZDHSCopiedData() {
console.log('📋 Parsing manually copied AZDHS data...\n');
const fileContent = fs.readFileSync('/home/kelly/Documents/azdhs dispos', 'utf-8');
const lines = fileContent.split('\n').map(l => l.trim()).filter(l => l.length > 0);
const dispensaries: any[] = [];
let i = 0;
while (i < lines.length) {
// Skip if we hit "Get Details" without processing (edge case)
if (lines[i] === 'Get Details') {
i++;
continue;
}
let name = lines[i];
let companyName = '';
let statusLine = '';
let address = '';
let linesConsumed = 0;
// Check if next line is "Get Details" (edge case: end of data)
if (i + 1 < lines.length && lines[i + 1] === 'Get Details') {
i += 2;
continue;
}
// Two possible patterns:
// Pattern 1 (5 lines): Name, Company, Status, Address, Get Details
// Pattern 2 (4 lines): Name, Status, Address, Get Details (company name = dispensary name)
// Check if line i+1 contains "Operating" (status line)
const nextLine = lines[i + 1];
if (nextLine && nextLine.includes('Operating')) {
// Pattern 2: 4-line format
companyName = name; // Company name same as dispensary name
statusLine = lines[i + 1];
address = lines[i + 2];
const getDetails = lines[i + 3];
if (getDetails !== 'Get Details') {
console.log(`⚠️ Skipping malformed 4-line record at line ${i}: ${name}`);
i++;
continue;
}
linesConsumed = 4;
} else {
// Pattern 1: 5-line format
companyName = lines[i + 1];
statusLine = lines[i + 2];
address = lines[i + 3];
const getDetails = lines[i + 4];
if (getDetails !== 'Get Details') {
console.log(`⚠️ Skipping malformed 5-line record at line ${i}: ${name}`);
i++;
continue;
}
linesConsumed = 5;
}
// Parse phone from status line
let phone = '';
const phoneMatch = statusLine.match(/(\(\d{3}\)\s*\d{3}-\d{4}|\d{3}-\d{3}-\d{4}|\d{10})/);
if (phoneMatch) {
phone = phoneMatch[1].replace(/\D/g, ''); // Remove all non-digits
}
// Parse address components
// Format: "123 Street Name, City, AZ 85001"
let street = '', city = '', state = 'AZ', zip = '';
const addressParts = address.split(',').map(p => p.trim());
if (addressParts.length >= 3) {
street = addressParts.slice(0, -2).join(', '); // Everything before city
city = addressParts[addressParts.length - 2]; // Second to last
// Last part should be "AZ 85001"
const stateZip = addressParts[addressParts.length - 1];
const stateZipMatch = stateZip.match(/([A-Z]{2})\s+(\d{5})/);
if (stateZipMatch) {
state = stateZipMatch[1];
zip = stateZipMatch[2];
}
} else if (addressParts.length === 2) {
street = addressParts[0];
const cityStateZip = addressParts[1];
// Try to extract "City, AZ 85001" from second part
const match = cityStateZip.match(/([^,]+),?\s+([A-Z]{2})\s+(\d{5})/);
if (match) {
city = match[1].trim();
state = match[2];
zip = match[3];
}
} else {
// Single part address - try best effort
street = address;
const zipMatch = address.match(/\b(\d{5})\b/);
if (zipMatch) zip = zipMatch[1];
const cityMatch = address.match(/,\s*([A-Za-z\s]+),\s*AZ/);
if (cityMatch) city = cityMatch[1].trim();
}
dispensaries.push({
name,
companyName,
phone,
street,
city,
state,
zip,
statusLine
});
// Move to next record
i += linesConsumed;
}
console.log(`✅ Parsed ${dispensaries.length} dispensaries\n`);
if (dispensaries.length > 0) {
console.log('📋 Sample of first 5:');
console.table(dispensaries.slice(0, 5).map(d => ({
name: d.name.substring(0, 30),
city: d.city,
phone: d.phone,
zip: d.zip
})));
}
// Save to database
console.log('\n💾 Saving to azdhs_list table...\n');
let savedCount = 0;
let updatedCount = 0;
let skippedCount = 0;
for (const disp of dispensaries) {
if (!disp.name || disp.name.length < 3) {
skippedCount++;
continue;
}
try {
// Check if exists by name + address + state (to handle multiple locations with same name)
const existing = await pool.query(
'SELECT id FROM azdhs_list WHERE LOWER(name) = LOWER($1) AND LOWER(address) = LOWER($2) AND state = $3',
[disp.name, disp.street, disp.state]
);
const slug = disp.name.toLowerCase().replace(/[^a-z0-9]+/g, '-');
const azdhsUrl = `https://azcarecheck.azdhs.gov/s/?name=${encodeURIComponent(disp.name)}`;
if (existing.rows.length > 0) {
await pool.query(`
UPDATE azdhs_list SET
company_name = COALESCE($1, company_name),
address = COALESCE($2, address),
city = COALESCE($3, city),
zip = COALESCE($4, zip),
phone = COALESCE($5, phone),
status_line = COALESCE($6, status_line),
updated_at = CURRENT_TIMESTAMP
WHERE id = $7
`, [disp.companyName, disp.street, disp.city, disp.zip, disp.phone, disp.statusLine, existing.rows[0].id]);
updatedCount++;
} else {
await pool.query(`
INSERT INTO azdhs_list (
name, company_name, slug, address, city, state, zip, phone, status_line, azdhs_url,
created_at, updated_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
`, [disp.name, disp.companyName, slug, disp.street, disp.city, disp.state, disp.zip, disp.phone, disp.statusLine, azdhsUrl]);
savedCount++;
}
} catch (error) {
console.error(`Error saving ${disp.name}: ${error}`);
skippedCount++;
}
}
console.log(`\n✅ Saved ${savedCount} new AZDHS dispensaries`);
console.log(`✅ Updated ${updatedCount} existing AZDHS dispensaries`);
if (skippedCount > 0) console.log(`⚠️ Skipped ${skippedCount} entries`);
// Show total in azdhs_list
const total = await pool.query(`SELECT COUNT(*) as total FROM azdhs_list`);
console.log(`\n🎯 Total in azdhs_list table: ${total.rows[0].total}`);
// Show total in stores (for comparison)
const storesTotal = await pool.query(`SELECT COUNT(*) as total FROM stores WHERE state = 'AZ'`);
console.log(`🎯 Total in stores table (AZ): ${storesTotal.rows[0].total}`);
await pool.end();
}
parseAZDHSCopiedData();