feat: Add stale process monitor, users route, landing page, archive old scripts
- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
178
backend/archive/enrich-single-dispensary.ts
Normal file
178
backend/archive/enrich-single-dispensary.ts
Normal file
@@ -0,0 +1,178 @@
|
||||
import { firefox } from 'playwright';
|
||||
import { pool } from './src/db/migrate';
|
||||
import { getRandomProxy } from './src/utils/proxyManager';
|
||||
|
||||
/** Street address of the dispensary to look up and update. */
interface DispensaryAddress {
  address: string;
  city: string;
  state: string;
  zip: string;
}

/** Fields scraped from the Maps page; a field is absent when no selector matched. */
interface ScrapedBusinessData {
  name?: string;
  website?: string;
  phone?: string;
  rating?: number;
  reviewCount?: number;
}

/** Target used when the script is run without arguments. */
const DEFAULT_DISPENSARY: DispensaryAddress = {
  address: '1115 Circulo Mercado',
  city: 'Rio Rico',
  state: 'AZ',
  zip: '85648',
};

/**
 * Looks up one dispensary on Google Maps through a random proxy and writes the
 * scraped name, website, phone, rating and review count to `azdhs_list`.
 *
 * The row to update is matched on `address` and `city` only; `state` and `zip`
 * are used solely to build the Maps search query. Errors are logged rather
 * than rethrown, and the browser (if launched) and the database pool are
 * always closed before the returned promise settles.
 *
 * @param target Address to enrich. Defaults to the address this script was
 *   originally hard-coded for.
 */
async function enrichSingleDispensary(
  target: DispensaryAddress = DEFAULT_DISPENSARY
): Promise<void> {
  const { address, city, state, zip } = target;

  console.log(`🦊 Enriching: ${address}, ${city}, ${state} ${zip}\n`);

  try {
    const proxy = await getRandomProxy();
    if (!proxy) {
      console.log('❌ No proxies available');
      return; // the outer `finally` still ends the pool
    }

    console.log(`🔌 Using proxy: ${proxy.server}\n`);

    // Headed on purpose: the script pauses at the end so the result can be inspected.
    const browser = await firefox.launch({
      headless: false,
      firefoxUserPrefs: {
        'geo.enabled': true,
        'geo.provider.use_corelocation': true,
        'geo.prompt.testing': true,
        'geo.prompt.testing.allow': true,
      },
    });

    try {
      // Left as `any` because the proxy record's field types are declared in
      // proxyManager, which is not visible from this file.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const contextOptions: any = {
        viewport: { width: 1920, height: 1080 },
        userAgent:
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
        geolocation: { latitude: 33.4484, longitude: -112.0740 },
        permissions: ['geolocation'],
        proxy: {
          server: proxy.server,
          username: proxy.username,
          password: proxy.password,
        },
      };

      const context = await browser.newContext(contextOptions);
      const page = await context.newPage();

      // Search Google Maps
      const searchQuery = `dispensary ${address}, ${city}, ${state} ${zip}`;
      const url = `https://www.google.com/maps/search/${encodeURIComponent(searchQuery)}`;

      console.log(`📍 Searching Maps: ${searchQuery}`);
      await page.goto(url, {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
      });

      // Fixed pause to let the results render; nothing specific is awaited.
      await page.waitForTimeout(5000);

      // Runs inside the page, so it must not reference anything from this module
      // at runtime (the type annotations are erased and therefore safe).
      const businessData = await page.evaluate((): ScrapedBusinessData => {
        const data: ScrapedBusinessData = {};

        // Place name: first selector with non-empty text wins.
        const nameSelectors = [
          'h1[class*="fontHeadline"]',
          'h1.DUwDvf',
          '[data-item-id*="name"] h1',
        ];
        for (const selector of nameSelectors) {
          const el = document.querySelector(selector);
          if (el?.textContent) {
            data.name = el.textContent.trim();
            break;
          }
        }

        // Website: skip links that point back at google.com.
        const websiteSelectors = [
          'a[data-item-id="authority"]',
          'a[data-tooltip="Open website"]',
          'a[aria-label*="Website"]',
        ];
        for (const selector of websiteSelectors) {
          const el = document.querySelector<HTMLAnchorElement>(selector);
          if (el?.href && !el.href.includes('google.com')) {
            data.website = el.href;
            break;
          }
        }

        // Phone: take the first 3-3-4 digit pattern found in the element text.
        const phoneSelectors = [
          'button[data-item-id*="phone"]',
          'button[aria-label*="Phone"]',
          '[data-tooltip*="Copy phone number"]',
        ];
        const phonePattern = /\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/;
        for (const selector of phoneSelectors) {
          const phoneMatch = document.querySelector(selector)?.textContent?.match(phonePattern);
          if (phoneMatch) {
            data.phone = phoneMatch[0];
            break;
          }
        }

        // Rating, parsed from an aria-label containing "<number> stars".
        const ratingMatch = document
          .querySelector('[role="img"][aria-label*="stars"]')
          ?.getAttribute('aria-label')
          ?.match(/(\d+\.?\d*)\s*stars?/);
        if (ratingMatch) {
          data.rating = parseFloat(ratingMatch[1]);
        }

        // Review count, parsed from an aria-label containing "<number> reviews".
        const reviewMatch = document
          .querySelector('[aria-label*="reviews"]')
          ?.getAttribute('aria-label')
          ?.match(/([\d,]+)\s*reviews?/);
        if (reviewMatch) {
          data.reviewCount = parseInt(reviewMatch[1].replace(/,/g, ''), 10);
        }

        return data;
      });

      console.log(`\n✅ Found data:`, businessData);

      // Update dutchie database
      if (businessData.name) {
        // NOTE(review): fields that were not scraped are passed as `undefined`;
        // confirm whether the driver writes NULL over existing column values.
        await pool.query(
          `
          UPDATE azdhs_list
          SET
            dba_name = $1,
            website = $2,
            phone = $3,
            google_rating = $4,
            google_review_count = $5,
            updated_at = CURRENT_TIMESTAMP
          WHERE address = $6 AND city = $7
        `,
          [
            businessData.name,
            businessData.website,
            // Store digits only. `\D` (not `\\D`) is required to strip punctuation.
            businessData.phone?.replace(/\D/g, ''),
            businessData.rating,
            businessData.reviewCount,
            address,
            city,
          ]
        );

        console.log(`\n✅ Updated database!`);
      } else {
        console.log(`\n❌ No business name found`);
      }

      // Keep browser open for 10 seconds so you can see the results
      console.log(`\n⏳ Keeping browser open for 10 seconds...`);
      await page.waitForTimeout(10000);
    } finally {
      // Close the browser even when context/page creation or scraping fails.
      await browser.close();
    }
  } catch (error) {
    console.log(`❌ Error: ${error}`);
  } finally {
    await pool.end();
  }
}
|
||||
|
||||
// Script entry point. The promise is handled explicitly so that a rejection
// escaping the function (e.g. from cleanup) is reported with a failing exit code.
enrichSingleDispensary().catch((error: unknown) => {
  console.error(`❌ Unhandled error: ${error}`);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user