- Add backend stale process monitoring API (/api/stale-processes) - Add users management route - Add frontend landing page and stale process monitor UI on /scraper-tools - Move old development scripts to backend/archive/ - Update frontend build with new features 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
179 lines
4.9 KiB
TypeScript
179 lines
4.9 KiB
TypeScript
import { firefox } from 'playwright';
|
|
import { pool } from './src/db/migrate';
|
|
import { getRandomProxy } from './src/utils/proxyManager';
|
|
|
|
/** Fields scraped from the Google Maps page; a field is absent when no selector matched. */
interface ScrapedBusinessData {
  name?: string;
  website?: string;
  phone?: string;
  rating?: number;
  reviewCount?: number;
}

/**
 * One-off enrichment run for a single hard-coded dispensary address.
 *
 * Steps:
 *  1. Pick a proxy via `getRandomProxy()`; bail out if none is available.
 *  2. Launch a headed Firefox through that proxy and open a Google Maps
 *     search for "dispensary <address>, <city>, <state> <zip>".
 *  3. Scrape name, website, phone, rating and review count from the page.
 *  4. If a name was found, write the scraped values to the `azdhs_list` row
 *     matching the address and city.
 *
 * Errors are logged rather than rethrown. The browser (if launched) and the
 * database pool are always released, including on early exit and on failure.
 */
async function enrichSingleDispensary(): Promise<void> {
  const address = '1115 Circulo Mercado';
  const city = 'Rio Rico';
  const state = 'AZ';
  const zip = '85648';

  console.log(`🦊 Enriching: ${address}, ${city}, ${state} ${zip}\n`);

  try {
    const proxy = await getRandomProxy();
    if (!proxy) {
      console.log('❌ No proxies available');
      return; // the outer finally still closes the pool
    }

    console.log(`🔌 Using proxy: ${proxy.server}\n`);

    // Headed on purpose: the run ends with a pause so the operator can look at the page.
    const browser = await firefox.launch({
      headless: false,
      // NOTE(review): these prefs look intended to enable geolocation without an
      // interactive permission prompt — confirm against the Firefox pref docs.
      firefoxUserPrefs: {
        'geo.enabled': true,
        'geo.provider.use_corelocation': true,
        'geo.prompt.testing': true,
        'geo.prompt.testing.allow': true,
      },
    });

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
        geolocation: { latitude: 33.4484, longitude: -112.0740 },
        permissions: ['geolocation'],
        proxy: {
          server: proxy.server,
          username: proxy.username,
          password: proxy.password,
        },
      });
      const page = await context.newPage();

      // Search Google Maps
      const searchQuery = `dispensary ${address}, ${city}, ${state} ${zip}`;
      const url = `https://www.google.com/maps/search/${encodeURIComponent(searchQuery)}`;

      console.log(`📍 Searching Maps: ${searchQuery}`);
      await page.goto(url, {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
      });

      // Fixed delay to let the results render before scraping.
      await page.waitForTimeout(5000);

      // Runs inside the browser page. The callback must stay self-contained:
      // it cannot reference anything from this Node-side scope.
      const businessData = await page.evaluate(() => {
        const data: ScrapedBusinessData = {};

        // Place name: first selector whose element has text wins.
        const nameSelectors = [
          'h1[class*="fontHeadline"]',
          'h1.DUwDvf',
          '[data-item-id*="name"] h1',
        ];
        for (const selector of nameSelectors) {
          const el = document.querySelector(selector);
          if (el?.textContent) {
            data.name = el.textContent.trim();
            break;
          }
        }

        // Website: skip links that point back at google.com.
        const websiteSelectors = [
          'a[data-item-id="authority"]',
          'a[data-tooltip="Open website"]',
          'a[aria-label*="Website"]',
        ];
        for (const selector of websiteSelectors) {
          const el = document.querySelector<HTMLAnchorElement>(selector);
          if (el?.href && !el.href.includes('google.com')) {
            data.website = el.href;
            break;
          }
        }

        // Phone: take the first 3-3-4 digit pattern found in a candidate element's text.
        const phoneSelectors = [
          'button[data-item-id*="phone"]',
          'button[aria-label*="Phone"]',
          '[data-tooltip*="Copy phone number"]',
        ];
        for (const selector of phoneSelectors) {
          const el = document.querySelector(selector);
          if (el?.textContent) {
            const phoneMatch = el.textContent.match(/\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/);
            if (phoneMatch) {
              data.phone = phoneMatch[0];
              break;
            }
          }
        }

        // Rating: parsed from an aria-label such as "4.5 stars".
        const ratingEl = document.querySelector('[role="img"][aria-label*="stars"]');
        if (ratingEl) {
          const label = ratingEl.getAttribute('aria-label');
          const match = label?.match(/(\d+\.?\d*)\s*stars?/);
          if (match) {
            data.rating = parseFloat(match[1]);
          }
        }

        // Review count: parsed from an aria-label such as "1,234 reviews".
        const reviewEl = document.querySelector('[aria-label*="reviews"]');
        if (reviewEl) {
          const label = reviewEl.getAttribute('aria-label');
          const match = label?.match(/([\d,]+)\s*reviews?/);
          if (match) {
            data.reviewCount = parseInt(match[1].replace(/,/g, ''), 10);
          }
        }

        return data;
      });

      console.log(`\n✅ Found data:`, businessData);

      // Update the azdhs_list row for this address.
      if (businessData.name) {
        // NOTE(review): fields that were not found are written as NULL and so
        // overwrite any existing value — confirm that is the intended behavior.
        await pool.query(`
          UPDATE azdhs_list
          SET
            dba_name = $1,
            website = $2,
            phone = $3,
            google_rating = $4,
            google_review_count = $5,
            updated_at = CURRENT_TIMESTAMP
          WHERE address = $6 AND city = $7
        `, [
          businessData.name,
          businessData.website ?? null,
          // Store digits only; /\D/ strips every non-digit character.
          businessData.phone?.replace(/\D/g, '') ?? null,
          businessData.rating ?? null,
          businessData.reviewCount ?? null,
          address,
          city,
        ]);

        console.log(`\n✅ Updated database!`);
      } else {
        console.log(`\n❌ No business name found`);
      }

      // Keep browser open for 10 seconds so you can see the results
      console.log(`\n⏳ Keeping browser open for 10 seconds...`);
      await page.waitForTimeout(10000);
    } finally {
      // Always close the browser once it has been launched, even if scraping failed.
      await browser.close();
    }
  } catch (error) {
    console.log(`❌ Error: ${error}`);
  } finally {
    // Runs on every path (no proxy, success, failure) so the process can exit.
    await pool.end();
  }
}
|
|
|
|
// Script entry point. Attach a rejection handler so a failure that escapes
// enrichSingleDispensary is reported and reflected in the process exit code
// instead of being left as a floating promise.
enrichSingleDispensary().catch((error: unknown) => {
  console.error('❌ Unhandled error while enriching dispensary:', error);
  process.exitCode = 1;
});
|