feat: Add stale process monitor, users route, landing page, archive old scripts
- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
195
backend/archive/scrape-two-gates.ts
Normal file
195
backend/archive/scrape-two-gates.ts
Normal file
@@ -0,0 +1,195 @@
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { Pool } from 'pg';
|
||||
|
||||
// Register the stealth plugin with puppeteer-extra before any browser is launched.
puppeteer.use(StealthPlugin());

// NOTE(review): the database credentials are hard-coded in source. Consider
// loading the connection string from configuration/environment instead.
const DATABASE_CONNECTION_STRING =
  'postgresql://sail:password@localhost:5432/dutchie_menus';

// Shared PostgreSQL pool; scrapeTwoGates() queries the `proxies` table through
// it and closes it when the run finishes.
const pool = new Pool({ connectionString: DATABASE_CONNECTION_STRING });
|
||||
|
||||
/**
 * Opens the Curaleaf Phoenix Airport brands page through a randomly chosen
 * proxy and prints the brand names found on it.
 *
 * If the site redirects to its age gate, the function first works through two
 * steps: choosing "Arizona" in the state dropdown (gate 1) and clicking the
 * age-confirmation button (gate 2). Diagnostic information about the page is
 * logged along the way so the run can be inspected from the console.
 *
 * Errors raised while scraping are logged rather than rethrown. The browser
 * (if launched) and the database pool are always closed before returning.
 *
 * @returns A promise that resolves once scraping and cleanup have finished.
 */
async function scrapeTwoGates(): Promise<void> {
  // Local replacement for `page.waitForTimeout`, which is deprecated and no
  // longer available in newer Puppeteer major versions.
  const sleep = (ms: number): Promise<void> =>
    new Promise<void>((resolve) => setTimeout(resolve, ms));

  let browser: Awaited<ReturnType<typeof puppeteer.launch>> | undefined;

  try {
    // Get random proxy
    const proxyResult = await pool.query(`
      SELECT host, port, protocol FROM proxies
      ORDER BY RANDOM() LIMIT 1
    `);

    const proxy = proxyResult.rows[0];
    if (!proxy) {
      // An empty table would otherwise surface as an opaque
      // "cannot read properties of undefined" error.
      throw new Error('No proxies available in the proxies table');
    }
    const proxyUrl = `${proxy.protocol}://${proxy.host}:${proxy.port}`;

    console.log('🔌 Proxy:', `${proxy.host}:${proxy.port}`);

    browser = await puppeteer.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        `--proxy-server=${proxyUrl}`
      ]
    });

    const page = await browser.newPage();

    const mobileUA = 'Mozilla/5.0 (Linux; Android 10; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Mobile Safari/537.36';
    await page.setUserAgent(mobileUA);

    console.log('📱 UA:', mobileUA);
    console.log('');

    const url = 'https://curaleaf.com/stores/curaleaf-dispensary-phoenix-airport/brands';
    console.log('🌐 Going to:', url);

    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);

    console.log('📍 URL:', page.url());

    if (page.url().includes('/age-gate')) {
      console.log('');
      console.log('═══ GATE 1: STATE SELECTOR ═══');

      // Gate 1: Select state
      await page.waitForSelector('button[role="combobox"]', { timeout: 10000 });
      await page.click('button[role="combobox"]');
      console.log(' ✅ Clicked dropdown');

      // Give the dropdown options time to render.
      await sleep(2000);

      // Click Arizona; report whether a matching option actually existed.
      const arizonaSelected = await page.evaluate(() => {
        const options = Array.from(
          document.querySelectorAll<HTMLElement>('[role="option"]')
        );
        const azOption = options.find((opt) =>
          opt.textContent?.toLowerCase().includes('arizona')
        );
        if (!azOption) return false;
        azOption.click();
        return true;
      });

      if (arizonaSelected) {
        console.log(' ✅ Selected Arizona');
      } else {
        console.log(' ⚠️ Could not find Arizona option');
      }
      await sleep(3000);

      console.log(' 📍 URL after selection:', page.url());

      // Check what's on the page now
      const gateContent = await page.evaluate(() => ({
        title: document.title,
        text: document.body.innerText.substring(0, 600),
        buttons: Array.from(document.querySelectorAll<HTMLElement>('button, a'))
          .map((btn) => ({
            text: btn.textContent?.trim() || '',
            // offsetParent is null for elements that are not rendered.
            visible: btn.offsetParent !== null
          }))
          .filter((b) => b.visible && b.text.length > 0 && b.text.length < 100)
      }));

      console.log('');
      console.log('📄 CURRENT PAGE:');
      console.log('Title:', gateContent.title);
      console.log('Visible buttons:', JSON.stringify(gateContent.buttons, null, 2));
      console.log('');
      console.log('Page text preview:');
      console.log(gateContent.text);

      // Gate 2: Look for age confirmation button
      console.log('');
      console.log('═══ GATE 2: AGE VERIFICATION ═══');

      // Arm the navigation wait BEFORE clicking so a fast redirect is not
      // missed. Timeouts and other failures are mapped to `false` so this
      // promise can never reject unhandled (e.g. when no button is clicked).
      const navigated = page
        .waitForNavigation({ timeout: 10000 })
        .then(() => true, () => false);

      // Try to find and click the age confirmation button
      const ageButtonClicked = await page.evaluate(() => {
        const buttons = Array.from(
          document.querySelectorAll<HTMLElement>('button, a')
        );

        // Look for common age confirmation patterns. Visibility is part of
        // the predicate so a hidden match does not mask a visible one.
        const ageButton = buttons.find((btn) => {
          if (btn.offsetParent === null) return false;

          const text = btn.textContent?.toLowerCase() || '';
          const ariaLabel = btn.getAttribute('aria-label')?.toLowerCase() || '';

          return (
            text.includes('21') ||
            text.includes('yes') ||
            text.includes('enter') ||
            text.includes('continue') ||
            text.includes('confirm') ||
            (ariaLabel.includes('age') && !ariaLabel.includes('not'))
          );
        });

        if (!ageButton) return false;
        ageButton.click();
        return true;
      });

      if (ageButtonClicked) {
        console.log(' ✅ Clicked age confirmation button');

        // Wait for navigation or page update
        if (await navigated) {
          console.log(' ✅ Redirected to:', page.url());
        } else {
          console.log(' ⏳ No navigation, checking if page updated...');
          await sleep(3000);
          console.log(' 📍 Current URL:', page.url());
        }
      } else {
        console.log(' ⚠️ Could not find age confirmation button');
      }
    }

    // Final check and scrape
    console.log('');
    console.log('═══ SCRAPING BRANDS ═══');
    console.log('📍 Final URL:', page.url());

    await sleep(3000);

    const brands = await page.evaluate(() => {
      const selectors = [
        '[data-testid*="brand"]',
        '[class*="Brand"]',
        '[class*="brand"]',
        'a[href*="/brand/"]'
      ];

      // A Set de-duplicates text matched by more than one selector.
      const found = new Set<string>();

      selectors.forEach((selector) => {
        document.querySelectorAll(selector).forEach((el) => {
          const text = el.textContent?.trim();
          if (text && text.length > 0 && text.length < 50) {
            found.add(text);
          }
        });
      });

      return Array.from(found);
    });

    console.log(`\nFound ${brands.length} brands`);

    if (brands.length > 0) {
      console.log('─'.repeat(60));
      brands.forEach((b, i) => console.log(` ${i + 1}. ${b}`));
      console.log('─'.repeat(60));
    } else {
      // Nothing matched: dump a short page summary to help diagnose why.
      const pageData = await page.evaluate(() => ({
        title: document.title,
        bodyText: document.body.innerText.substring(0, 500)
      }));

      console.log('\n📄 PAGE (no brands found):');
      console.log('Title:', pageData.title);
      console.log('Text:', pageData.bodyText);
    }
  } catch (error: unknown) {
    console.error('❌ Error:', error instanceof Error ? error.message : error);
  } finally {
    // Close the pool even if closing the browser throws.
    try {
      if (browser) await browser.close();
    } finally {
      await pool.end();
    }
  }
}
|
||||
|
||||
// Run the scraper. Scraping errors are handled inside scrapeTwoGates(); this
// catch covers failures from its cleanup path (closing the browser or pool)
// so they do not surface as unhandled promise rejections.
scrapeTwoGates().catch((error: unknown) => {
  console.error('❌ Unhandled error:', error instanceof Error ? error.message : error);
});
|
||||
Reference in New Issue
Block a user