feat: Add stale process monitor, users route, landing page, archive old scripts

- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-05 04:07:31 -07:00
parent d2d44d2aeb
commit d91c55a344
3115 changed files with 5755 additions and 719 deletions

View File

@@ -0,0 +1,153 @@
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { Pool } from 'pg';
// Install the stealth plugin on the shared puppeteer-extra instance before any browser is launched.
puppeteer.use(StealthPlugin());

// Shared Postgres pool for this script.
// The connection string is taken from DATABASE_URL when it is set, so credentials
// do not have to live in source control; the previous hard-coded local development
// URL remains the fallback so existing invocations keep working unchanged.
const pool = new Pool({
  connectionString:
    process.env.DATABASE_URL ??
    'postgresql://sail:password@localhost:5432/dutchie_menus'
});
/**
 * Ad-hoc check that brand names can be scraped from a single store page.
 *
 * Looks up the store with slug `curaleaf-az-48th-street-med`, takes the first row
 * of the `proxies` table (if there is one), opens the store's `dutchie_url` in a
 * headless browser using a Googlebot user agent, and prints every distinct
 * brand-like text found on the page. When no brand is found it prints basic page
 * diagnostics instead.
 *
 * Scrape errors are logged, not rethrown. The browser (if it was launched) and the
 * module-level `pool` are always closed before returning, so this function can be
 * run only once per process.
 *
 * @returns A promise that resolves once the results have been printed and cleanup finished.
 */
async function testBrandScrape(): Promise<void> {
  const storeSlug = 'curaleaf-az-48th-street-med';
  const googlebotUserAgent =
    'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)';
  const divider = '─'.repeat(60);
  let browser;

  try {
    // 1. Get the store
    const storeResult = await pool.query(
      'SELECT id, name, slug, dutchie_url FROM stores WHERE slug = $1',
      [storeSlug]
    );
    const store = storeResult.rows[0];
    if (!store) {
      console.log('Store not found');
      return;
    }
    // Without a URL there is nothing to navigate to; say so instead of letting
    // page.goto fail later with a less obvious error.
    if (!store.dutchie_url) {
      console.log(`Store "${store.slug}" has no dutchie_url - nothing to scrape`);
      return;
    }

    console.log(`\nTesting brand scrape for: ${store.name}`);
    console.log(`URL: ${store.dutchie_url}\n`);

    // 2. Get a proxy
    // NOTE(review): this takes the first row with no filter or ordering, so the
    // word "active" in the log message below is not enforced by the query —
    // confirm whether the proxies table has an active flag that should be applied.
    const proxyResult = await pool.query(`
      SELECT host, port, protocol, username, password
      FROM proxies
      LIMIT 1
    `);
    const proxy = proxyResult.rows[0];
    if (proxy) {
      console.log(`Using proxy: ${proxy.host}:${proxy.port}`);
    } else {
      console.log('No active proxies available - will try without proxy');
    }
    console.log(`User-Agent: Googlebot`);
    console.log(divider);

    // 3. Launch browser with proxy
    const browserArgs = [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled'
    ];
    if (proxy) {
      browserArgs.push(`--proxy-server=${proxy.protocol}://${proxy.host}:${proxy.port}`);
    }

    browser = await puppeteer.launch({
      headless: true,
      args: browserArgs
    });
    const page = await browser.newPage();

    // --proxy-server only carries scheme/host/port. The username/password selected
    // above were previously never sent, so a proxy that requires authentication
    // would reject every request; supply them for the proxy's auth challenge.
    if (proxy?.username) {
      await page.authenticate({
        username: proxy.username,
        password: proxy.password ?? ''
      });
    }

    // Set Googlebot user agent
    await page.setUserAgent(googlebotUserAgent);

    // Anti-detection: report navigator.webdriver as false in every new document.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', {
        get: () => false,
      });
    });

    // 4. Navigate and extract brands
    console.log('\nNavigating to store page...');
    await page.goto(store.dutchie_url, { waitUntil: 'networkidle2', timeout: 60000 });
    console.log('Page loaded, extracting brands...\n');

    // 5. Extract brands from product cards.
    // This callback runs inside the page, so it must be self-contained and may
    // only return serializable data (here: an array of strings).
    const brands = await page.evaluate(() => {
      const brandSet = new Set<string>();
      // Try multiple selectors; the Set removes duplicates across them.
      const selectors = [
        '[class*="brand"]',
        '[class*="Brand"]',
        '[data-testid*="brand"]',
        '[class*="product"] [class*="brand"]'
      ];
      for (const selector of selectors) {
        document.querySelectorAll(selector).forEach(el => {
          const text = el.textContent?.trim();
          // Skip empty nodes and long texts (100+ characters).
          if (text && text.length > 0 && text.length < 100) {
            brandSet.add(text);
          }
        });
      }
      return Array.from(brandSet);
    });

    console.log('BRANDS FOUND:');
    console.log(divider);

    if (brands.length === 0) {
      console.log('No brands found!');
      console.log('\nLet me also check what the page structure looks like...\n');

      // Debug: show page structure
      const pageInfo = await page.evaluate(() => {
        return {
          title: document.title,
          productCards: document.querySelectorAll('[class*="product"], [class*="Product"]').length,
          hasImages: document.querySelectorAll('img[src*="dutchie"]').length,
          bodyText: document.body.textContent?.substring(0, 500)
        };
      });
      console.log('Page Title:', pageInfo.title);
      console.log('Product Cards Found:', pageInfo.productCards);
      console.log('Dutchie Images:', pageInfo.hasImages);
      console.log('\nFirst 500 chars of page:');
      console.log(pageInfo.bodyText);
    } else {
      // Sort a copy so the scraped array itself is left untouched.
      [...brands].sort().forEach((brand, i) => {
        console.log(`${i + 1}. ${brand}`);
      });
      console.log(divider);
      console.log(`Total unique brands: ${brands.length}`);
    }
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances are guaranteed a message/stack.
    if (error instanceof Error) {
      console.error('Error:', error.message);
      if (error.stack) {
        console.error(error.stack);
      }
    } else {
      console.error('Error:', error);
    }
  } finally {
    // Nested so that a failure while closing the browser cannot skip closing the pool.
    try {
      if (browser) {
        await browser.close();
      }
    } finally {
      await pool.end();
    }
  }
}
// Run the scrape when the script is executed.
// testBrandScrape logs scrape errors itself, but its cleanup (closing the browser
// and the pool) can still reject; report that here and mark the process as failed
// instead of leaving an unhandled promise rejection.
testBrandScrape().catch((error: unknown) => {
  console.error('Fatal error:', error);
  process.exitCode = 1;
});