feat: Add stale process monitor, users route, landing page, archive old scripts

- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-05 04:07:31 -07:00
parent d2d44d2aeb
commit d91c55a344
3115 changed files with 5755 additions and 719 deletions

View File

@@ -0,0 +1,240 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const playwright_1 = require("playwright");
const pg_1 = require("pg");
// Shared PostgreSQL connection pool for this script; the connection string is
// read from the DATABASE_URL environment variable.
const pool = new pg_1.Pool({ connectionString: process.env.DATABASE_URL });
// Patterns that pull a 24-hex-character ID out of page markup or script text.
// Tried in order; the first pattern that matches wins.
const PLATFORM_ID_PATTERNS = [
    /["']retailerId["']\s*:\s*["']([a-f0-9]{24})["']/i,
    /dispensaryId["']\s*:\s*["']([a-f0-9]{24})["']/i,
    /retailer["']?\s*:\s*["']([a-f0-9]{24})["']/i,
    /dutchie\.com\/embedded-menu\/([a-f0-9]{24})/i,
    /dutchie\.com\/dispensary\/([a-f0-9]{24})/i,
    /plus\.dutchie\.com\/plus\/([a-f0-9]{24})/i,
    /retailerId=([a-f0-9]{24})/i,
];
// Elements that are tried (in order) as a way to reach the menu page.
const MENU_SELECTORS = [
    'a:has-text("Shop")',
    'a:has-text("Menu")',
    'a:has-text("Order")',
    'a[href*="menu"]',
    'a[href*="shop"]',
    'a[href*="order"]',
    'button:has-text("Shop")',
    'button:has-text("Menu")',
];
/** Returns the ID captured by the first matching pattern in `text`, or null. */
function matchPlatformId(text, patterns = PLATFORM_ID_PATTERNS) {
    if (!text) {
        return null;
    }
    for (const pattern of patterns) {
        const match = text.match(pattern);
        if (match) {
            return match[1];
        }
    }
    return null;
}
/** Returns a 24-hex ID found in the src of the first dutchie iframe that has one, or null. */
async function findPlatformIdInIframes(page) {
    const sources = await page.evaluate(() => {
        return Array.from(document.querySelectorAll('iframe')).map(f => f.src);
    });
    for (const src of sources) {
        if (!src.includes('dutchie')) {
            continue;
        }
        const match = src.match(/([a-f0-9]{24})/i);
        if (match) {
            return match[1];
        }
    }
    return null;
}
/**
 * Decides whether a menu candidate should be clicked, given its href attribute.
 * A missing href (buttons, script-driven anchors) is clickable; an href is
 * clickable only when it is non-empty and does not start with "http".
 */
function isClickableMenuTarget(href) {
    if (href === null || href === undefined) {
        return true;
    }
    return href !== '' && !href.startsWith('http');
}
/** One-line, length-capped description of anything that was thrown (Error or not). */
function describeError(e) {
    return String(e?.message ?? e).split('\n')[0].substring(0, 80);
}
/**
 * Loads a dispensary website in a fresh browser context and tries to discover
 * the 24-hex-character platform ID used by its embedded Dutchie menu.
 *
 * Sources are tried in this order, stopping at the first hit:
 *   1. outgoing requests to dutchie URLs (URL segment, then retailerId /
 *      dispensaryId in the POST body) - watched for the whole page lifetime;
 *   2. the rendered page HTML;
 *   3. the __NEXT_DATA__ element;
 *   4. dutchie iframe src attributes;
 *   5. the same HTML / iframe checks after clicking Shop / Menu / Order elements;
 *   6. script tags and iframes after an extra wait for late-loading content.
 *
 * @param {object} browser - Playwright-style browser; only `newContext()` is used.
 * @param {{name: string, website: string}} dispensary - Row describing the site to load.
 * @returns {Promise<string|null>} The captured ID, or null when none was found
 *   or the page failed to load (failures are logged, not thrown).
 */
async function extractPlatformId(browser, dispensary) {
    let capturedId = null;
    // First capture wins, regardless of which source produced it.
    const capture = (id, label) => {
        if (id && !capturedId) {
            capturedId = id;
            console.log(` ${label}: ${capturedId}`);
        }
    };
    const context = await browser.newContext();
    try {
        // Created inside the try so the context is still closed if this fails.
        const page = await context.newPage();
        // Intercept network requests to find retailer IDs
        page.on('request', (request) => {
            const url = request.url();
            // 'plus.dutchie' and 'api.dutchie' URLs are already covered by this check.
            if (!url.includes('dutchie')) {
                return;
            }
            capture(url.match(/[\/=]([a-f0-9]{24})(?:[\/\?&]|$)/i)?.[1], 'Captured from URL');
            const postData = request.postData();
            if (!postData) {
                return;
            }
            // Look for retailerId, then dispensaryId, in GraphQL variables
            capture(postData.match(/["']?retailerId["']?\s*:\s*["']([a-f0-9]{24})["']/i)?.[1], 'Captured retailerId');
            capture(postData.match(/["']?dispensaryId["']?\s*:\s*["']([a-f0-9]{24})["']/i)?.[1], 'Captured dispensaryId');
        });
        console.log(`\nLoading ${dispensary.name}: ${dispensary.website}`);
        await page.goto(dispensary.website, { waitUntil: 'domcontentloaded', timeout: 30000 });
        // Wait for initial load
        await page.waitForTimeout(2000);
        // Check page content for an ID
        capture(matchPlatformId(await page.content()), 'Found in content');
        // Check __NEXT_DATA__ if present
        if (!capturedId) {
            const nextData = await page.evaluate(() => {
                const el = document.getElementById('__NEXT_DATA__');
                return el?.textContent || null;
            });
            capture(matchPlatformId(nextData), 'Found in __NEXT_DATA__');
        }
        // Look for iframes that might contain dutchie embed
        if (!capturedId) {
            capture(await findPlatformIdInIframes(page), 'Found in iframe');
        }
        // If still not found, try clicking on "Shop" or "Menu" elements
        if (!capturedId) {
            for (const selector of MENU_SELECTORS) {
                // The request listener may have captured an ID in the meantime.
                if (capturedId) {
                    break;
                }
                try {
                    const element = page.locator(selector).first();
                    const isVisible = await element.isVisible({ timeout: 500 });
                    if (!isVisible) {
                        continue;
                    }
                    const href = await element.getAttribute('href');
                    // Buttons have no href; requiring one would make the
                    // button selectors above unreachable.
                    if (!isClickableMenuTarget(href)) {
                        continue;
                    }
                    console.log(` Clicking ${selector}...`);
                    await element.click();
                    await page.waitForTimeout(3000);
                    // Check new page content, then its iframes
                    capture(matchPlatformId(await page.content()), 'Found after navigation');
                    if (!capturedId) {
                        capture(await findPlatformIdInIframes(page), 'Found in iframe after nav');
                    }
                }
                catch (e) {
                    // Best effort: report the failure and continue to next selector
                    console.log(` Skipping ${selector}: ${describeError(e)}`);
                }
            }
        }
        // If still not found, wait longer for async dutchie widget to load
        if (!capturedId) {
            console.log(` Waiting for async content...`);
            await page.waitForTimeout(5000);
            // Check for dutchie script tags (external src, or the first 500 chars of inline code)
            const scripts = await page.evaluate(() => {
                return Array.from(document.querySelectorAll('script')).map(s => s.src || s.innerHTML?.substring(0, 500));
            });
            for (const script of scripts) {
                if (capturedId) {
                    break;
                }
                if (script && script.includes('dutchie')) {
                    capture(matchPlatformId(script), 'Found in script');
                }
            }
            // Final check of iframes after wait
            if (!capturedId) {
                capture(await findPlatformIdInIframes(page), 'Found in iframe (delayed)');
            }
        }
    }
    catch (e) {
        // `e` is not guaranteed to be an Error; describeError copes with any value.
        console.log(` Error: ${describeError(e)}`);
    }
    finally {
        await context.close();
    }
    return capturedId;
}
/**
 * Backfills `platform_dispensary_id` for Arizona dispensaries on the Dutchie
 * menu type that have a website but no platform ID yet.
 *
 * Each matching row is passed to `extractPlatformId`; every ID that is found
 * is written back immediately, and a found / not-found summary is printed at
 * the end. The browser and the database pool are released even when a query
 * or an extraction rejects, so a failure part-way through does not leave them
 * open; the error itself still propagates to the caller.
 *
 * @returns {Promise<void>}
 */
async function main() {
    try {
        // Get dispensaries missing platform IDs
        const result = await pool.query(`
SELECT id, name, website
FROM dispensaries
WHERE state = 'AZ'
AND menu_type = 'dutchie'
AND (platform_dispensary_id IS NULL OR platform_dispensary_id = '')
AND website IS NOT NULL AND website != ''
ORDER BY name
`);
        console.log(`Found ${result.rows.length} dispensaries to process\n`);
        const browser = await playwright_1.chromium.launch({ headless: true });
        const results = [];
        try {
            // Sites are processed one at a time, sharing a single browser.
            for (const dispensary of result.rows) {
                const platformId = await extractPlatformId(browser, dispensary);
                results.push({ id: dispensary.id, name: dispensary.name, platformId });
                if (platformId) {
                    // Update database
                    await pool.query('UPDATE dispensaries SET platform_dispensary_id = $1 WHERE id = $2', [platformId, dispensary.id]);
                    console.log(` Updated database with ${platformId}`);
                }
            }
        }
        finally {
            // Runs on both success and failure so the browser is always closed.
            await browser.close();
        }
        console.log('\n=== SUMMARY ===');
        const found = results.filter(r => r.platformId);
        const notFound = results.filter(r => !r.platformId);
        console.log(`\nFound (${found.length}):`);
        found.forEach(r => console.log(` ${r.id}: ${r.name} -> ${r.platformId}`));
        console.log(`\nNot Found (${notFound.length}):`);
        notFound.forEach(r => console.log(` ${r.id}: ${r.name}`));
    }
    finally {
        // Always end the pool, including when the initial SELECT or an UPDATE fails.
        await pool.end();
    }
}
// Script entry point: run main() and, if it rejects, report the failure and
// exit with a non-zero status code.
main().catch((err) => {
    console.error('Error:', err);
    process.exit(1);
});