feat: Add stale process monitor, users route, landing page, archive old scripts
- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
240
backend/dist/scripts/extract-platform-ids.js
vendored
Normal file
240
backend/dist/scripts/extract-platform-ids.js
vendored
Normal file
@@ -0,0 +1,240 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
const playwright_1 = require("playwright");
|
||||
const pg_1 = require("pg");
|
||||
// Shared PostgreSQL connection pool for the whole script; the connection
// string is read from the DATABASE_URL environment variable.
const pool = new pg_1.Pool({ connectionString: process.env.DATABASE_URL });
|
||||
/**
 * Patterns that expose a 24-hex-character platform ID in page HTML, inline
 * JSON, or script text. Capture group 1 is always the ID.
 */
const DUTCHIE_ID_PATTERNS = [
    /["']retailerId["']\s*:\s*["']([a-f0-9]{24})["']/i,
    /dispensaryId["']\s*:\s*["']([a-f0-9]{24})["']/i,
    /retailer["']?\s*:\s*["']([a-f0-9]{24})["']/i,
    /dutchie\.com\/embedded-menu\/([a-f0-9]{24})/i,
    /dutchie\.com\/dispensary\/([a-f0-9]{24})/i,
    /plus\.dutchie\.com\/plus\/([a-f0-9]{24})/i,
    /retailerId=([a-f0-9]{24})/i,
];

/** Any run of 24 hex characters; used on iframe `src` values that mention dutchie. */
const HEX_ID_PATTERN = /([a-f0-9]{24})/i;

/** Selectors for links/buttons that are tried, in order, to reach a menu page. */
const MENU_LINK_SELECTORS = [
    'a:has-text("Shop")',
    'a:has-text("Menu")',
    'a:has-text("Order")',
    'a[href*="menu"]',
    'a[href*="shop"]',
    'a[href*="order"]',
    'button:has-text("Shop")',
    'button:has-text("Menu")',
];

/** Returns the first ID matched by any known pattern in `text`, or null. */
function findPlatformIdInText(text) {
    if (!text) {
        return null;
    }
    for (const pattern of DUTCHIE_ID_PATTERNS) {
        const match = text.match(pattern);
        if (match) {
            return match[1];
        }
    }
    return null;
}

/** Returns the first 24-hex ID found in a dutchie iframe `src` on the page, or null. */
async function findPlatformIdInIframes(page) {
    const sources = await page.evaluate(() => {
        return Array.from(document.querySelectorAll('iframe')).map((f) => f.src);
    });
    for (const src of sources) {
        if (!src || !src.includes('dutchie')) {
            continue;
        }
        const match = src.match(HEX_ID_PATTERN);
        if (match) {
            return match[1];
        }
    }
    return null;
}

/** Renders any thrown value (Error or not) as a string for logging. */
function describeError(err) {
    return String(err?.message ?? err);
}

/**
 * Loads a dispensary website in a fresh browser context and tries to discover
 * its 24-hex-character platform ID.
 *
 * Sources are tried in this order, and the first ID found wins:
 *   1. outgoing requests whose URL contains "dutchie" (URL or POST body),
 *   2. the page HTML,
 *   3. the `__NEXT_DATA__` element,
 *   4. dutchie iframe `src` attributes,
 *   5. the same checks after clicking Shop/Menu/Order links or buttons,
 *   6. script tags and iframes after an additional wait.
 *
 * Navigation and scraping failures are logged and yield `null`; they do not
 * throw. Failure to create the context or the page does propagate.
 *
 * @param {object} browser - Object providing `newContext()` (a Playwright Browser).
 * @param {{name: string, website: string}} dispensary - Record whose `name`
 *   is logged and whose `website` is navigated to.
 * @returns {Promise<string|null>} The captured ID, or null when none was found.
 */
async function extractPlatformId(browser, dispensary) {
    let capturedId = null;
    // Records an ID only if none has been captured yet, so the request
    // listener and the page scans can never overwrite each other.
    const capture = (id, label) => {
        if (!id || capturedId) {
            return false;
        }
        capturedId = id;
        console.log(` ${label}: ${capturedId}`);
        return true;
    };
    const context = await browser.newContext();
    try {
        const page = await context.newPage();
        // Intercept network requests to find retailer IDs
        page.on('request', (request) => {
            if (capturedId) {
                return;
            }
            const url = request.url();
            // 'plus.dutchie' and 'api.dutchie' both contain 'dutchie'.
            if (!url.includes('dutchie')) {
                return;
            }
            const urlMatch = url.match(/[\/=]([a-f0-9]{24})(?:[\/\?&]|$)/i);
            if (urlMatch) {
                capture(urlMatch[1], 'Captured from URL');
            }
            const postData = request.postData();
            if (!postData) {
                return;
            }
            // Look for retailerId in GraphQL variables
            const retailerMatch = postData.match(/["']?retailerId["']?\s*:\s*["']([a-f0-9]{24})["']/i);
            if (retailerMatch) {
                capture(retailerMatch[1], 'Captured retailerId');
            }
            // Also look for dispensaryId
            const dispensaryMatch = postData.match(/["']?dispensaryId["']?\s*:\s*["']([a-f0-9]{24})["']/i);
            if (dispensaryMatch) {
                capture(dispensaryMatch[1], 'Captured dispensaryId');
            }
        });
        try {
            console.log(`\nLoading ${dispensary.name}: ${dispensary.website}`);
            await page.goto(dispensary.website, { waitUntil: 'domcontentloaded', timeout: 30000 });
            // Wait for initial load
            await page.waitForTimeout(2000);
            // Check page content for retailerId
            capture(findPlatformIdInText(await page.content()), 'Found in content');
            // Check __NEXT_DATA__ if present
            if (!capturedId) {
                const nextData = await page.evaluate(() => {
                    const el = document.getElementById('__NEXT_DATA__');
                    return el?.textContent || null;
                });
                capture(findPlatformIdInText(nextData), 'Found in __NEXT_DATA__');
            }
            // Look for iframes that might contain dutchie embed
            if (!capturedId) {
                capture(await findPlatformIdInIframes(page), 'Found in iframe');
            }
            // If still not found, try clicking on "Shop" or "Menu" links
            if (!capturedId) {
                for (const selector of MENU_LINK_SELECTORS) {
                    try {
                        const element = page.locator(selector).first();
                        // isVisible() returns immediately; Playwright ignores a timeout option here.
                        if (!(await element.isVisible())) {
                            continue;
                        }
                        const href = await element.getAttribute('href');
                        // Skip external (absolute) links and empty hrefs. Elements
                        // without an href attribute (buttons) are clicked; otherwise
                        // the button selectors above could never take effect.
                        const hasHref = href !== null && href !== undefined;
                        if (hasHref && (href === '' || href.startsWith('http'))) {
                            continue;
                        }
                        console.log(` Clicking ${selector}...`);
                        await element.click();
                        await page.waitForTimeout(3000);
                        // Check new page content
                        capture(findPlatformIdInText(await page.content()), 'Found after navigation');
                        // Check iframes on new page
                        if (!capturedId) {
                            capture(await findPlatformIdInIframes(page), 'Found in iframe after nav');
                        }
                        if (capturedId) {
                            break;
                        }
                    }
                    catch (e) {
                        // Best effort: report the failure and continue to next selector
                        console.log(` Skipped ${selector}: ${describeError(e).substring(0, 80)}`);
                    }
                }
            }
            // If still not found, wait longer for async dutchie widget to load
            if (!capturedId) {
                console.log(` Waiting for async content...`);
                await page.waitForTimeout(5000);
                // Check for dutchie script tags (external src, or the first 500
                // characters of inline script text)
                const scripts = await page.evaluate(() => {
                    return Array.from(document.querySelectorAll('script')).map((s) => s.src || s.innerHTML?.substring(0, 500));
                });
                for (const script of scripts) {
                    if (script && script.includes('dutchie')) {
                        capture(findPlatformIdInText(script), 'Found in script');
                        if (capturedId) {
                            break;
                        }
                    }
                }
                // Final check of iframes after wait
                if (!capturedId) {
                    capture(await findPlatformIdInIframes(page), 'Found in iframe (delayed)');
                }
            }
        }
        catch (e) {
            // describeError keeps a thrown non-Error value from crashing the handler.
            console.log(` Error: ${describeError(e).substring(0, 80)}`);
        }
    }
    finally {
        // Runs even when newPage() fails, so the context is never leaked.
        await context.close();
    }
    return capturedId;
}
|
||||
/**
 * Finds Arizona dutchie dispensaries that have a website but no platform ID,
 * scrapes each website for the ID, stores every ID found, and prints a summary.
 *
 * The browser and the database pool are released in `finally` blocks so they
 * are closed even when a query or an extraction throws; the error itself still
 * propagates to the caller.
 *
 * @returns {Promise<void>}
 */
async function main() {
    try {
        // Get dispensaries missing platform IDs
        const result = await pool.query(`
            SELECT id, name, website
            FROM dispensaries
            WHERE state = 'AZ'
            AND menu_type = 'dutchie'
            AND (platform_dispensary_id IS NULL OR platform_dispensary_id = '')
            AND website IS NOT NULL AND website != ''
            ORDER BY name
        `);
        console.log(`Found ${result.rows.length} dispensaries to process\n`);
        const results = [];
        const browser = await playwright_1.chromium.launch({ headless: true });
        try {
            // Sequential on purpose: one site is loaded at a time.
            for (const dispensary of result.rows) {
                const platformId = await extractPlatformId(browser, dispensary);
                results.push({ id: dispensary.id, name: dispensary.name, platformId });
                if (platformId) {
                    // Update database
                    await pool.query('UPDATE dispensaries SET platform_dispensary_id = $1 WHERE id = $2', [platformId, dispensary.id]);
                    console.log(` Updated database with ${platformId}`);
                }
            }
        }
        finally {
            await browser.close();
        }
        console.log('\n=== SUMMARY ===');
        const found = results.filter((r) => r.platformId);
        const notFound = results.filter((r) => !r.platformId);
        console.log(`\nFound (${found.length}):`);
        found.forEach((r) => console.log(` ${r.id}: ${r.name} -> ${r.platformId}`));
        console.log(`\nNot Found (${notFound.length}):`);
        notFound.forEach((r) => console.log(` ${r.id}: ${r.name}`));
    }
    finally {
        await pool.end();
    }
}
|
||||
// Script entry point: run main() and, on any rejection, report the error and
// exit with a non-zero status.
main().catch((err) => {
    console.error('Error:', err);
    process.exit(1);
});
|
||||
Reference in New Issue
Block a user