- Add backend stale process monitoring API (/api/stale-processes) - Add users management route - Add frontend landing page and stale process monitor UI on /scraper-tools - Move old development scripts to backend/archive/ - Update frontend build with new features 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
241 lines
10 KiB
JavaScript
241 lines
10 KiB
JavaScript
"use strict";
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
const playwright_1 = require("playwright");
|
|
const pg_1 = require("pg");
|
|
// Module-wide pg connection pool. The connection string is read from the
// DATABASE_URL environment variable when this module is loaded.
const poolOptions = { connectionString: process.env.DATABASE_URL };
const pool = new pg_1.Pool(poolOptions);
|
|
/**
 * Regular expressions that locate a 24-character hexadecimal id in page text.
 * They are tried in order; capture group 1 is the id.
 */
const PLATFORM_ID_PATTERNS = [
    /["']retailerId["']\s*:\s*["']([a-f0-9]{24})["']/i,
    /dispensaryId["']\s*:\s*["']([a-f0-9]{24})["']/i,
    /retailer["']?\s*:\s*["']([a-f0-9]{24})["']/i,
    /dutchie\.com\/embedded-menu\/([a-f0-9]{24})/i,
    /dutchie\.com\/dispensary\/([a-f0-9]{24})/i,
    /plus\.dutchie\.com\/plus\/([a-f0-9]{24})/i,
    /retailerId=([a-f0-9]{24})/i,
];

/**
 * Returns the id captured by the first pattern in PLATFORM_ID_PATTERNS that
 * matches `text`, or null when none match.
 *
 * @param {string} text - Text to scan (page HTML, JSON blob, script snippet).
 * @returns {string|null} The captured 24-hex-character id, or null.
 */
function matchPlatformId(text) {
    for (const pattern of PLATFORM_ID_PATTERNS) {
        const match = text.match(pattern);
        if (match) {
            return match[1];
        }
    }
    return null;
}

/**
 * Scans the `src` of every iframe on the page and returns the first
 * 24-hex-character run found in a src that mentions "dutchie".
 *
 * @param {object} page - Playwright page to inspect.
 * @returns {Promise<string|null>} The id, or null when no iframe qualifies.
 */
async function findPlatformIdInIframes(page) {
    // This callback runs in the browser, so it must stay self-contained.
    const sources = await page.evaluate(() => {
        return Array.from(document.querySelectorAll('iframe')).map((f) => f.src);
    });
    for (const src of sources) {
        if (!src.includes('dutchie')) {
            continue;
        }
        const match = src.match(/([a-f0-9]{24})/i);
        if (match) {
            return match[1];
        }
    }
    return null;
}

/**
 * Opens a dispensary's website in a fresh browser context and tries to
 * discover its 24-hex-character platform id.
 *
 * Sources are tried in this order, and the first id captured wins:
 *   1. outgoing requests whose URL mentions "dutchie" (URL and POST body),
 *   2. the page HTML,
 *   3. the text of the `__NEXT_DATA__` element,
 *   4. iframe `src` attributes,
 *   5. the page reached by clicking a relative Shop/Menu/Order link,
 *   6. script tags and iframes after a further wait.
 *
 * Errors raised while loading or inspecting the page are logged and result in
 * a null return; the browser context is always closed.
 *
 * @param {object} browser - Playwright browser used to create the context.
 * @param {{name: string, website: string}} dispensary - Site to inspect.
 * @returns {Promise<string|null>} The id, or null when nothing was found.
 */
async function extractPlatformId(browser, dispensary) {
    let capturedId = null;

    // Records an id only if none has been captured yet, so that a value from
    // the request listener is never overwritten by a later, slower source.
    const capture = (id, label) => {
        if (id && !capturedId) {
            capturedId = id;
            console.log(` ${label}: ${capturedId}`);
        }
    };

    const context = await browser.newContext();
    const page = await context.newPage();

    // Intercept network requests to find retailer IDs.
    page.on('request', (request) => {
        const url = request.url();
        // 'plus.dutchie' and 'api.dutchie' both contain 'dutchie', so a single
        // substring test covers all three hosts.
        if (!url.includes('dutchie')) {
            return;
        }
        const urlMatch = url.match(/[\/=]([a-f0-9]{24})(?:[\/\?&]|$)/i);
        if (urlMatch) {
            capture(urlMatch[1], 'Captured from URL');
        }
        const postData = request.postData();
        if (postData) {
            // Look for retailerId in GraphQL variables, then dispensaryId.
            const retailerMatch = postData.match(/["']?retailerId["']?\s*:\s*["']([a-f0-9]{24})["']/i);
            if (retailerMatch) {
                capture(retailerMatch[1], 'Captured retailerId');
            }
            const dispensaryMatch = postData.match(/["']?dispensaryId["']?\s*:\s*["']([a-f0-9]{24})["']/i);
            if (dispensaryMatch) {
                capture(dispensaryMatch[1], 'Captured dispensaryId');
            }
        }
    });

    try {
        console.log(`\nLoading ${dispensary.name}: ${dispensary.website}`);
        await page.goto(dispensary.website, { waitUntil: 'domcontentloaded', timeout: 30000 });
        // Wait for initial load.
        await page.waitForTimeout(2000);

        // Check the rendered page content.
        capture(matchPlatformId(await page.content()), 'Found in content');

        // Check __NEXT_DATA__ if present.
        if (!capturedId) {
            const nextData = await page.evaluate(() => {
                const el = document.getElementById('__NEXT_DATA__');
                return el?.textContent || null;
            });
            if (nextData) {
                capture(matchPlatformId(nextData), 'Found in __NEXT_DATA__');
            }
        }

        // Look for iframes that might contain a dutchie embed.
        if (!capturedId) {
            capture(await findPlatformIdInIframes(page), 'Found in iframe');
        }

        // If still not found, try following "Shop" / "Menu" / "Order" links.
        if (!capturedId) {
            const menuSelectors = [
                'a:has-text("Shop")',
                'a:has-text("Menu")',
                'a:has-text("Order")',
                'a[href*="menu"]',
                'a[href*="shop"]',
                'a[href*="order"]',
                'button:has-text("Shop")',
                'button:has-text("Menu")',
            ];
            for (const selector of menuSelectors) {
                try {
                    const element = page.locator(selector).first();
                    // NOTE(review): the timeout option may be ignored by
                    // locator.isVisible() — confirm against the Playwright docs.
                    const isVisible = await element.isVisible({ timeout: 500 });
                    if (!isVisible) {
                        continue;
                    }
                    const href = await element.getAttribute('href');
                    // Only follow links whose href does not start with "http";
                    // elements without an href are skipped.
                    if (!href || href.startsWith('http')) {
                        continue;
                    }
                    console.log(` Clicking ${selector}...`);
                    await element.click();
                    await page.waitForTimeout(3000);

                    // Check the new page's content, then its iframes.
                    capture(matchPlatformId(await page.content()), 'Found after navigation');
                    if (!capturedId) {
                        capture(await findPlatformIdInIframes(page), 'Found in iframe after nav');
                    }
                    if (capturedId) {
                        break;
                    }
                }
                catch (e) {
                    // Deliberate best effort: a selector that cannot be
                    // inspected or clicked is skipped and the next one tried.
                }
            }
        }

        // If still not found, wait longer for an async dutchie widget to load.
        if (!capturedId) {
            console.log(` Waiting for async content...`);
            await page.waitForTimeout(5000);

            // Check script tags: external src, or the first 500 chars of inline code.
            const scripts = await page.evaluate(() => {
                return Array.from(document.querySelectorAll('script')).map((s) => s.src || s.innerHTML?.substring(0, 500));
            });
            for (const script of scripts) {
                if (script && script.includes('dutchie')) {
                    capture(matchPlatformId(script), 'Found in script');
                    if (capturedId) {
                        break;
                    }
                }
            }

            // Final check of iframes after the wait.
            if (!capturedId) {
                capture(await findPlatformIdInIframes(page), 'Found in iframe (delayed)');
            }
        }
    }
    catch (e) {
        // The thrown value is not guaranteed to be an Error with a string
        // message; stringify defensively so that logging itself cannot throw.
        console.log(` Error: ${String(e?.message ?? e).substring(0, 80)}`);
    }
    finally {
        await context.close();
    }
    return capturedId;
}
|
|
/**
 * Finds AZ dispensaries with menu_type 'dutchie' that have a website but no
 * platform_dispensary_id, tries to extract an id for each with
 * extractPlatformId(), stores every id found, and prints a summary.
 *
 * The browser and the connection pool are released in `finally` blocks so
 * that a failure part-way through (for example a rejected UPDATE) does not
 * leave either of them open. Errors still propagate to the caller.
 *
 * @returns {Promise<void>}
 */
async function main() {
    try {
        // Get dispensaries missing platform IDs.
        const result = await pool.query(`
            SELECT id, name, website
            FROM dispensaries
            WHERE state = 'AZ'
              AND menu_type = 'dutchie'
              AND (platform_dispensary_id IS NULL OR platform_dispensary_id = '')
              AND website IS NOT NULL AND website != ''
            ORDER BY name
        `);
        console.log(`Found ${result.rows.length} dispensaries to process\n`);

        const browser = await playwright_1.chromium.launch({ headless: true });
        const results = [];
        try {
            // Sites are processed one at a time on the shared browser.
            for (const dispensary of result.rows) {
                const platformId = await extractPlatformId(browser, dispensary);
                results.push({ id: dispensary.id, name: dispensary.name, platformId });
                if (platformId) {
                    // Update database.
                    await pool.query('UPDATE dispensaries SET platform_dispensary_id = $1 WHERE id = $2', [platformId, dispensary.id]);
                    console.log(` Updated database with ${platformId}`);
                }
            }
        }
        finally {
            await browser.close();
        }

        console.log('\n=== SUMMARY ===');
        const found = results.filter((r) => r.platformId);
        const notFound = results.filter((r) => !r.platformId);
        console.log(`\nFound (${found.length}):`);
        found.forEach((r) => console.log(` ${r.id}: ${r.name} -> ${r.platformId}`));
        console.log(`\nNot Found (${notFound.length}):`);
        notFound.forEach((r) => console.log(` ${r.id}: ${r.name}`));
    }
    finally {
        await pool.end();
    }
}
|
|
// Script entry point: run main() and, if it rejects, print the failure and
// terminate the process with exit status 1.
function reportFatalError(err) {
    console.error('Error:', err);
    process.exit(1);
}

main().catch(reportFatalError);
|