"use strict";
/**
 * Platform ID Extractor - Standalone script for extracting Dutchie platform IDs
 *
 * This script visits dispensary websites to capture their Dutchie retailerId
 * by intercepting network requests whose URL mentions "dutchie", and falls
 * back to scanning the page HTML, __NEXT_DATA__, iframe sources and script
 * tags when no matching request is seen.
 *
 * It does NOT use the main orchestrator - it's a standalone browser-based tool.
 *
 * Usage: node <this-script> [dispensaryId]
 *   With an id, only that dispensary is processed; without one, every AZ
 *   Dutchie dispensary that has a website but no platform ID is processed.
 */
Object.defineProperty(exports, "__esModule", { value: true });
const playwright_1 = require("playwright");
const pg_1 = require("pg");
const pool = new pg_1.Pool({ connectionString: process.env.DATABASE_URL });

/** User agent presented to the dispensary sites. */
const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

/**
 * Patterns to match retailer IDs in various formats. They are tried in order,
 * most specific first; every pattern captures a 24-character hex string in
 * group 1.
 */
const ID_PATTERNS = [
    /["']retailerId["']\s*:\s*["']([a-f0-9]{24})["']/i,
    /["']dispensaryId["']\s*:\s*["']([a-f0-9]{24})["']/i,
    /retailer["']?\s*:\s*["']([a-f0-9]{24})["']/i,
    /dutchie\.com\/embedded-menu\/([a-f0-9]{24})/i,
    /dutchie\.com\/dispensary\/([a-f0-9]{24})/i,
    /plus\.dutchie\.com\/plus\/([a-f0-9]{24})/i,
    /retailerId=([a-f0-9]{24})/i,
    /\/([a-f0-9]{24})(?:\/|\?|$)/i, // Generic ID in URL path
];

/** Any 24-hex run; only applied to iframe URLs that already mention "dutchie". */
const BARE_ID_PATTERN = /([a-f0-9]{24})/i;

/**
 * Return the first ID that any pattern captures from the given text.
 *
 * @param {string|null|undefined} text - Text to scan (URL, POST body, HTML, ...).
 * @param {RegExp[]} [patterns=ID_PATTERNS] - Patterns to try, in order.
 * @returns {string|null} The captured 24-character ID, or null when nothing matches.
 */
function findPlatformId(text, patterns = ID_PATTERNS) {
    if (!text) {
        return null;
    }
    for (const pattern of patterns) {
        const match = text.match(pattern);
        if (match && match[1]) {
            return match[1];
        }
    }
    return null;
}

/**
 * Look for an ID in the `src` of the iframes currently on the page. Only
 * iframes whose `src` contains "dutchie" are considered.
 *
 * @param {object} page - Playwright page to inspect.
 * @returns {Promise<string|null>} The first ID found, or null.
 */
async function findIdInIframes(page) {
    const sources = await page.evaluate(() => {
        return Array.from(document.querySelectorAll('iframe')).map(f => f.src);
    });
    for (const src of sources) {
        if (src.includes('dutchie')) {
            const match = src.match(BARE_ID_PATTERN);
            if (match) {
                return match[1];
            }
        }
    }
    return null;
}

/**
 * Visit one dispensary website and try to discover its Dutchie platform ID.
 *
 * Sources are tried in this order: intercepted requests (URL, then POST body),
 * main page HTML, __NEXT_DATA__, iframe sources, script tags, a linked
 * menu/shop/order page (HTML and iframes), and a final delayed iframe check.
 * The first ID captured wins and is never overwritten by a later source.
 *
 * Navigation and scanning errors are caught and reported in the `error` field
 * of the result rather than thrown. A failure to create or close the browser
 * context does propagate to the caller.
 *
 * @param {object} browser - Playwright browser used to create an isolated context.
 * @param {{id: number, name: string, website: string}} dispensary - Row to process.
 * @returns {Promise<{id: number, name: string, website: string,
 *   platformId: (string|null), source: (string|null), error: (string|null)}>}
 */
async function extractPlatformId(browser, dispensary) {
    let capturedId = null;
    let captureSource = null;
    let errorMsg = null;

    // Record an ID only if none has been captured yet. The request listener
    // can fire while a scan is awaiting, so every write goes through this guard.
    const capture = (id, source) => {
        if (!capturedId && id) {
            capturedId = id;
            captureSource = source;
        }
    };

    const context = await browser.newContext({ userAgent: USER_AGENT });
    try {
        const page = await context.newPage();

        // Intercept network requests
        page.on('request', (request) => {
            if (capturedId) {
                return;
            }
            const url = request.url();
            if (!url.includes('dutchie')) {
                return;
            }
            // Check URL for retailer ID
            capture(findPlatformId(url), 'request_url');
            // Check POST data
            if (!capturedId) {
                capture(findPlatformId(request.postData()), 'request_body');
            }
        });

        console.log(`\n[${dispensary.id}] ${dispensary.name}: ${dispensary.website}`);

        // Load main page
        await page.goto(dispensary.website, { waitUntil: 'domcontentloaded', timeout: 25000 });
        await page.waitForTimeout(2000);

        // Check page content
        if (!capturedId) {
            capture(findPlatformId(await page.content()), 'page_content');
        }

        // Check __NEXT_DATA__
        if (!capturedId) {
            const nextData = await page.evaluate(() => {
                const el = document.getElementById('__NEXT_DATA__');
                return el?.textContent || null;
            });
            capture(findPlatformId(nextData), '__NEXT_DATA__');
        }

        // Check iframes
        if (!capturedId) {
            capture(await findIdInIframes(page), 'iframe_src');
        }

        // Check scripts (external URL, or the first 1000 characters of inline code)
        if (!capturedId) {
            const scripts = await page.evaluate(() => {
                return Array.from(document.querySelectorAll('script'))
                    .map(s => s.src || s.innerHTML?.substring(0, 1000))
                    .filter(Boolean);
            });
            for (const script of scripts) {
                if (script.includes('dutchie') || script.includes('retailerId')) {
                    capture(findPlatformId(script), 'script');
                    if (capturedId) {
                        break;
                    }
                }
            }
        }

        // Try navigating to menu/shop page
        if (!capturedId) {
            const menuLink = await page.evaluate(() => {
                const links = Array.from(document.querySelectorAll('a'));
                for (const link of links) {
                    const href = link.href?.toLowerCase() || '';
                    const text = link.textContent?.toLowerCase() || '';
                    if (href.includes('menu') || href.includes('shop') || href.includes('order') ||
                        text.includes('menu') || text.includes('shop') || text.includes('order')) {
                        return link.href;
                    }
                }
                return null;
            });
            if (menuLink && !menuLink.startsWith('javascript:')) {
                try {
                    console.log(` -> Following menu link: ${menuLink.substring(0, 60)}...`);
                    await page.goto(menuLink, { waitUntil: 'domcontentloaded', timeout: 20000 });
                    await page.waitForTimeout(3000);

                    // Recheck sources on the new page. An ID the request
                    // listener captured during navigation takes precedence
                    // over anything scraped from the HTML.
                    if (!capturedId) {
                        capture(findPlatformId(await page.content()), 'menu_page_content');
                    }

                    // Check iframes on new page
                    if (!capturedId) {
                        capture(await findIdInIframes(page), 'menu_page_iframe');
                    }
                }
                catch (navError) {
                    // Best effort: a failed menu navigation must not abort the
                    // extraction, but it is reported instead of being hidden.
                    const reason = String(navError?.message ?? navError).substring(0, 100);
                    console.log(` -> Menu navigation failed: ${reason}`);
                }
            }
        }

        // Final wait for async content
        if (!capturedId) {
            await page.waitForTimeout(3000);
            // Final iframe check
            capture(await findIdInIframes(page), 'delayed_iframe');
        }

        if (capturedId) {
            console.log(` ✓ Found: ${capturedId} (${captureSource})`);
        }
        else {
            console.log(` ✗ Not found`);
        }
    }
    catch (e) {
        // Thrown values are not guaranteed to be Error instances.
        errorMsg = String(e?.message ?? e).substring(0, 100);
        console.log(` ✗ Error: ${errorMsg}`);
    }
    finally {
        await context.close();
    }

    return {
        id: dispensary.id,
        name: dispensary.name,
        website: dispensary.website,
        platformId: capturedId,
        source: captureSource,
        error: errorMsg
    };
}

/**
 * Script entry point: select the dispensaries to process, run the extractor
 * on each one sequentially, store every ID found, and print a summary.
 *
 * The browser and the connection pool are released even when a step throws.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the command-line argument is present but is not an integer id.
 */
async function main() {
    try {
        // Get specific dispensary ID from command line, or process all missing
        const rawTarget = process.argv[2];
        let targetId = null;
        if (rawTarget) {
            targetId = Number.parseInt(rawTarget, 10);
            if (Number.isNaN(targetId)) {
                // Without this check a typo would silently fall through to
                // processing (and updating) every dispensary.
                throw new Error(`Invalid dispensary id: ${rawTarget}`);
            }
        }

        let query;
        let params = [];
        if (targetId !== null) {
            query = `
                SELECT id, name, website
                FROM dispensaries
                WHERE id = $1
                  AND website IS NOT NULL AND website != ''
            `;
            params = [targetId];
        }
        else {
            query = `
                SELECT id, name, website
                FROM dispensaries
                WHERE state = 'AZ'
                  AND menu_type = 'dutchie'
                  AND (platform_dispensary_id IS NULL OR platform_dispensary_id = '')
                  AND website IS NOT NULL AND website != ''
                ORDER BY name
            `;
        }

        const result = await pool.query(query, params);
        if (result.rows.length === 0) {
            console.log('No dispensaries to process');
            return;
        }

        console.log(`\n=== Platform ID Extractor ===`);
        console.log(`Processing ${result.rows.length} dispensaries...\n`);

        const browser = await playwright_1.chromium.launch({
            headless: true,
            args: ['--no-sandbox', '--disable-setuid-sandbox']
        });
        const results = [];
        try {
            for (const dispensary of result.rows) {
                const extractionResult = await extractPlatformId(browser, dispensary);
                results.push(extractionResult);
                // Update database immediately if found
                if (extractionResult.platformId) {
                    await pool.query('UPDATE dispensaries SET platform_dispensary_id = $1 WHERE id = $2', [extractionResult.platformId, extractionResult.id]);
                }
            }
        }
        finally {
            await browser.close();
        }

        // Summary
        console.log('\n' + '='.repeat(60));
        console.log('SUMMARY');
        console.log('='.repeat(60));
        const found = results.filter(r => r.platformId);
        const notFound = results.filter(r => !r.platformId);
        console.log(`\nFound: ${found.length}/${results.length}`);
        if (found.length > 0) {
            console.log('\nSuccessful extractions:');
            found.forEach(r => console.log(` [${r.id}] ${r.name} -> ${r.platformId} (${r.source})`));
        }
        if (notFound.length > 0) {
            console.log(`\nNot found: ${notFound.length}`);
            notFound.forEach(r => {
                const reason = r.error || 'No Dutchie ID detected';
                console.log(` [${r.id}] ${r.name}: ${reason}`);
            });
        }
    }
    finally {
        await pool.end();
    }
}

main().catch(e => {
    console.error('Fatal error:', e);
    process.exit(1);
});