- Add backend stale process monitoring API (/api/stale-processes)
- Add users management route
- Add frontend landing page and stale process monitor UI on /scraper-tools
- Move old development scripts to backend/archive/
- Update frontend build with new features

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
162 lines
5.8 KiB
TypeScript
162 lines
5.8 KiB
TypeScript
import { firefox } from 'playwright';
|
|
import { pool } from './src/db/migrate.js';
|
|
import { getRandomProxy } from './src/utils/proxyManager.js';
|
|
|
|
/**
 * Debug helper: loads a single product page in headless Firefox through the
 * proxy returned by `getRandomProxy()`, scrapes every field it can find from
 * the rendered DOM, and prints the result to stdout as pretty-printed JSON.
 *
 * The page URL is read from the first CLI argument (`process.argv[2]`); when
 * it is missing a hard-coded sample product URL is used.
 *
 * The browser is closed and `pool.end()` is awaited on every path. The
 * process exits with code 1 when no proxy is available or when any step
 * throws; on success the function simply returns.
 */
async function checkProduct() {
  // Set on any failure so cleanup in `finally` runs before the process exits.
  let failed = false;

  try {
    const proxy = await getRandomProxy();
    if (!proxy) {
      console.log('No proxy available');
      failed = true;
      return;
    }

    console.log(`Using proxy: ${proxy.server}`);

    const browser = await firefox.launch({
      headless: true,
      firefoxUserPrefs: {
        'geo.enabled': true,
      },
    });

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
        geolocation: { latitude: 33.4484, longitude: -112.0740 },
        permissions: ['geolocation'],
        proxy: {
          server: proxy.server,
          username: proxy.username,
          password: proxy.password,
        },
      });
      const page = await context.newPage();

      console.log('Loading product page...');
      const url = process.argv[2] || 'https://dutchie.com/embedded-menu/AZ-Deeply-Rooted/product/abundant-organics-flower-mylar-abundant-horizon';
      await page.goto(url, {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
      });

      // Fixed pause after DOMContentLoaded; presumably gives client-side
      // rendering time to populate the page before it is inspected.
      await page.waitForTimeout(5000);

      // Everything inside this callback runs in the browser page, so it must be
      // self-contained: it cannot reference helpers defined in this Node module.
      const productData = await page.evaluate(() => {
        const allText = document.body.textContent || '';

        // Keeps the terms that occur as whole words (case-insensitive) in the page text.
        const mentionedIn = (terms: string[]): string[] =>
          terms.filter((term) => new RegExp(`\\b${term}\\b`, 'i').test(allText));

        // 1. BASIC INFO
        const name = document.querySelector('h1')?.textContent?.trim() || null;

        // 2. CATEGORY - look for breadcrumbs or category links
        const category = Array.from(document.querySelectorAll('[class*="breadcrumb"] a, nav a'))
          .map((link) => link.textContent?.trim())
          .filter(Boolean);

        // 3. BRAND
        // Only standard CSS selectors are valid for document.querySelector.
        // Playwright-only pseudo-classes such as `:has-text()` throw a
        // SyntaxError in the page, so they are intentionally not listed here.
        let brand: string | undefined;
        for (const selector of ['[class*="brand"]', '[data-testid*="brand"]']) {
          const text = document.querySelector(selector)?.textContent;
          if (text && !text.includes('Brand:')) {
            brand = text.trim();
            break;
          }
        }

        // 4. PRICES
        const prices = allText.match(/\$(\d+\.?\d*)/g) || [];

        // 5. THC/CBD CONTENT
        const thcMatch = allText.match(/THC[:\s]*(\d+\.?\d*)\s*%/i);
        const cbdMatch = allText.match(/CBD[:\s]*(\d+\.?\d*)\s*%/i);
        const thc = thcMatch ? parseFloat(thcMatch[1]) : null;
        const cbd = cbdMatch ? parseFloat(cbdMatch[1]) : null;

        // 6. STRAIN TYPE - first hit wins, checked in this fixed order
        let strainType: string | undefined;
        if (/\bindica\b/i.test(allText)) strainType = 'Indica';
        else if (/\bsativa\b/i.test(allText)) strainType = 'Sativa';
        else if (/\bhybrid\b/i.test(allText)) strainType = 'Hybrid';

        // 7. WEIGHT/SIZE OPTIONS
        // Longer unit names come first in the alternation; otherwise `g`
        // matches first and "gram" is truncated to "g".
        const weights = Array.from(
          allText.matchAll(/(\d+\.?\d*\s*(?:gram|ounce|mg|ml|oz|g))/gi),
          (match) => match[1].trim(),
        );

        // 8. DESCRIPTION - first candidate with more than 20 characters, capped at 500
        let description: string | undefined;
        for (const selector of ['[class*="description"]', '[class*="Description"]', 'p[class*="product"]']) {
          const text = document.querySelector(selector)?.textContent;
          if (text && text.length > 20) {
            description = text.trim().substring(0, 500);
            break;
          }
        }

        // 9. EFFECTS
        const effects = mentionedIn(['Relaxed', 'Happy', 'Euphoric', 'Uplifted', 'Creative', 'Energetic', 'Focused', 'Calm', 'Sleepy', 'Hungry']);

        // 10. TERPENES
        const terpenes = mentionedIn(['Myrcene', 'Limonene', 'Caryophyllene', 'Pinene', 'Linalool', 'Humulene']);

        // 11. FLAVORS
        const flavors = mentionedIn(['Sweet', 'Citrus', 'Earthy', 'Pine', 'Berry', 'Diesel', 'Sour', 'Floral', 'Spicy']);

        // 12. SPECIAL INFO
        const hasSpecialText = ['Special', 'Sale', 'Deal'].some((word) => allText.includes(word));
        const endsMatch = allText.match(/(?:ends?|expires?)\s+(?:in\s+)?(\d+)\s+(min|hour|day)/i);
        const specialEndsIn = endsMatch ? `${endsMatch[1]} ${endsMatch[2]}` : null;

        // 13. IMAGE URLS
        const imageUrls = Array.from(
          document.querySelectorAll<HTMLImageElement>('img[src*="dutchie"]'),
          (img) => img.src,
        ).filter(Boolean);

        // 15. STRUCTURED DATA FROM SCRIPTS
        // Every <script> is scanned; a later script overwrites an earlier hit.
        const structuredData: { productId?: string; variantId?: string; category?: string } = {};
        for (const script of Array.from(document.querySelectorAll('script'))) {
          const content = script.textContent || '';

          const idMatch = content.match(/"id":"([a-f0-9-]+)"/);
          // Ignore short ids; only values longer than 10 characters are kept.
          if (idMatch && idMatch[1].length > 10) {
            structuredData.productId = idMatch[1];
          }

          const variantMatch = content.match(/"variantId":"([^"]+)"/);
          if (variantMatch) {
            structuredData.variantId = variantMatch[1];
          }

          const categoryMatch = content.match(/"category":"([^"]+)"/);
          if (categoryMatch) {
            structuredData.category = categoryMatch[1];
          }
        }

        // Key order is kept stable so the printed JSON layout does not change;
        // fields left `undefined` are omitted by JSON.stringify.
        return {
          fields: {
            name,
            category,
            brand,
            prices,
            thc,
            cbd,
            strainType,
            weights,
            description,
            effects,
            terpenes,
            flavors,
            hasSpecialText,
            specialEndsIn,
            imageUrls,
          },
          // 14. ALL VISIBLE TEXT (for debugging)
          allVisibleText: allText.substring(0, 1000),
          structuredData,
        };
      });

      console.log('\n=== PRODUCT DATA (Time: ' + new Date().toISOString() + ') ===');
      console.log(JSON.stringify(productData, null, 2));
    } finally {
      // Log instead of rethrowing so a close failure never masks the scrape error.
      await browser.close().catch((closeError: unknown) => {
        console.error('Error closing browser:', closeError);
      });
    }
  } catch (error) {
    console.error('Error:', error);
    failed = true;
  } finally {
    await pool.end();
    if (failed) process.exit(1);
  }
}
|
|
|
|
// Script entry point. The promise is handled explicitly so that any rejection
// escaping checkProduct() is logged and turned into a non-zero exit code
// instead of surfacing as an unhandled promise rejection.
checkProduct().catch((error: unknown) => {
  console.error('Error:', error);
  process.exit(1);
});
|