import { firefox } from 'playwright';
import type { Browser, BrowserContext, Page } from 'playwright';
import { pool } from './src/db/migrate.js';
import { getRandomProxy } from './src/utils/proxyManager.js';

/** Dispensary to process: first CLI argument, falling back to 112 when omitted. */
const dispensaryId = parseInt(process.argv[2] || '112', 10);

/** One brand link discovered on a dispensary's brands page. */
interface Brand {
  /** Path segment following `/brands/` in the link (trailing slash removed). */
  slug: string;
  /** Display name read from the link's text content. */
  name: string;
  /** Absolute URL of the brand page. */
  url: string;
}

/** Extracts a printable message from a value caught in a `catch` clause. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Loads `<menuUrl>/brands`, scrolls until the page height stops growing
 * (at most 10 times) and collects every unique brand link on the page.
 *
 * @param menuUrl - Base menu URL of the dispensary; trailing slashes are ignored.
 * @param context - Browser context owning `page`. Not used by this function;
 *   kept so existing call sites keep working.
 * @param page - Playwright page used for navigation and DOM extraction.
 * @returns The brands found, de-duplicated by slug. Returns an empty array
 *   (after logging the error) when navigation or extraction fails.
 */
async function scrapeBrandsList(
  menuUrl: string,
  context: BrowserContext,
  page: Page
): Promise<Brand[]> {
  try {
    // Strip trailing slashes so a menu URL ending in "/" does not yield "//brands".
    const brandsUrl = `${menuUrl.replace(/\/+$/, '')}/brands`;
    console.log(`šŸ“„ Loading brands page: ${brandsUrl}`);

    await page.goto(brandsUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });

    console.log(`ā³ Waiting for brands to render...`);
    await page.waitForSelector('a[href*="/brands/"]', { timeout: 45000 });
    console.log(`āœ… Brands appeared!`);
    await page.waitForTimeout(3000);

    // Scroll to load all brands
    console.log(`šŸ“œ Scrolling to load all brands...`);
    const maxScrolls = 10;
    let previousHeight = 0;
    for (let attempt = 0; attempt < maxScrolls; attempt++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await page.waitForTimeout(1500);

      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      // Height unchanged after a scroll: nothing more was loaded.
      if (currentHeight === previousHeight) break;
      previousHeight = currentHeight;
    }

    // Extract all brand links (this callback runs inside the browser page).
    const brands = await page.evaluate(() => {
      const brandLinks = Array.from(document.querySelectorAll('a[href*="/brands/"]'));

      const extracted = brandLinks.map((link) => {
        const href = link.getAttribute('href') || '';
        const slug = href.split('/brands/')[1]?.replace(/\/$/, '') || '';

        // Get brand name from the ContentWrapper div to avoid placeholder letter duplication
        const contentWrapper = link.querySelector('[class*="ContentWrapper"]');
        const name = contentWrapper?.textContent?.trim() || link.textContent?.trim() || slug;

        // Resolve relative links against the document so `url` is always absolute;
        // fall back to the raw attribute if it cannot be parsed.
        let url = href;
        try {
          url = new URL(href, document.baseURI).href;
        } catch {
          url = href;
        }

        return { slug, name, url };
      });

      // Filter out duplicates and invalid entries
      const seen = new Set<string>();
      return extracted.filter((brand) => {
        if (!brand.slug || !brand.name || seen.has(brand.slug)) return false;
        seen.add(brand.slug);
        return true;
      });
    });

    console.log(`āœ… Found ${brands.length} total brands`);
    return brands;
  } catch (error: unknown) {
    console.error(`āŒ Error scraping brands list:`, describeError(error));
    return [];
  }
}

/**
 * Script entry point: looks up the dispensary, scrapes its brand list through
 * a proxied Firefox session and inserts one `pending` row per brand into
 * `brand_scrape_jobs`.
 *
 * Sets `process.exitCode` to 1 when the dispensary ID is invalid, the
 * dispensary is missing or has no menu URL, no proxy is available, or no
 * brands are found. The browser (if launched) and the database pool are
 * always released before returning.
 */
async function main(): Promise<void> {
  console.log(`\n${'='.repeat(60)}`);
  console.log(`šŸ­ POPULATING JOB QUEUE`);
  console.log(`   Dispensary ID: ${dispensaryId}`);
  console.log(`${'='.repeat(60)}\n`);

  let browser: Browser | undefined;

  try {
    // parseInt yields NaN for a non-numeric argument; reject it before querying.
    if (!Number.isInteger(dispensaryId)) {
      console.error(`Invalid dispensary ID argument: ${process.argv[2]}`);
      process.exitCode = 1;
      return;
    }

    // Get dispensary info
    const dispensaryResult = await pool.query(
      "SELECT id, name, menu_url FROM dispensaries WHERE id = $1",
      [dispensaryId]
    );

    if (dispensaryResult.rows.length === 0) {
      console.error(`āŒ Dispensary ID ${dispensaryId} not found`);
      process.exitCode = 1;
      return;
    }

    const dispensary = dispensaryResult.rows[0];
    const menuUrl: unknown = dispensary.menu_url;
    console.log(`āœ… Dispensary: ${dispensary.name}`);
    console.log(`   Menu URL: ${menuUrl}\n`);

    // A NULL/empty menu_url would otherwise be scraped as "null/brands".
    if (typeof menuUrl !== 'string' || menuUrl.length === 0) {
      console.error(`Dispensary ID ${dispensaryId} has no menu URL`);
      process.exitCode = 1;
      return;
    }

    // Get proxy
    const proxy = await getRandomProxy();
    if (!proxy) {
      console.log(`āŒ No proxy available`);
      process.exitCode = 1;
      return;
    }
    console.log(`šŸ” Using proxy: ${proxy.server}\n`);

    // Launch browser
    browser = await firefox.launch({ headless: true });
    const context = await browser.newContext({
      viewport: { width: 1920, height: 1080 },
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
      proxy: {
        server: proxy.server,
        username: proxy.username,
        password: proxy.password
      }
    });
    const page = await context.newPage();

    // Get all brands
    const allBrands = await scrapeBrandsList(menuUrl, context, page);

    if (allBrands.length === 0) {
      console.log(`āŒ No brands found`);
      process.exitCode = 1;
      return;
    }

    console.log(`\nšŸ“‹ Found ${allBrands.length} brands. Populating job queue...\n`);

    // Insert jobs into database
    let inserted = 0;
    let skipped = 0;
    let failed = 0;

    for (const brand of allBrands) {
      try {
        // RETURNING yields a row only when the INSERT actually created one;
        // with ON CONFLICT DO NOTHING an existing job returns zero rows.
        // This distinguishes new jobs from pre-existing ones, which a
        // follow-up SELECT cannot do (it finds the row in both cases).
        const result = await pool.query(`
          INSERT INTO brand_scrape_jobs (dispensary_id, brand_slug, brand_name, status)
          VALUES ($1, $2, $3, 'pending')
          ON CONFLICT (dispensary_id, brand_slug) DO NOTHING
          RETURNING id
        `, [dispensaryId, brand.slug, brand.name]);

        if (result.rows.length > 0) {
          inserted++;
          if (inserted % 10 === 0) {
            console.log(`   Inserted ${inserted}/${allBrands.length} jobs...`);
          }
        } else {
          skipped++;
        }
      } catch (error: unknown) {
        failed++;
        console.error(`āŒ Error inserting job for ${brand.name}:`, describeError(error));
      }
    }

    console.log(`\n${'='.repeat(60)}`);
    console.log(`āœ… JOB QUEUE POPULATED`);
    console.log(`   Total brands: ${allBrands.length}`);
    console.log(`   Jobs inserted: ${inserted}`);
    console.log(`   Jobs skipped (already exist): ${skipped}`);
    if (failed > 0) {
      console.log(`   Jobs failed: ${failed}`);
    }
    console.log(`${'='.repeat(60)}\n`);
  } finally {
    // Release the browser and the pool on every path, including thrown errors.
    try {
      await browser?.close();
    } finally {
      await pool.end();
    }
  }
}

main()
  .catch((error: unknown) => {
    console.error(error);
    process.exitCode = 1;
  })
  .finally(() => {
    // Failure paths terminate immediately (as the original process.exit(1)
    // calls did), but only after cleanup in main() has completed.
    if (process.exitCode) process.exit();
  });