Force new git SHA to avoid CI scientific notation bug. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
180 lines
5.3 KiB
TypeScript
180 lines
5.3 KiB
TypeScript
import puppeteer from 'puppeteer';
|
|
|
|
/**
 * Pauses the calling async flow for roughly `ms` milliseconds.
 *
 * @param ms - Delay passed to `setTimeout`, in milliseconds.
 * @returns A promise that settles with no value once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
|
|
/**
 * Crawls the brand directory at https://shop.bestdispensary.com/brands and
 * prints a report to stdout.
 *
 * Steps:
 *  1. Launch headless Chromium and abort image/font/media requests.
 *  2. Open the brands page and click the age-gate submit button when the
 *     age-gate modal is present.
 *  3. Scroll to the bottom up to 50 times, stopping early once the number of
 *     distinct brand links has been unchanged for 5 consecutive scrolls.
 *  4. Collect the brand links (deduplicated by lower-cased href) and print
 *     them sorted by name.
 *  5. Visit every brand page, scroll it 10 times and count distinct product
 *     names. A brand whose page fails is logged and recorded with 0 products.
 *  6. Print the brand and product totals.
 *
 * The browser is closed in a `finally` block so a failure in any step does
 * not leave a Chromium process behind.
 *
 * @returns A promise that resolves when the report is printed and the
 *   browser is closed; it rejects if a step outside the per-brand loop throws.
 */
async function main(): Promise<void> {
  const brandsUrl = 'https://shop.bestdispensary.com/brands';
  console.log(`Loading ALL brands from ${brandsUrl}`);

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Executed inside the page via page.evaluate, so it must not reference
  // anything from the Node-side scope. Returns the number of distinct hrefs
  // among anchors whose href contains "/brand/".
  const countBrandLinks = (): number => {
    const seen = new Set<string>();
    document.querySelectorAll('a[href*="/brand/"]').forEach((a) => {
      const href = a.getAttribute('href');
      if (href) seen.add(href);
    });
    return seen.size;
  };

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip heavy assets; only the DOM is needed for link extraction.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto(brandsUrl, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Age gate detected, bypassing...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    console.log('Current URL:', page.url());

    // Get initial brand count
    let brandCount = await page.evaluate(countBrandLinks);
    console.log(`Initial brand count: ${brandCount}`);

    // Aggressive scrolling
    console.log('\nScrolling to load ALL brands...');
    let previousCount = 0;
    let sameCount = 0;

    for (let i = 0; i < 50; i++) {
      // Scroll to bottom
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1000);

      brandCount = await page.evaluate(countBrandLinks);

      if (brandCount === previousCount) {
        sameCount++;
        // Five unchanged readings in a row are treated as "list fully loaded".
        if (sameCount >= 5) {
          console.log(`  Scroll ${i + 1}: ${brandCount} brands (stopping - no change)`);
          break;
        }
      } else {
        sameCount = 0;
        console.log(`  Scroll ${i + 1}: ${brandCount} brands`);
      }
      previousCount = brandCount;
    }

    // Get all unique brands
    const brands = await page.evaluate(() => {
      const results: { name: string; href: string }[] = [];
      const seen = new Set<string>();

      document.querySelectorAll('a[href*="/brand/"]').forEach((a) => {
        const href = a.getAttribute('href') || '';
        const normalizedHref = href.toLowerCase();
        if (seen.has(normalizedHref)) return;
        seen.add(normalizedHref);

        // Get brand name: heading-like child first, then the first line of
        // the anchor text, then the slug from the href.
        let name = '';
        const heading = a.querySelector('h3, h4, h5, [class*="name"]');
        if (heading) {
          name = heading.textContent?.trim() || '';
        }
        if (!name) {
          name = a.textContent?.trim().split('\n')[0] || '';
        }
        if (!name) {
          name = href.split('/brand/')[1]?.replace(/-/g, ' ') || '';
        }

        results.push({ name: name.slice(0, 50), href });
      });

      return results.sort((a, b) => a.name.localeCompare(b.name));
    });

    console.log('\n' + '='.repeat(60));
    console.log('TOTAL BRANDS FOUND: ' + brands.length);
    console.log('='.repeat(60));

    brands.forEach((b, i) => {
      const num = (i + 1).toString().padStart(3, ' ');
      console.log(`${num}. ${b.name} (${b.href})`);
    });

    // Now visit each brand page and count products
    console.log('\n' + '='.repeat(60));
    console.log('PRODUCTS PER BRAND');
    console.log('='.repeat(60));

    const brandProducts: { brand: string; products: number }[] = [];

    for (const [i, brand] of brands.entries()) {
      const position = (i + 1).toString().padStart(3);
      try {
        // Resolve against the listing URL so absolute, root-relative and
        // non-rooted hrefs all produce a valid address.
        const brandUrl = new URL(brand.href, brandsUrl).href;

        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1500);

        // Scroll to load products
        for (let j = 0; j < 10; j++) {
          await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
          await sleep(800);
        }

        const productCount = await page.evaluate(() => {
          const seen = new Set<string>();
          document.querySelectorAll('a[href*="/product/"]').forEach((a) => {
            const img = a.querySelector('img');
            // Products are keyed by image alt text, falling back to link text.
            const name = img?.getAttribute('alt') || a.textContent?.trim() || '';
            if (name) seen.add(name);
          });
          return seen.size;
        });

        brandProducts.push({ brand: brand.name, products: productCount });
        console.log(`${position}. ${brand.name}: ${productCount} products`);
      } catch (err: unknown) {
        // Best effort: one failing brand page must not abort the whole run.
        const message = err instanceof Error ? err.message : String(err);
        console.log(`${position}. ${brand.name}: ERROR - ${message.slice(0, 30)}`);
        brandProducts.push({ brand: brand.name, products: 0 });
      }
    }

    // Summary
    const totalProducts = brandProducts.reduce((sum, b) => sum + b.products, 0);
    console.log('\n' + '='.repeat(60));
    console.log('SUMMARY');
    console.log('='.repeat(60));
    console.log(`Total brands: ${brands.length}`);
    console.log(`Total products: ${totalProducts}`);
  } finally {
    // Always release the Chromium process, including on thrown errors.
    await browser.close();
  }
}
|
|
|
|
// Script entry point: run the crawl and print any rejection to stderr.
void main().catch((error: unknown) => {
  console.error(error);
});
|