// File listing metadata: 80 lines, 2.6 KiB, TypeScript
import { firefox } from 'playwright';
|
|
import { pool } from './src/db/migrate.js';
|
|
import { getRandomProxy } from './src/utils/proxyManager.js';
|
|
|
|
const dispensaryId = 112;
|
|
|
|
async function main() {
|
|
const dispensaryResult = await pool.query(
|
|
"SELECT id, name, menu_url FROM dispensaries WHERE id = $1",
|
|
[dispensaryId]
|
|
);
|
|
|
|
const menuUrl = dispensaryResult.rows[0].menu_url;
|
|
const proxy = await getRandomProxy();
|
|
|
|
const browser = await firefox.launch({ headless: true });
|
|
const context = await browser.newContext({
|
|
viewport: { width: 1920, height: 1080 },
|
|
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
|
proxy: {
|
|
server: proxy.server,
|
|
username: proxy.username,
|
|
password: proxy.password
|
|
}
|
|
});
|
|
|
|
const page = await context.newPage();
|
|
const brandsUrl = `${menuUrl}/brands`;
|
|
|
|
console.log(`Loading: ${brandsUrl}`);
|
|
await page.goto(brandsUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });
|
|
await page.waitForSelector('a[href*="/brands/"]', { timeout: 45000 });
|
|
await page.waitForTimeout(3000);
|
|
|
|
// Get the HTML structure of the first 5 brand links
|
|
const brandStructures = await page.evaluate(() => {
|
|
const brandLinks = Array.from(document.querySelectorAll('a[href*="/brands/"]')).slice(0, 10);
|
|
|
|
return brandLinks.map(link => {
|
|
const href = link.getAttribute('href') || '';
|
|
const slug = href.split('/brands/')[1]?.replace(/\/$/, '') || '';
|
|
|
|
return {
|
|
slug,
|
|
innerHTML: (link as HTMLElement).innerHTML.substring(0, 300),
|
|
textContent: link.textContent?.trim(),
|
|
childElementCount: link.childElementCount,
|
|
children: Array.from(link.children).map(child => ({
|
|
tag: child.tagName.toLowerCase(),
|
|
class: child.className,
|
|
text: child.textContent?.trim()
|
|
}))
|
|
};
|
|
});
|
|
});
|
|
|
|
console.log('\n' + '='.repeat(80));
|
|
console.log('BRAND LINK STRUCTURES:');
|
|
console.log('='.repeat(80));
|
|
|
|
brandStructures.forEach((brand, idx) => {
|
|
console.log(`\n${idx + 1}. slug: ${brand.slug}`);
|
|
console.log(` textContent: "${brand.textContent}"`);
|
|
console.log(` childElementCount: ${brand.childElementCount}`);
|
|
console.log(` children:`);
|
|
brand.children.forEach((child, childIdx) => {
|
|
console.log(` ${childIdx + 1}. <${child.tag}> class="${child.class}"`);
|
|
console.log(` text: "${child.text}"`);
|
|
});
|
|
console.log(` innerHTML: ${brand.innerHTML.substring(0, 200)}`);
|
|
});
|
|
|
|
console.log('\n' + '='.repeat(80));
|
|
|
|
await browser.close();
|
|
await pool.end();
|
|
}
|
|
|
|
// Run the script. On failure, log the error and set a non-zero exit code so
// shells and CI can detect that the run did not succeed.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|