fix: Block images/fonts/media/stylesheets in Puppeteer to save bandwidth
Add request interception to all Puppeteer handlers to block unnecessary resources (images, fonts, media, stylesheets). We only need the HTML/JS to obtain the session cookie, and then the GraphQL JSON response.

Without this blocking, page loads were consuming 2.4GB of bandwidth from assets2.dutchie.com: every page visit downloaded all product thumbnails, logos, etc.

Files updated:
- product-discovery-http.ts
- entry-point-discovery.ts
- store-discovery-http.ts
- store-discovery-state.ts
- puppeteer-preflight.ts

Note: Product images from the payload are still downloaded once to MinIO via image-storage.ts; this change only blocks images rendered by the browser page.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -150,6 +150,17 @@ export async function runPuppeteerPreflight(
|
|||||||
|
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
|
|
||||||
|
// Block unnecessary resources to save bandwidth
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
page.on('request', (request: any) => {
|
||||||
|
const resourceType = request.resourceType();
|
||||||
|
if (['image', 'font', 'media', 'stylesheet'].includes(resourceType)) {
|
||||||
|
request.abort();
|
||||||
|
} else {
|
||||||
|
request.continue();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// If proxy has auth, set it up
|
// If proxy has auth, set it up
|
||||||
if (proxyUrl) {
|
if (proxyUrl) {
|
||||||
const proxyUrlParsed = new URL(proxyUrl);
|
const proxyUrlParsed = new URL(proxyUrl);
|
||||||
|
|||||||
@@ -233,6 +233,18 @@ export async function handleEntryPointDiscovery(ctx: TaskContext): Promise<TaskR
|
|||||||
|
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
|
|
||||||
|
// Block unnecessary resources to save bandwidth
|
||||||
|
// We only need HTML/JS for session, then GraphQL JSON
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
page.on('request', (request: any) => {
|
||||||
|
const resourceType = request.resourceType();
|
||||||
|
if (['image', 'font', 'media', 'stylesheet'].includes(resourceType)) {
|
||||||
|
request.abort();
|
||||||
|
} else {
|
||||||
|
request.continue();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// Setup proxy auth if needed
|
// Setup proxy auth if needed
|
||||||
if (proxyUrl) {
|
if (proxyUrl) {
|
||||||
const proxyUrlParsed = new URL(proxyUrl);
|
const proxyUrlParsed = new URL(proxyUrl);
|
||||||
|
|||||||
@@ -100,6 +100,19 @@ export async function handleProductDiscoveryHttp(ctx: TaskContext): Promise<Task
|
|||||||
|
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
|
|
||||||
|
// Block unnecessary resources to save bandwidth
|
||||||
|
// We only need HTML/JS for session, then GraphQL JSON
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
page.on('request', (request: any) => {
|
||||||
|
const resourceType = request.resourceType();
|
||||||
|
// Block images, fonts, media, and stylesheets - we don't need them
|
||||||
|
if (['image', 'font', 'media', 'stylesheet'].includes(resourceType)) {
|
||||||
|
request.abort();
|
||||||
|
} else {
|
||||||
|
request.continue();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// Setup proxy auth if needed
|
// Setup proxy auth if needed
|
||||||
if (proxyUrl) {
|
if (proxyUrl) {
|
||||||
const proxyUrlParsed = new URL(proxyUrl);
|
const proxyUrlParsed = new URL(proxyUrl);
|
||||||
|
|||||||
@@ -112,6 +112,18 @@ export async function handleStoreDiscoveryHttp(ctx: TaskContext): Promise<TaskRe
|
|||||||
|
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
|
|
||||||
|
// Block unnecessary resources to save bandwidth
|
||||||
|
// We only need HTML/JS for session, then GraphQL JSON
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
page.on('request', (request: any) => {
|
||||||
|
const resourceType = request.resourceType();
|
||||||
|
if (['image', 'font', 'media', 'stylesheet'].includes(resourceType)) {
|
||||||
|
request.abort();
|
||||||
|
} else {
|
||||||
|
request.continue();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// Setup proxy auth if needed
|
// Setup proxy auth if needed
|
||||||
if (proxyUrl) {
|
if (proxyUrl) {
|
||||||
const proxyUrlParsed = new URL(proxyUrl);
|
const proxyUrlParsed = new URL(proxyUrl);
|
||||||
|
|||||||
@@ -111,6 +111,17 @@ export async function handleStoreDiscoveryState(ctx: TaskContext): Promise<TaskR
|
|||||||
|
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
|
|
||||||
|
// Block unnecessary resources to save bandwidth
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
page.on('request', (request: any) => {
|
||||||
|
const resourceType = request.resourceType();
|
||||||
|
if (['image', 'font', 'media', 'stylesheet'].includes(resourceType)) {
|
||||||
|
request.abort();
|
||||||
|
} else {
|
||||||
|
request.continue();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// Setup proxy auth if needed
|
// Setup proxy auth if needed
|
||||||
if (proxyUrl) {
|
if (proxyUrl) {
|
||||||
const proxyUrlParsed = new URL(proxyUrl);
|
const proxyUrlParsed = new URL(proxyUrl);
|
||||||
|
|||||||
Reference in New Issue
Block a user