- Move deprecated directories to src/_deprecated/:
  - hydration/ (old pipeline approach)
  - scraper-v2/ (old Puppeteer scraper)
  - canonical-hydration/ (merged into tasks)
  - Unused services: availability, crawler-logger, geolocation, etc.
  - Unused utils: age-gate-playwright, HomepageValidator, stealthBrowser
- Archive outdated docs to docs/_archive/:
  - ANALYTICS_RUNBOOK.md
  - ANALYTICS_V2_EXAMPLES.md
  - BRAND_INTELLIGENCE_API.md
  - CRAWL_PIPELINE.md
  - TASK_WORKFLOW_2024-12-10.md
  - WORKER_TASK_ARCHITECTURE.md
  - ORGANIC_SCRAPING_GUIDE.md
- Add docs/CODEBASE_MAP.md as single source of truth
- Add warning files to deprecated/archived directories
- Slim down CLAUDE.md to essential rules only

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
198 lines
6.3 KiB
TypeScript
198 lines
6.3 KiB
TypeScript
import { Page, Browser, BrowserContext } from 'playwright';
|
|
import { logger } from '../services/logger';
|
|
|
|
/**
 * Phrases whose presence in the page body indicates an age-verification gate.
 * Every entry is lowercase because it is compared against the lowercased body.
 */
const AGE_GATE_PHRASES: readonly string[] = [
  'age verification',
  'please select your state',
  'are you 21',
  'are you 18',
  'enter your date of birth',
  'verify your age',
];

/**
 * Detects if a Playwright page has an age verification gate.
 *
 * A gate is reported when the current URL contains `/age-gate`, or when the
 * text content of `<body>` contains any known gate phrase. Phrase matching is
 * case-insensitive so that variants such as "Are You 21" are recognised.
 *
 * @param page - Playwright page object to inspect
 * @returns true when a gate is detected; false when none is detected or when
 *   the page could not be inspected (the error is logged as a warning)
 */
export async function hasAgeGatePlaywright(page: Page): Promise<boolean> {
  try {
    // The URL alone is conclusive, so skip reading the body in that case.
    if (page.url().includes('/age-gate')) {
      return true;
    }

    // textContent resolves to null when there is no body element.
    const bodyText = ((await page.textContent('body')) ?? '').toLowerCase();

    return AGE_GATE_PHRASES.some((phrase) => bodyText.includes(phrase));
  } catch (err) {
    // Best-effort detection: an uninspectable page is treated as "no gate".
    logger.warn('age-gate', `Error detecting age gate: ${err}`);
    return false;
  }
}
|
|
|
|
/** Escapes regular-expression metacharacters so `text` is matched literally. */
function escapeForRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Lets the page settle after a gate interaction, then reports (and logs)
 * success when the URL no longer contains `/age-gate`.
 */
async function leftAgeGateAfterSettling(
  page: Page,
  loadTimeoutMs: number,
  settleMs: number
): Promise<boolean> {
  await page.waitForLoadState('domcontentloaded', { timeout: loadTimeoutMs });
  await page.waitForTimeout(settleMs);

  if (page.url().includes('/age-gate')) {
    return false;
  }
  logger.info('age-gate', `✅ Age gate bypass successful`);
  return true;
}

/** Method 1: Curaleaf-style gate (state dropdown + "I'm over 21" button). */
async function tryCuraleafStyleGate(page: Page, state: string): Promise<boolean> {
  const stateButton = page.locator('button#state, button[id="state"]').first();
  if ((await stateButton.count()) === 0) {
    return false;
  }

  logger.info('age-gate', 'Found Curaleaf-style state dropdown...');
  await stateButton.click();
  await page.waitForTimeout(1000);

  // Escape the state so characters like "." or "(" are matched literally
  // instead of being interpreted as regex syntax.
  const stateOption = page
    .locator('[role="option"]')
    .filter({ hasText: new RegExp(`^${escapeForRegExp(state)}$`, 'i') });
  if ((await stateOption.count()) === 0) {
    return false;
  }

  logger.info('age-gate', `Clicking ${state} option...`);
  await stateOption.first().click();
  await page.waitForTimeout(2000);

  // Look for "I'm over 21" button
  const ageButton = page.locator('button').filter({ hasText: /I'm over 21|I am 21|I'm 21|over 21/i });
  if ((await ageButton.count()) === 0) {
    return false;
  }

  logger.info('age-gate', 'Clicking age verification button...');
  await ageButton.first().click();
  return leftAgeGateAfterSettling(page, 15000, 3000);
}

/** Method 2: a single "Yes" / "I'm 21" style confirmation button or link. */
async function trySimpleButtonGate(page: Page): Promise<boolean> {
  const simpleButton = page.locator('button, a, [role="button"]').filter({
    hasText: /yes|i am 21|i'm 21|enter the site|continue|confirm/i
  });
  if ((await simpleButton.count()) === 0) {
    return false;
  }

  logger.info('age-gate', 'Found simple age gate button...');
  await simpleButton.first().click();
  return leftAgeGateAfterSettling(page, 10000, 2000);
}

/** Method 3: a native `<select>` state dropdown followed by a submit button. */
async function trySelectDropdownGate(page: Page, state: string): Promise<boolean> {
  const selects = page.locator('select');
  if ((await selects.count()) === 0) {
    return false;
  }

  logger.info('age-gate', 'Found select dropdown...');
  await selects.first().selectOption({ label: state });
  await page.waitForTimeout(1000);

  // Look for submit button
  const submitButton = page.locator('button[type="submit"], input[type="submit"]');
  if ((await submitButton.count()) === 0) {
    return false;
  }

  await submitButton.first().click();
  return leftAgeGateAfterSettling(page, 10000, 2000);
}

/**
 * Attempts to bypass an age gate using Playwright.
 *
 * Tries three strategies in order and stops at the first one that leaves the
 * `/age-gate` URL: a Curaleaf-style state dropdown with an "I'm over 21"
 * button, a simple confirmation button, and a standard `<select>` dropdown.
 * A strategy that throws is logged as a warning and the next one is tried.
 *
 * @param page - Playwright page object
 * @param state - State to select (e.g., 'Arizona', 'California')
 * @returns true if no gate was detected or the page URL no longer contains
 *   `/age-gate` afterwards; false if the URL is still on the gate or an
 *   unexpected error occurred (the error is logged)
 */
export async function bypassAgeGatePlaywright(
  page: Page,
  state: string = 'Arizona'
): Promise<boolean> {
  try {
    const hasGate = await hasAgeGatePlaywright(page);

    if (!hasGate) {
      logger.info('age-gate', 'No age gate detected');
      return true;
    }

    logger.info('age-gate', `Age gate detected - attempting to bypass with state: ${state}...`);

    // Wait for age gate to fully render
    await page.waitForTimeout(2000);

    const strategies: ReadonlyArray<{ label: string; attempt: () => Promise<boolean> }> = [
      { label: 'Curaleaf method', attempt: () => tryCuraleafStyleGate(page, state) },
      { label: 'Simple button method', attempt: () => trySimpleButtonGate(page) },
      { label: 'Select dropdown method', attempt: () => trySelectDropdownGate(page, state) },
    ];

    for (const { label, attempt } of strategies) {
      try {
        if (await attempt()) {
          return true;
        }
      } catch (e) {
        // A failing strategy must not prevent the remaining ones from running.
        logger.warn('age-gate', `${label} failed: ${e}`);
      }
    }

    // Verify final state
    const finalUrl = page.url();
    if (finalUrl.includes('/age-gate')) {
      logger.error('age-gate', `❌ Age gate bypass failed - still at: ${finalUrl}`);
      return false;
    }

    // NOTE(review): success is judged by URL only. A gate that was detected
    // through body text (URL never contained '/age-gate') is reported as
    // bypassed here even if no strategy matched — confirm this is intended.
    logger.info('age-gate', `✅ Age gate bypass successful`);
    return true;
  } catch (err) {
    logger.error('age-gate', `Error bypassing age gate: ${err}`);
    return false;
  }
}
|
|
|
|
/**
 * URL fragments mapped to the state they indicate, in match-priority order
 * (the first fragment found in the URL wins).
 *
 * The full-name fragments for Massachusetts through Washington are listed
 * after every other entry so that any URL matching one of the earlier
 * fragments keeps resolving to the same state.
 */
const STATE_URL_PATTERNS: ReadonlyArray<readonly [string, string]> = [
  ['-az-', 'Arizona'],
  ['arizona', 'Arizona'],
  ['-ca-', 'California'],
  ['california', 'California'],
  ['-co-', 'Colorado'],
  ['colorado', 'Colorado'],
  ['-fl-', 'Florida'],
  ['florida', 'Florida'],
  ['-il-', 'Illinois'],
  ['illinois', 'Illinois'],
  ['-ma-', 'Massachusetts'],
  ['-mi-', 'Michigan'],
  ['-nv-', 'Nevada'],
  ['-nj-', 'New Jersey'],
  ['-ny-', 'New York'],
  ['-or-', 'Oregon'],
  ['-pa-', 'Pennsylvania'],
  ['-wa-', 'Washington'],
  ['massachusetts', 'Massachusetts'],
  ['michigan', 'Michigan'],
  ['nevada', 'Nevada'],
  ['new-jersey', 'New Jersey'],
  ['new-york', 'New York'],
  ['oregon', 'Oregon'],
  ['pennsylvania', 'Pennsylvania'],
  ['washington', 'Washington'],
];

/**
 * Helper to detect the state from a store URL.
 *
 * The URL is lowercased and searched for known fragments: hyphen-delimited
 * two-letter abbreviations (e.g. `-az-`) and full state names (e.g.
 * `arizona`, `new-york`). Matching is by plain substring.
 *
 * @param url - Store URL to inspect
 * @returns the display name of the first matching state, or 'Arizona' when
 *   no fragment matches
 */
export function detectStateFromUrlPlaywright(url: string): string {
  const lowerUrl = url.toLowerCase();

  for (const [pattern, stateName] of STATE_URL_PATTERNS) {
    if (lowerUrl.includes(pattern)) {
      return stateName;
    }
  }

  // Default to Arizona
  return 'Arizona';
}
|