Files
cannaiq/backend/src/_deprecated/utils/age-gate-playwright.ts
Kelly a35976b9e9 chore: Clean up deprecated code and docs
- Move deprecated directories to src/_deprecated/:
  - hydration/ (old pipeline approach)
  - scraper-v2/ (old Puppeteer scraper)
  - canonical-hydration/ (merged into tasks)
  - Unused services: availability, crawler-logger, geolocation, etc
  - Unused utils: age-gate-playwright, HomepageValidator, stealthBrowser

- Archive outdated docs to docs/_archive/:
  - ANALYTICS_RUNBOOK.md
  - ANALYTICS_V2_EXAMPLES.md
  - BRAND_INTELLIGENCE_API.md
  - CRAWL_PIPELINE.md
  - TASK_WORKFLOW_2024-12-10.md
  - WORKER_TASK_ARCHITECTURE.md
  - ORGANIC_SCRAPING_GUIDE.md

- Add docs/CODEBASE_MAP.md as single source of truth
- Add warning files to deprecated/archived directories
- Slim down CLAUDE.md to essential rules only

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-11 22:17:40 -07:00

198 lines
6.3 KiB
TypeScript

import { Page, Browser, BrowserContext } from 'playwright';
import { logger } from '../services/logger';
/**
 * Detects whether a Playwright page is currently showing an age verification gate.
 *
 * A gate is reported when the page URL contains `/age-gate`, or when the text
 * content of `<body>` contains one of a fixed list of age-gate phrases.
 * Phrase matching is case-insensitive so that copy such as "Are you 21" or
 * "Age Verification" is recognised regardless of capitalisation.
 *
 * @param page - Playwright page to inspect
 * @returns `true` when an age gate is detected; `false` otherwise, including
 *   when inspecting the page throws (the error is logged as a warning)
 */
export async function hasAgeGatePlaywright(page: Page): Promise<boolean> {
  // Kept in lowercase: they are compared against the lowercased body text.
  const gatePhrases = [
    'age verification',
    'please select your state',
    'are you 21',
    'are you 18',
    'enter your date of birth',
    'verify your age',
  ];

  try {
    // The URL alone is conclusive, so the body is only read when needed.
    if (page.url().includes('/age-gate')) {
      return true;
    }

    // textContent() resolves to null when the selector matches nothing.
    const bodyText = ((await page.textContent('body')) ?? '').toLowerCase();
    return gatePhrases.some((phrase) => bodyText.includes(phrase));
  } catch (err) {
    logger.warn('age-gate', `Error detecting age gate: ${err}`);
    return false;
  }
}
/**
 * Escapes regular-expression metacharacters so `text` is matched literally.
 */
function escapeRegExpLiteral(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Strategy 1: Curaleaf-style gate (custom state dropdown followed by an
 * "I'm over 21" button). Resolves to `true` only if every step was found and
 * the page URL no longer contains `/age-gate` afterwards.
 */
async function tryCuraleafStyleGate(page: Page, state: string): Promise<boolean> {
  const stateButton = page.locator('button#state, button[id="state"]').first();
  if ((await stateButton.count()) === 0) {
    return false;
  }
  logger.info('age-gate', 'Found Curaleaf-style state dropdown...');
  await stateButton.click();
  await page.waitForTimeout(1000);

  // The state name comes from the caller, so escape it before building a
  // pattern; otherwise a metacharacter would change what gets matched.
  const stateOption = page
    .locator('[role="option"]')
    .filter({ hasText: new RegExp(`^${escapeRegExpLiteral(state)}$`, 'i') });
  if ((await stateOption.count()) === 0) {
    return false;
  }
  logger.info('age-gate', `Clicking ${state} option...`);
  await stateOption.first().click();
  await page.waitForTimeout(2000);

  const ageButton = page
    .locator('button')
    .filter({ hasText: /I'm over 21|I am 21|I'm 21|over 21/i });
  if ((await ageButton.count()) === 0) {
    return false;
  }
  logger.info('age-gate', 'Clicking age verification button...');
  await ageButton.first().click();
  await page.waitForLoadState('domcontentloaded', { timeout: 15000 });
  await page.waitForTimeout(3000);

  return !page.url().includes('/age-gate');
}

/**
 * Strategy 2: a single confirmation control ("Yes", "I'm 21", "Continue", ...).
 * Resolves to `true` if one was clicked and the URL no longer contains
 * `/age-gate`.
 */
async function trySimpleButtonGate(page: Page): Promise<boolean> {
  const confirmButton = page.locator('button, a, [role="button"]').filter({
    hasText: /yes|i am 21|i'm 21|enter the site|continue|confirm/i,
  });
  if ((await confirmButton.count()) === 0) {
    return false;
  }
  logger.info('age-gate', 'Found simple age gate button...');
  await confirmButton.first().click();
  await page.waitForLoadState('domcontentloaded', { timeout: 10000 });
  await page.waitForTimeout(2000);

  return !page.url().includes('/age-gate');
}

/**
 * Strategy 3: a native `<select>` for the state plus a submit button.
 * Resolves to `true` if the form was submitted and the URL no longer contains
 * `/age-gate`.
 */
async function trySelectDropdownGate(page: Page, state: string): Promise<boolean> {
  if ((await page.locator('select').count()) === 0) {
    return false;
  }
  logger.info('age-gate', 'Found select dropdown...');
  await page.locator('select').first().selectOption({ label: state });
  await page.waitForTimeout(1000);

  const submitButton = page.locator('button[type="submit"], input[type="submit"]');
  if ((await submitButton.count()) === 0) {
    return false;
  }
  await submitButton.first().click();
  await page.waitForLoadState('domcontentloaded', { timeout: 10000 });
  await page.waitForTimeout(2000);

  return !page.url().includes('/age-gate');
}

/**
 * Attempts to bypass an age gate using Playwright.
 *
 * Three strategies are tried in order: a Curaleaf-style state dropdown with an
 * "I'm over 21" button, a single confirmation button, and a native `<select>`
 * with a submit button. A strategy that throws is logged as a warning and the
 * next one is tried. A strategy counts as successful when, after its final
 * click, the page URL no longer contains `/age-gate`.
 *
 * If no strategy succeeds, the page is re-inspected with
 * `hasAgeGatePlaywright`, so a gate that is still detected is reported as a
 * failure rather than as a success merely because the URL has no `/age-gate`
 * segment.
 *
 * @param page - Playwright page object
 * @param state - State to select (e.g., 'Arizona', 'California')
 * @returns `true` if no gate was present or the bypass succeeded; `false` if
 *   the gate is still detected or an unexpected error occurred
 */
export async function bypassAgeGatePlaywright(
  page: Page,
  state: string = 'Arizona'
): Promise<boolean> {
  try {
    if (!(await hasAgeGatePlaywright(page))) {
      logger.info('age-gate', 'No age gate detected');
      return true;
    }

    logger.info('age-gate', `Age gate detected - attempting to bypass with state: ${state}...`);
    // Wait for age gate to fully render
    await page.waitForTimeout(2000);

    // Ordered most-specific first; `label` is used in the failure log line.
    const strategies: Array<{ label: string; attempt: () => Promise<boolean> }> = [
      { label: 'Curaleaf method', attempt: () => tryCuraleafStyleGate(page, state) },
      { label: 'Simple button method', attempt: () => trySimpleButtonGate(page) },
      { label: 'Select dropdown method', attempt: () => trySelectDropdownGate(page, state) },
    ];

    for (const { label, attempt } of strategies) {
      try {
        if (await attempt()) {
          logger.info('age-gate', `✅ Age gate bypass successful`);
          return true;
        }
      } catch (e) {
        // Best effort: a failing strategy must not prevent the next one.
        logger.warn('age-gate', `${label} failed: ${e}`);
      }
    }

    // No strategy reported success. Re-run detection instead of trusting the
    // URL alone: gates detected via body text never had '/age-gate' in the URL.
    const finalUrl = page.url();
    if (finalUrl.includes('/age-gate') || (await hasAgeGatePlaywright(page))) {
      logger.error('age-gate', `❌ Age gate bypass failed - still at: ${finalUrl}`);
      return false;
    }

    logger.info('age-gate', `✅ Age gate bypass successful`);
    return true;
  } catch (err) {
    logger.error('age-gate', `Error bypassing age gate: ${err}`);
    return false;
  }
}
/**
 * Helper to detect the state from a store URL.
 *
 * The URL is lowercased and scanned for known substrings: either a two-letter
 * state slug surrounded by hyphens (e.g. `-az-`) or the state's full name as
 * it appears in a URL slug (e.g. `arizona`, `new-york`). Patterns are tested
 * in the order listed and the first match wins.
 *
 * @param url - Store URL to inspect
 * @returns The matching state's display name, or 'Arizona' when nothing matches
 */
export function detectStateFromUrlPlaywright(url: string): string {
  // Order matters: earlier entries take precedence. Full-name slugs for the
  // states that were previously abbreviation-only are appended at the end so
  // that any URL matching an earlier pattern keeps resolving as before.
  const statePatterns: ReadonlyArray<readonly [string, string]> = [
    ['-az-', 'Arizona'],
    ['arizona', 'Arizona'],
    ['-ca-', 'California'],
    ['california', 'California'],
    ['-co-', 'Colorado'],
    ['colorado', 'Colorado'],
    ['-fl-', 'Florida'],
    ['florida', 'Florida'],
    ['-il-', 'Illinois'],
    ['illinois', 'Illinois'],
    ['-ma-', 'Massachusetts'],
    ['-mi-', 'Michigan'],
    ['-nv-', 'Nevada'],
    ['-nj-', 'New Jersey'],
    ['-ny-', 'New York'],
    ['-or-', 'Oregon'],
    ['-pa-', 'Pennsylvania'],
    ['-wa-', 'Washington'],
    ['massachusetts', 'Massachusetts'],
    ['michigan', 'Michigan'],
    ['nevada', 'Nevada'],
    ['new-jersey', 'New Jersey'],
    ['new-york', 'New York'],
    ['oregon', 'Oregon'],
    ['pennsylvania', 'Pennsylvania'],
    ['washington', 'Washington'],
  ];

  const lowerUrl = url.toLowerCase();
  const match = statePatterns.find(([pattern]) => lowerUrl.includes(pattern));

  // Default to Arizona
  return match ? match[1] : 'Arizona';
}