The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
264 lines
12 KiB
JavaScript
264 lines
12 KiB
JavaScript
"use strict";
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.setAgeGateCookies = setAgeGateCookies;
|
|
exports.hasAgeGate = hasAgeGate;
|
|
exports.bypassAgeGate = bypassAgeGate;
|
|
exports.detectStateFromUrl = detectStateFromUrl;
|
|
const logger_1 = require("../services/logger");
|
|
/**
 * Sets age gate bypass cookies before navigating to a page.
 * This should be called BEFORE page.goto() to prevent the age gate from showing.
 *
 * Three cookies are written for the URL's host (with a leading "www." label
 * removed and a "." prefix added): `age_gate_passed=true`,
 * `selected_state=<state>` and `age_verified=true`, each expiring in one year.
 *
 * This is best effort: any failure (invalid URL, cookie rejection) is logged
 * as a warning and never thrown to the caller.
 *
 * @param page - Puppeteer page object
 * @param url - URL to extract domain from
 * @param state - State to set in cookie
 */
async function setAgeGateCookies(page, url, state = 'Arizona') {
    try {
        const { hostname } = new URL(url);
        // Strip only a leading "www." label. An unanchored replace('www.', '')
        // also rewrites hosts that merely contain that substring
        // (e.g. "awww.example.com" would become "aexample.com").
        const domain = hostname.replace(/^www\./, '');
        const oneYearFromNow = Date.now() / 1000 + 365 * 24 * 60 * 60; // seconds since epoch
        // Set cookies that bypass age gates
        const bypassCookies = [
            ['age_gate_passed', 'true'],
            ['selected_state', state],
            ['age_verified', 'true'],
        ].map(([name, value]) => ({
            name,
            value,
            domain: `.${domain}`,
            path: '/',
            expires: oneYearFromNow,
            httpOnly: false,
            secure: false,
            sameSite: 'Lax',
        }));
        await page.setCookie(...bypassCookies);
        logger_1.logger.info('age-gate', `Set age gate bypass cookies for ${domain} (state: ${state})`);
    }
    catch (err) {
        logger_1.logger.warn('age-gate', `Failed to set age gate cookies: ${err}`);
    }
}
|
|
/**
 * Detects if a page has an age verification gate.
 *
 * Reads the text content of `document.body` inside the page and reports
 * whether it contains any known age-gate phrase. Matching is
 * case-insensitive, so "Are You 21" and "AGE VERIFICATION" are recognised.
 *
 * @param page - Puppeteer page object
 * @returns Promise<boolean> - true when any age-gate phrase is present
 */
async function hasAgeGate(page) {
    // All phrases are lower-case because the body text is lower-cased before matching.
    // NOTE(review): 'verify' is very broad and will also match unrelated copy
    // (e.g. "verify your email"); kept to preserve existing detection behaviour.
    const ageGatePhrases = [
        'age verification',
        'please select your state',
        'are you 21',
        'are you 18',
        'enter your date of birth',
        'verify',
    ];
    // The callback is evaluated in the page context, so it cannot close over
    // Node-side variables; the phrase list is handed over as an argument.
    return page.evaluate((phrases) => {
        const bodyText = (document.body?.textContent || '').toLowerCase();
        return phrases.some((phrase) => bodyText.includes(phrase));
    }, ageGatePhrases);
}
|
|
/**
 * Attempts to bypass an age gate by selecting the appropriate state.
 * Works with multiple age gate patterns used by cannabis dispensaries.
 *
 * Strategies, in order:
 *   0. Custom dropdown button (`button#state`) with `[role="option"]` entries
 *   1. Native `<select>` containing the state
 *   2. State button/card followed by a confirm button (only if 1 found nothing)
 *   3. Direct "Yes" / "Enter" / "Continue" style confirmation button
 *
 * Every strategy is isolated: if one throws (for example because an earlier
 * click already started a navigation and the page context went away), the
 * failure is logged and the remaining flow continues instead of discarding
 * the progress already made.
 *
 * @param page - Puppeteer page object
 * @param state - State to select (e.g., 'Arizona', 'California'). Defaults to 'Arizona'
 * @param useSavedCookies - Not read by this function; retained so existing call sites keep working
 * @returns Promise<boolean> - true when at least one strategy acted and the final
 *   URL does not contain '/age-gate'; false when no age gate was detected, no
 *   matching control was found, the page is still on an '/age-gate' URL, or an
 *   unexpected error occurred
 */
async function bypassAgeGate(page, state = 'Arizona', useSavedCookies = true) {
    try {
        const hasGate = await hasAgeGate(page);
        if (!hasGate) {
            logger_1.logger.info('age-gate', 'No age gate detected');
            return false;
        }
        logger_1.logger.info('age-gate', `Age gate detected - attempting to bypass with state: ${state}...`);
        // Wait a bit for React components to fully render
        await pauseForAgeGate(page, 2000);

        // Try Method 0: Custom dropdown button (shadcn/radix style - Curaleaf)
        const customDropdownWorked = await attemptAgeGateStep('Dropdown method', () => selectStateInCustomDropdown(page, state));
        // Try Method 1: Dropdown select
        const selectFound = await attemptAgeGateStep('Select method', () => selectStateInNativeSelect(page, state));
        // Try Method 2: State button/card (click state, then click confirm)
        const stateClicked = selectFound
            ? false
            : await attemptAgeGateStep('State button method', () => clickStateCardAndConfirm(page, state));
        // Try Method 3: Direct "Yes" or age confirmation button
        const yesClicked = await attemptAgeGateStep('Confirmation button method', () => clickAgeConfirmation(page));

        const bypassed = customDropdownWorked || selectFound || stateClicked || yesClicked;
        if (!bypassed) {
            logger_1.logger.warn('age-gate', `Could not find ${state} option or confirmation button in age gate`);
            return false;
        }

        // Wait for navigation to complete after clicking age gate button
        logger_1.logger.info('age-gate', `Waiting for navigation after age gate bypass...`);
        try {
            await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 15000 });
        }
        catch (e) {
            // Navigation might not trigger, that's ok - wait a bit anyway
            await pauseForAgeGate(page, 3000);
        }
        // Give the page extra time to load content
        await pauseForAgeGate(page, 3000);

        // Verify we actually bypassed by checking the URL
        const finalUrl = page.url();
        if (finalUrl.includes('/age-gate')) {
            logger_1.logger.error('age-gate', `❌ Age gate bypass failed - still at age gate URL: ${finalUrl}`);
            return false;
        }
        logger_1.logger.info('age-gate', `✅ Age gate bypass completed - now at: ${finalUrl}`);
        return true;
    }
    catch (err) {
        logger_1.logger.error('age-gate', `Error bypassing age gate: ${err}`);
        return false;
    }
}

/**
 * Sleeps for `ms` milliseconds. Uses page.waitForTimeout when the page object
 * provides it and falls back to a plain timer otherwise, because newer
 * Puppeteer majors no longer ship Page.waitForTimeout.
 */
async function pauseForAgeGate(page, ms) {
    if (typeof page.waitForTimeout === 'function') {
        await page.waitForTimeout(ms);
        return;
    }
    await new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Runs one bypass strategy; a thrown error is logged as a warning and reported
 * as "strategy did nothing" (false) so that later steps still run.
 */
async function attemptAgeGateStep(label, step) {
    try {
        return await step();
    }
    catch (e) {
        logger_1.logger.warn('age-gate', `${label} failed: ${e}`);
        return false;
    }
}

/**
 * Method 0: opens the custom `button#state` dropdown, picks the option whose
 * text equals the state (case-insensitive) and clicks a visible
 * continue/submit button if one is present. Returns true once the state
 * option was clicked.
 */
async function selectStateInCustomDropdown(page, state) {
    // Click button to open dropdown
    const dropdownButton = await page.$('button#state, button[id="state"]');
    if (!dropdownButton) {
        return false;
    }
    logger_1.logger.info('age-gate', 'Found state dropdown button, clicking...');
    await dropdownButton.click();
    await pauseForAgeGate(page, 800);

    // Click the state option and trigger React events
    const optionClicked = await page.evaluate((selectedState) => {
        const options = Array.from(document.querySelectorAll('[role="option"]'));
        const stateOption = options.find(el => el.textContent?.toLowerCase() === selectedState.toLowerCase());
        if (stateOption instanceof HTMLElement) {
            // Trigger multiple events that React might be listening for
            stateOption.dispatchEvent(new MouseEvent('mousedown', { bubbles: true }));
            stateOption.dispatchEvent(new MouseEvent('mouseup', { bubbles: true }));
            stateOption.click();
            stateOption.dispatchEvent(new MouseEvent('click', { bubbles: true }));
            stateOption.dispatchEvent(new Event('change', { bubbles: true }));
            stateOption.dispatchEvent(new Event('input', { bubbles: true }));
            return true;
        }
        return false;
    }, state);
    if (!optionClicked) {
        return false;
    }
    logger_1.logger.info('age-gate', `Clicked ${state} option with React events`);
    await pauseForAgeGate(page, 1000);

    // Look for and click any submit/continue button that appeared
    const submitClicked = await page.evaluate(() => {
        const buttons = Array.from(document.querySelectorAll('button, [role="button"], a'));
        const submitBtn = buttons.find(el => {
            const text = el.textContent?.toLowerCase() || '';
            const ariaLabel = el.getAttribute('aria-label')?.toLowerCase() || '';
            return text.includes('continue') || text.includes('submit') ||
                text.includes('enter') || text.includes('confirm') ||
                ariaLabel.includes('continue') || ariaLabel.includes('submit');
        });
        // offsetParent === null is used here as the "not rendered" check
        if (submitBtn instanceof HTMLElement && submitBtn.offsetParent !== null) {
            submitBtn.click();
            return true;
        }
        return false;
    });
    if (submitClicked) {
        logger_1.logger.info('age-gate', `Found and clicked submit button`);
    }
    return true;
}

/**
 * Method 1: selects the state in the first native <select> that has an option
 * whose text or value contains the state name, then fires change/input events.
 */
async function selectStateInNativeSelect(page, state) {
    return page.evaluate((selectedState) => {
        const wanted = selectedState.toLowerCase();
        for (const select of Array.from(document.querySelectorAll('select'))) {
            const stateOption = Array.from(select.options).find(opt => opt.text.toLowerCase().includes(wanted) ||
                opt.value.toLowerCase().includes(wanted));
            if (stateOption) {
                select.value = stateOption.value;
                select.dispatchEvent(new Event('change', { bubbles: true }));
                select.dispatchEvent(new Event('input', { bubbles: true }));
                return true;
            }
        }
        return false;
    }, state);
}

/**
 * Method 2: clicks the most specific element mentioning the state, then
 * clicks a confirm button if one is present. Returns true when a state
 * element was clicked.
 */
async function clickStateCardAndConfirm(page, state) {
    const stateClicked = await page.evaluate((selectedState) => {
        const wanted = selectedState.toLowerCase();
        const candidates = Array.from(document.querySelectorAll('button, a, div, span, [role="button"], [class*="state"], [class*="State"], [class*="card"], [class*="option"]'))
            .filter(el => el instanceof HTMLElement && (el.textContent?.toLowerCase() || '').includes(wanted));
        if (candidates.length === 0) {
            return false;
        }
        // In document order the first match is the outermost wrapper whose text
        // merely contains the state name. Prefer the element with the least
        // surrounding text; on ties take the later (inner) one so the click
        // still bubbles up through its ancestors.
        const target = candidates.reduce((best, el) => (el.textContent.trim().length <= best.textContent.trim().length ? el : best));
        target.click();
        return true;
    }, state);
    if (!stateClicked) {
        return false;
    }
    // Wait for confirm button to appear and click it
    await pauseForAgeGate(page, 1000);
    await page.evaluate(() => {
        const confirmBtns = Array.from(document.querySelectorAll('button, a, [role="button"]'));
        const confirmBtn = confirmBtns.find(el => {
            const text = el.textContent?.toLowerCase() || '';
            return text.includes('enter') || text.includes('continue') || text.includes('yes') || text.includes('confirm');
        });
        if (confirmBtn instanceof HTMLElement) {
            confirmBtn.click();
        }
    });
    return true;
}

/**
 * Method 3: clicks the first button/link whose text looks like an age
 * confirmation ("yes", "i am 21", "enter", "continue", ...).
 */
async function clickAgeConfirmation(page) {
    return page.evaluate(() => {
        const confirmButtons = Array.from(document.querySelectorAll('button, a, [role="button"]'));
        const yesButton = confirmButtons.find(el => {
            const text = el.textContent?.toLowerCase() || '';
            return text.includes('yes') ||
                text.includes('i am 21') ||
                text.includes('i am 18') ||
                text.includes('enter the site') ||
                text.includes('enter') ||
                text.includes('continue');
        });
        if (yesButton instanceof HTMLElement) {
            yesButton.click();
            return true;
        }
        return false;
    });
}
|
|
/**
 * Helper to detect the state from a store URL.
 *
 * Looks for a hyphen-delimited two-letter state code (e.g. "-az-", "-ca-")
 * anywhere in the lower-cased URL. When several codes are present, the one
 * listed first in the lookup table wins.
 *
 * @param url - Store URL (string or URL object; null/undefined are tolerated)
 * @returns State name (e.g., 'Arizona', 'California'); 'Arizona' when no
 *   known state code is found
 */
function detectStateFromUrl(url) {
    const stateMap = {
        '-az-': 'Arizona',
        '-ca-': 'California',
        '-co-': 'Colorado',
        '-fl-': 'Florida',
        '-il-': 'Illinois',
        '-ma-': 'Massachusetts',
        '-mi-': 'Michigan',
        '-nv-': 'Nevada',
        '-nj-': 'New Jersey',
        '-ny-': 'New York',
        '-or-': 'Oregon',
        '-pa-': 'Pennsylvania',
        '-wa-': 'Washington',
    };
    // Coerce once: a missing URL falls through to the default instead of
    // throwing, and URL objects are matched on their string form.
    const haystack = String(url ?? '').toLowerCase();
    const match = Object.entries(stateMap).find(([pattern]) => haystack.includes(pattern));
    // Default to Arizona if state not detected
    return match ? match[1] : 'Arizona';
}
|