The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
265 lines
9.4 KiB
JavaScript
265 lines
9.4 KiB
JavaScript
"use strict";
|
|
// TypeScript interop helper: exposes property `k` of module object `m` on `o`
// under the name `k2` (defaults to `k`). An already-installed helper on `this`
// takes precedence.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (o, m, k, k2) {
        const exportedAs = k2 === undefined ? k : k2;
        let descriptor = Object.getOwnPropertyDescriptor(m, k);
        // Decide whether the source descriptor can be reused as-is or must be
        // replaced by a getter that reads through to the source object.
        let needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !m.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = {
                enumerable: true,
                get: function () { return m[k]; },
            };
        }
        Object.defineProperty(o, exportedAs, descriptor);
    }
    : function (o, m, k, k2) {
        // Fallback when Object.create is unavailable: plain value copy.
        const exportedAs = k2 === undefined ? k : k2;
        o[exportedAs] = m[k];
    });
|
|
// TypeScript interop helper: attaches `v` to `o` as its "default" property.
// An already-installed helper on `this` takes precedence.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    // Preferred path: enumerable property defined via a descriptor.
    function defineDefault(o, v) {
        Object.defineProperty(o, "default", { enumerable: true, value: v });
    }
    // Fallback when Object.create is unavailable: plain assignment.
    function assignDefault(o, v) {
        o["default"] = v;
    }
    return Object.create ? defineDefault : assignDefault;
})();
|
|
// TypeScript interop helper for `import * as ns`: modules flagged `__esModule`
// are returned untouched; anything else is wrapped in a fresh object that
// re-exports every own key except "default" and carries the original module
// as its "default". An already-installed helper on `this` takes precedence.
var __importStar = (this && this.__importStar) || (function () {
    // Resolved on first use and then replaced by the chosen implementation.
    let listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (target) {
            const names = [];
            for (const name in target) {
                if (Object.prototype.hasOwnProperty.call(target, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        const namespace = {};
        if (mod != null) {
            for (const key of listOwnKeys(mod)) {
                if (key !== "default") {
                    __createBinding(namespace, mod, key);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
// TypeScript interop helper for default imports: modules flagged `__esModule`
// pass through unchanged, everything else is wrapped as `{ default: mod }`.
// An already-installed helper on `this` takes precedence.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
// Flag this CommonJS module as transpiled ESM.
Object.defineProperty(exports, "__esModule", { value: true });

// Public API. The plain `exports.name = name` form is kept on purpose so that
// tools which statically scan CommonJS exports keep recognising each name.

// Browser / context factories
exports.createStealthBrowser = createStealthBrowser;
exports.createStealthContext = createStealthContext;

// Human-behaviour simulation
exports.randomDelay = randomDelay;
exports.humanMouseMove = humanMouseMove;
exports.humanScroll = humanScroll;
exports.humanType = humanType;
exports.simulateHumanBehavior = simulateHumanBehavior;

// Page-load and Cloudflare helpers
exports.waitForPageLoad = waitForPageLoad;
exports.isCloudflareChallenge = isCloudflareChallenge;
exports.waitForCloudflareChallenge = waitForCloudflareChallenge;

// Cookie persistence
exports.saveCookies = saveCookies;
exports.loadCookies = loadCookies;

const playwright_extra_1 = require("playwright-extra");
const puppeteer_extra_plugin_stealth_1 = __importDefault(require("puppeteer-extra-plugin-stealth"));

// Register the stealth plugin on playwright-extra's chromium launcher once,
// at module load, before any browser is launched through it.
playwright_extra_1.chromium.use((0, puppeteer_extra_plugin_stealth_1.default)());
|
|
/**
 * Create a stealth browser instance with anti-detection measures.
 *
 * Launches Chromium through playwright-extra with a fixed set of
 * command-line switches.
 *
 * @param {object} [options]
 * @param {boolean} [options.headless] - Runs headless unless this is exactly `false`.
 * @param {object} [options.proxy] - Forwarded unchanged as the launch `proxy` option when truthy.
 * @returns {Promise<object>} The launched browser.
 */
async function createStealthBrowser(options = {}) {
    // All features to disable go into ONE --disable-features switch. Passing
    // the switch twice is unreliable: Chromium builds that do not merge
    // repeated switches keep only the last occurrence, which silently dropped
    // IsolateOrigins/site-per-process in the previous two-switch form.
    const disabledFeatures = [
        'IsolateOrigins',
        'site-per-process',
        'VizDisplayCompositor',
    ];
    const launchOptions = {
        headless: options.headless !== false,
        args: [
            '--disable-blink-features=AutomationControlled',
            `--disable-features=${disabledFeatures.join(',')}`,
            // NOTE(review): this relaxes the browser's same-origin protections;
            // only use these browsers for crawling, never for trusted sessions.
            '--disable-web-security',
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-dev-shm-usage',
            '--disable-accelerated-2d-canvas',
            '--no-first-run',
            '--no-zygote',
            '--disable-gpu',
        ],
    };
    if (options.proxy) {
        launchOptions.proxy = options.proxy;
    }
    const browser = await playwright_extra_1.chromium.launch(launchOptions);
    return browser;
}
|
|
/**
 * Open a browser context with a fixed desktop fingerprint (Phoenix, AZ
 * locale/timezone/geolocation) and pre-seed it with Dutchie cookies.
 *
 * @param {object} browser - Object exposing `newContext(options)`.
 * @param {object} [options]
 * @param {string} [options.userAgent] - Overrides the built-in Chrome-on-Windows user agent when truthy.
 * @param {string} [options.state] - State stored in the `initial_location` cookie; falls back to 'Arizona'.
 * @returns {Promise<object>} The created context, after the cookies were added.
 */
async function createStealthContext(browser, options = {}) {
    const defaultUserAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
    const thirtyDaysInSeconds = 86400 * 30;

    // Builds one cookie scoped to every dutchie.com host, expiring in 30 days.
    const dutchieCookie = (name, value) => ({
        name,
        value,
        domain: '.dutchie.com',
        path: '/',
        expires: Math.floor(Date.now() / 1000) + thirtyDaysInSeconds,
    });

    const requestHeaders = {
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-User': '?1',
        'Sec-Fetch-Dest': 'document',
        'Upgrade-Insecure-Requests': '1',
    };

    const contextOptions = {
        userAgent: options.userAgent || defaultUserAgent,
        viewport: { width: 1920, height: 1080 },
        locale: 'en-US',
        timezoneId: 'America/Phoenix',
        permissions: ['geolocation'],
        geolocation: { latitude: 33.4484, longitude: -112.074 }, // Phoenix, AZ
        colorScheme: 'light',
        deviceScaleFactor: 1,
        hasTouch: false,
        isMobile: false,
        javaScriptEnabled: true,
        extraHTTPHeaders: requestHeaders,
    };

    const context = await browser.newContext(contextOptions);

    // Age verification and location cookies for Dutchie
    await context.addCookies([
        dutchieCookie('age_verified', 'true'),
        dutchieCookie('initial_location', JSON.stringify({ state: options.state || 'Arizona' })),
    ]);
    return context;
}
|
|
/**
 * Pause for a random whole number of milliseconds in [min, max].
 *
 * @param {number} min - Shortest pause in milliseconds.
 * @param {number} max - Longest pause in milliseconds.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function randomDelay(min, max) {
    const spread = max - min + 1;
    const waitMs = Math.floor(Math.random() * spread) + min;
    return new Promise((done) => {
        setTimeout(done, waitMs);
    });
}
|
|
// Last coordinates this module moved each page's mouse to. Keyed weakly so
// entries disappear together with their page object.
const lastMousePositions = new WeakMap();
/**
 * Simulate human-like mouse movement.
 *
 * Moves the mouse to (x, y) in 21 eased steps with a 5-15 ms pause after
 * each step. The path starts where the previous humanMouseMove call on the
 * same page ended, or at (0, 0) on the first call; earlier versions always
 * restarted from (0, 0) (via a no-op page.evaluate round trip), which made
 * consecutive moves teleport back to the origin.
 *
 * NOTE(review): pointer moves made by other code (e.g. page.click) are not
 * tracked here, so the remembered start point can be stale after them.
 *
 * @param {object} page - Page exposing `mouse.move(x, y)`.
 * @param {number} x - Target x coordinate.
 * @param {number} y - Target y coordinate.
 * @returns {Promise<void>}
 */
async function humanMouseMove(page, x, y) {
    const steps = 20;
    const start = lastMousePositions.get(page) || { x: 0, y: 0 };
    for (let i = 0; i <= steps; i++) {
        const progress = i / steps;
        const easeProgress = easeInOutQuad(progress);
        const nextX = start.x + (x - start.x) * easeProgress;
        const nextY = start.y + (y - start.y) * easeProgress;
        await page.mouse.move(nextX, nextY);
        // Record after every successful step so an interrupted move still
        // leaves an accurate starting point for the next call.
        lastMousePositions.set(page, { x: nextX, y: nextY });
        await randomDelay(5, 15);
    }
}
|
|
/**
 * Quadratic ease-in-out curve used to smooth mouse movement.
 *
 * @param {number} t - Progress, 0 at the start and 1 at the end.
 * @returns {number} Eased progress: accelerates below 0.5, decelerates from 0.5 on.
 */
function easeInOutQuad(t) {
    if (t < 0.5) {
        // First half: ease in.
        return 2 * t * t;
    }
    // Second half: ease out.
    return -1 + (4 - 2 * t) * t;
}
|
|
/**
 * Scroll down in ten equal wheel events instead of one jump, pausing
 * 50-150 ms after each event.
 *
 * @param {object} page - Page exposing `mouse.wheel(deltaX, deltaY)`.
 * @param {number} [scrollAmount=500] - Total vertical distance, split evenly across the ten events.
 * @returns {Promise<void>}
 */
async function humanScroll(page, scrollAmount = 500) {
    const scrollSteps = 10;
    const deltaPerStep = scrollAmount / scrollSteps;
    let stepsLeft = scrollSteps;
    while (stepsLeft > 0) {
        await page.mouse.wheel(0, deltaPerStep);
        await randomDelay(50, 150);
        stepsLeft -= 1;
    }
}
|
|
/**
 * Click an element and type into it one character at a time, pausing
 * 100-300 ms after the click and 50-150 ms after every character.
 *
 * @param {object} page - Page exposing `click(selector)` and `keyboard.type(text)`.
 * @param {string} selector - Element to click before typing.
 * @param {string} text - Text to enter.
 * @returns {Promise<void>}
 */
async function humanType(page, selector, text) {
    // Sends a single character, then waits a keystroke-sized pause.
    const pressKey = async (character) => {
        await page.keyboard.type(character);
        await randomDelay(50, 150);
    };
    await page.click(selector);
    await randomDelay(100, 300);
    for (const character of text) {
        await pressKey(character);
    }
}
|
|
/**
 * Warm-up routine to run before interacting with a page: three random
 * mouse moves followed by a short scroll, with random pauses in between.
 *
 * @param {object} page - Page passed through to humanMouseMove / humanScroll.
 * @returns {Promise<void>}
 */
async function simulateHumanBehavior(page) {
    const mouseMoves = 3;
    let completed = 0;
    // Each target lies within x: 100-600, y: 100-400.
    while (completed < mouseMoves) {
        const targetX = Math.random() * 500 + 100;
        const targetY = Math.random() * 300 + 100;
        await humanMouseMove(page, targetX, targetY);
        await randomDelay(200, 500);
        completed += 1;
    }
    // Finish with a 100px scroll and a short rest.
    await humanScroll(page, 100);
    await randomDelay(300, 700);
}
|
|
/**
 * Wait for the page to finish loading, then pause for a random delay.
 *
 * Tries the 'networkidle' load state first. If that attempt fails for any
 * reason, logs a notice and waits for 'domcontentloaded' instead (30 s
 * limit); a failure of the fallback propagates to the caller.
 *
 * @param {object} page - Page exposing `waitForLoadState(state, options)`.
 * @param {number} [timeout=60000] - Limit in milliseconds for the 'networkidle' attempt.
 * @returns {Promise<void>}
 */
async function waitForPageLoad(page, timeout = 60000) {
    // Waits for one load state, then rests for a random pause.
    const settle = async (state, limitMs, minPause, maxPause) => {
        await page.waitForLoadState(state, { timeout: limitMs });
        await randomDelay(minPause, maxPause);
    };
    try {
        await settle('networkidle', timeout, 500, 1500);
    }
    catch (error) {
        // Pages that never go network-idle still count once the DOM is ready.
        console.log('⚠️ networkidle timeout, waiting for domcontentloaded...');
        await settle('domcontentloaded', 30000, 1000, 2000);
    }
}
|
|
/**
 * Check whether the page currently looks like a Cloudflare challenge.
 *
 * Matching is a case-sensitive substring search of the page title and the
 * page HTML against fixed marker strings.
 *
 * @param {object} page - Page exposing async `title()` and `content()`.
 * @returns {Promise<boolean>} `true` when any marker is found.
 */
async function isCloudflareChallenge(page) {
    const titleMarkers = ['Cloudflare', 'Just a moment', 'Attention Required'];
    const contentMarkers = ['challenge-platform', 'cf-challenge', 'Checking your browser'];
    const pageTitle = await page.title();
    const pageHtml = await page.content();
    if (titleMarkers.some((marker) => pageTitle.includes(marker))) {
        return true;
    }
    return contentMarkers.some((marker) => pageHtml.includes(marker));
}
|
|
/**
 * Poll until the page no longer looks like a Cloudflare challenge.
 *
 * Checks the page, and while a challenge is still detected, waits 2-3 s
 * and checks again until `maxWaitMs` has elapsed. Progress is logged on
 * every attempt.
 *
 * @param {object} page - Page passed through to isCloudflareChallenge.
 * @param {number} [maxWaitMs=60000] - Time budget in milliseconds.
 * @returns {Promise<boolean>} `true` once no challenge is detected, `false` when the budget runs out.
 */
async function waitForCloudflareChallenge(page, maxWaitMs = 60000) {
    const startedAt = Date.now();
    const elapsedMs = () => Date.now() - startedAt;
    for (let attempt = 1; elapsedMs() < maxWaitMs; attempt++) {
        const stillChallenged = await isCloudflareChallenge(page);
        if (!stillChallenged) {
            const elapsedSeconds = Math.floor(elapsedMs() / 1000);
            console.log(`✅ Cloudflare challenge passed after ${attempt} attempts (${elapsedSeconds}s)`);
            return true;
        }
        const remainingSeconds = Math.floor((maxWaitMs - elapsedMs()) / 1000);
        console.log(`⏳ Waiting for Cloudflare challenge... (attempt ${attempt}, ${remainingSeconds}s remaining)`);
        // Random delay between checks
        await randomDelay(2000, 3000);
    }
    console.log('❌ Cloudflare challenge timeout - may need residential proxy or manual intervention');
    return false;
}
|
|
/**
 * Write the context's cookies to a file as pretty-printed JSON
 * (2-space indent). Errors from reading the cookies or writing the file
 * propagate to the caller.
 *
 * @param {object} context - Context exposing async `cookies()`.
 * @param {string} filepath - Destination file.
 * @returns {Promise<void>}
 */
async function saveCookies(context, filepath) {
    const sessionCookies = await context.cookies();
    // Transpiled form of a lazy `import('fs/promises')`.
    const fsPromises = await Promise.resolve().then(() => __importStar(require('fs/promises')));
    const serialized = JSON.stringify(sessionCookies, null, 2);
    await fsPromises.writeFile(filepath, serialized);
}
|
|
/**
 * Load session cookies from a JSON file into the context.
 *
 * Best-effort: never throws. A missing file (ENOENT) is treated as the
 * normal "nothing saved yet" case and stays silent; any other failure
 * (unreadable file, invalid JSON, cookies rejected by the context) is
 * logged so a corrupt cookie file is not mistaken for a missing one.
 *
 * @param {object} context - Context exposing async `addCookies(cookies)`.
 * @param {string} filepath - File previously written by saveCookies.
 * @returns {Promise<boolean>} `true` when the cookies were added, `false` otherwise.
 */
async function loadCookies(context, filepath) {
    try {
        // Transpiled form of a lazy `import('fs/promises')`.
        const fs = await Promise.resolve().then(() => __importStar(require('fs/promises')));
        const cookiesString = await fs.readFile(filepath, 'utf-8');
        const cookies = JSON.parse(cookiesString);
        await context.addCookies(cookies);
        return true;
    }
    catch (error) {
        const isMissingFile = Boolean(error) && error.code === 'ENOENT';
        if (!isMissingFile) {
            const reason = error && error.message ? error.message : String(error);
            console.log(`⚠️ Failed to load cookies from ${filepath}: ${reason}`);
        }
        return false;
    }
}
|