The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
324 lines
12 KiB
JavaScript
324 lines
12 KiB
JavaScript
"use strict";
|
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
};
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.isBotDetectionError = isBotDetectionError;
|
|
exports.putProxyInTimeout = putProxyInTimeout;
|
|
exports.isProxyInTimeout = isProxyInTimeout;
|
|
exports.getActiveProxy = getActiveProxy;
|
|
exports.testProxy = testProxy;
|
|
exports.saveProxyTestResult = saveProxyTestResult;
|
|
exports.testAllProxies = testAllProxies;
|
|
exports.addProxy = addProxy;
|
|
exports.addProxiesFromList = addProxiesFromList;
|
|
exports.moveProxyToFailed = moveProxyToFailed;
|
|
exports.incrementProxyFailure = incrementProxyFailure;
|
|
const axios_1 = __importDefault(require("axios"));
|
|
const socks_proxy_agent_1 = require("socks-proxy-agent");
|
|
const https_proxy_agent_1 = require("https-proxy-agent");
|
|
const migrate_1 = require("../db/migrate");
|
|
// In-memory proxy timeout tracking.
// Maps proxy ID -> timestamp (Date.now() milliseconds) at which its timeout expires.
const proxyTimeouts = new Map();
// Cooldown applied to a proxy after bot detection: 35 seconds.
const PROXY_TIMEOUT_MS = 35000;
// Case-insensitive phrases that mark an error message as bot detection / throttling.
// Kept at module level so the list is built once rather than on every call.
const BOT_DETECTION_PATTERNS = [
    /bot detection/i,
    /captcha/i,
    /challenge/i,
    /cloudflare/i,
    /access denied/i,
    /rate limit/i,
    /too many requests/i,
    /temporarily blocked/i,
    /suspicious activity/i,
];
/**
 * Check whether an error message indicates bot detection.
 *
 * @param {string} errorMsg - Error text to inspect; `null`/`undefined` never match.
 * @returns {boolean} `true` when any known bot-detection phrase occurs in the message.
 */
function isBotDetectionError(errorMsg) {
    if (errorMsg === null || errorMsg === undefined) {
        return false;
    }
    const text = String(errorMsg);
    return BOT_DETECTION_PATTERNS.some((pattern) => pattern.test(text));
}
/**
 * Put a proxy in timeout (bot-detection cooldown).
 *
 * Records `now + PROXY_TIMEOUT_MS` as the expiry for `proxyId` in the in-memory
 * `proxyTimeouts` map, replacing any expiry already stored for that proxy.
 *
 * @param {*} proxyId - Key identifying the proxy in `proxyTimeouts`.
 * @param {string} reason - Human-readable reason, used only in the log line.
 * @returns {void}
 */
function putProxyInTimeout(proxyId, reason) {
    const expiresAt = Date.now() + PROXY_TIMEOUT_MS;
    proxyTimeouts.set(proxyId, expiresAt);
    console.log(`🚫 Proxy ${proxyId} in timeout for ${PROXY_TIMEOUT_MS / 1000}s: ${reason}`);
}
/**
 * Check if a proxy is currently in timeout.
 *
 * Side effect: when the stored expiry has passed, the entry is removed from
 * `proxyTimeouts` and a "back in rotation" line is logged.
 *
 * @param {*} proxyId - Key identifying the proxy in `proxyTimeouts`.
 * @returns {boolean} `true` while the proxy's timeout has not yet expired.
 */
function isProxyInTimeout(proxyId) {
    const expiresAt = proxyTimeouts.get(proxyId);
    if (expiresAt === undefined) {
        // Never timed out, or already cleaned up.
        return false;
    }
    if (Date.now() < expiresAt) {
        return true;
    }
    // Timeout expired: forget it so the map does not accumulate stale entries.
    proxyTimeouts.delete(proxyId);
    console.log(`✅ Proxy ${proxyId} timeout expired, back in rotation`);
    return false;
}
/**
 * Get an active proxy that is not in timeout.
 *
 * Queries all rows of `proxies` with `active = true` in random order and returns
 * the first one that is not cooling down. If every active proxy is in timeout,
 * waits until the earliest of *their* timeouts expires and queries again.
 *
 * Only timeouts belonging to the proxies returned by the query are considered.
 * Entries left in `proxyTimeouts` for proxies that are no longer active must not
 * trigger a wait: they are never pruned by this function, so waiting on them
 * would re-query the database in a zero-delay loop forever.
 *
 * @returns {Promise<object|null>} A row with `id, host, port, protocol, username,
 *   password`, or `null` when the query returns no active proxies.
 */
async function getActiveProxy() {
    // Iterate instead of recursing so long cooldown periods do not build up a
    // chain of pending promises.
    for (;;) {
        const result = await migrate_1.pool.query(`
            SELECT id, host, port, protocol, username, password
            FROM proxies
            WHERE active = true
            ORDER BY RANDOM()
        `);
        const activeProxies = result.rows;
        // isProxyInTimeout() also prunes expired entries as a side effect.
        const available = activeProxies.find((proxy) => !isProxyInTimeout(proxy.id));
        if (available) {
            return available;
        }
        // Every active proxy (if any) is cooling down; collect their expiries.
        const expiries = activeProxies
            .map((proxy) => proxyTimeouts.get(proxy.id))
            .filter((expiresAt) => expiresAt !== undefined);
        if (expiries.length === 0) {
            console.log('⚠️ No active proxies available');
            return null;
        }
        const nextAvailable = expiries.reduce((earliest, expiresAt) => Math.min(earliest, expiresAt));
        const waitTime = Math.max(0, nextAvailable - Date.now());
        console.log(`⏳ All proxies in timeout, waiting ${Math.ceil(waitTime / 1000)}s for next available...`);
        await new Promise((resolve) => setTimeout(resolve, waitTime));
        // Loop: query again after waiting.
    }
}
/**
 * Load the proxy-test settings from the `settings` table.
 *
 * Reads the `proxy_timeout_ms` and `proxy_test_url` keys. A missing, empty,
 * non-numeric or negative timeout falls back to 3000 ms, so `NaN` is never
 * returned as a timeout; a missing or empty URL falls back to
 * `https://httpbin.org/ip`.
 *
 * @returns {Promise<{timeout: number, testUrl: string}>}
 */
async function getSettings() {
    const DEFAULT_TIMEOUT_MS = 3000;
    const DEFAULT_TEST_URL = 'https://httpbin.org/ip';
    const result = await migrate_1.pool.query(`
        SELECT key, value FROM settings
        WHERE key IN ('proxy_timeout_ms', 'proxy_test_url')
    `);
    const settings = new Map();
    for (const row of result.rows) {
        settings.set(row.key, row.value);
    }
    // Always parse base 10; reject values that are not a usable number.
    const parsedTimeout = Number.parseInt(settings.get('proxy_timeout_ms'), 10);
    const timeout = Number.isNaN(parsedTimeout) || parsedTimeout < 0
        ? DEFAULT_TIMEOUT_MS
        : parsedTimeout;
    return {
        timeout,
        testUrl: settings.get('proxy_test_url') || DEFAULT_TEST_URL
    };
}
/**
 * Test a proxy by issuing one GET request through it to the configured test URL.
 *
 * Never throws: every failure is reported as `{ success: false, error }`.
 *
 * @param {string} host - Proxy host.
 * @param {number|string} port - Proxy port.
 * @param {string} protocol - One of `socks5`, `socks`, `http`, `https`.
 * @param {string} [username] - Proxy username; used only when `password` is also set.
 * @param {string} [password] - Proxy password; used only when `username` is also set.
 * @returns {Promise<{success: true, responseTimeMs: number, isAnonymous: boolean}
 *   | {success: false, error: string}>}
 */
async function testProxy(host, port, protocol, username, password) {
    try {
        const usesSocks = protocol === 'socks5' || protocol === 'socks';
        const usesHttp = protocol === 'http' || protocol === 'https';
        // Reject unknown protocols before touching the database or the network.
        if (!usesSocks && !usesHttp) {
            return {
                success: false,
                error: `Unsupported protocol: ${protocol}`
            };
        }
        const { timeout, testUrl } = await getSettings();
        const startTime = Date.now();
        // Percent-encode the credentials: characters such as "/", "#", "?" or "%"
        // would otherwise change how the proxy URL is parsed.
        const credentials = username && password
            ? `${encodeURIComponent(username)}:${encodeURIComponent(password)}@`
            : '';
        const proxyUrl = `${protocol}://${credentials}${host}:${port}`;
        // Create the appropriate agent based on protocol.
        const agent = usesSocks
            ? new socks_proxy_agent_1.SocksProxyAgent(proxyUrl)
            : new https_proxy_agent_1.HttpsProxyAgent(proxyUrl);
        // Make the test request through the proxy.
        const response = await axios_1.default.get(testUrl, {
            httpAgent: agent,
            httpsAgent: agent,
            timeout,
        });
        const responseTimeMs = Date.now() - startTime;
        // Simplified anonymity check: the proxy is reported anonymous whenever the
        // response body carries an `origin` field. The returned IP is NOT compared
        // with our real IP.
        const isAnonymous = Boolean(response.data && response.data.origin);
        return {
            success: true,
            responseTimeMs,
            isAnonymous
        };
    }
    catch (error) {
        return {
            success: false,
            // Tolerate a thrown value that is not an Error object.
            error: (error && error.message) || 'Unknown error'
        };
    }
}
/**
 * Persist the outcome of a proxy test on the proxy's row.
 *
 * Sets `test_result` to `'success'`/`'failed'`, stores the response time and
 * anonymity flag, stamps `last_tested_at`/`updated_at`, and sets `active` to the
 * test outcome, so a failed test deactivates the proxy.
 *
 * @param {*} proxyId - `proxies.id` of the row to update.
 * @param {{success: boolean, responseTimeMs?: number, isAnonymous?: boolean}} result
 *   Test outcome, in the shape returned by `testProxy`.
 * @returns {Promise<void>}
 */
async function saveProxyTestResult(proxyId, result) {
    const succeeded = Boolean(result.success);
    // Keep a legitimate 0 ms measurement; only a missing/invalid time becomes NULL.
    const responseTimeMs = Number.isFinite(result.responseTimeMs)
        ? result.responseTimeMs
        : null;
    await migrate_1.pool.query(`
        UPDATE proxies
        SET last_tested_at = CURRENT_TIMESTAMP,
            test_result = $1,
            response_time_ms = $2,
            is_anonymous = $3,
            active = $4,
            updated_at = CURRENT_TIMESTAMP
        WHERE id = $5
    `, [
        succeeded ? 'success' : 'failed',
        responseTimeMs,
        Boolean(result.isAnonymous),
        succeeded,
        proxyId
    ]);
}
/**
 * Test every row in `proxies` (active or not), one at a time, and save each result.
 *
 * Proxies are tested sequentially with a 500 ms pause between consecutive tests.
 * The pause is skipped after the final proxy.
 *
 * @returns {Promise<void>}
 */
async function testAllProxies() {
    const DELAY_BETWEEN_TESTS_MS = 500;
    console.log('🔍 Testing all proxies...');
    const result = await migrate_1.pool.query(`
        SELECT id, host, port, protocol, username, password
        FROM proxies
    `);
    let isFirst = true;
    for (const proxy of result.rows) {
        if (!isFirst) {
            // Small delay between tests.
            await new Promise((resolve) => setTimeout(resolve, DELAY_BETWEEN_TESTS_MS));
        }
        isFirst = false;
        console.log(`Testing proxy: ${proxy.protocol}://${proxy.host}:${proxy.port}`);
        const testResult = await testProxy(proxy.host, proxy.port, proxy.protocol, proxy.username, proxy.password);
        await saveProxyTestResult(proxy.id, testResult);
        if (testResult.success) {
            console.log(`✅ Proxy OK (${testResult.responseTimeMs}ms, anonymous: ${testResult.isAnonymous})`);
        }
        else {
            console.log(`❌ Proxy failed: ${testResult.error}`);
        }
    }
    console.log('✅ Proxy testing complete');
}
/**
 * Test a proxy and, if the test succeeds, insert it into `proxies`.
 *
 * The row is stored together with the test outcome (anonymity flag, response
 * time, `test_result = 'success'`, `last_tested_at = now`).
 *
 * @param {string} host
 * @param {number|string} port
 * @param {string} protocol
 * @param {string} [username]
 * @param {string} [password]
 * @returns {Promise<*>} The `id` of the inserted row.
 * @throws {Error} `Proxy test failed: …` when the proxy test does not succeed;
 *   errors from the INSERT itself propagate unchanged.
 */
async function addProxy(host, port, protocol, username, password) {
    // Test the proxy first; nothing is written for a proxy that does not work.
    const testResult = await testProxy(host, port, protocol, username, password);
    if (!testResult.success) {
        throw new Error(`Proxy test failed: ${testResult.error}`);
    }
    const insertParams = [
        host,
        port,
        protocol,
        username,
        password,
        testResult.success,
        testResult.isAnonymous,
        'success',
        testResult.responseTimeMs
    ];
    const inserted = await migrate_1.pool.query(`
        INSERT INTO proxies (host, port, protocol, username, password, active, is_anonymous, test_result, response_time_ms, last_tested_at)
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, CURRENT_TIMESTAMP)
        RETURNING id
    `, insertParams);
    return inserted.rows[0].id;
}
/**
 * Bulk-import proxies without testing them; rows are inserted with `active = false`.
 *
 * Each proxy is inserted with `ON CONFLICT (host, port, protocol) DO NOTHING
 * RETURNING id`: a returned row means the proxy is new, no returned row means it
 * already existed. This counts duplicates correctly even when the existing row
 * has never been tested, and needs a single query per proxy.
 *
 * A failure on one proxy is recorded and does not stop the import.
 *
 * @param {Array<{host: string, port: number|string, protocol: string,
 *   username?: string, password?: string}>} proxies
 * @returns {Promise<{added: number, failed: number, duplicates: number, errors: string[]}>}
 *   Counters plus one `host:port - message` string per failed proxy.
 */
async function addProxiesFromList(proxies) {
    let added = 0;
    let failed = 0;
    let duplicates = 0;
    const errors = [];
    console.log(`📥 Importing ${proxies.length} proxies without testing...`);
    for (const proxy of proxies) {
        try {
            const result = await migrate_1.pool.query(`
                INSERT INTO proxies (host, port, protocol, username, password, active)
                VALUES ($1, $2, $3, $4, $5, false)
                ON CONFLICT (host, port, protocol) DO NOTHING
                RETURNING id
            `, [
                proxy.host,
                proxy.port,
                proxy.protocol,
                proxy.username,
                proxy.password
            ]);
            if (result.rows.length === 0) {
                // Conflict: nothing was inserted, the proxy already exists.
                duplicates++;
                continue;
            }
            added++;
            if (added % 100 === 0) {
                console.log(`📥 Imported ${added} proxies...`);
            }
        }
        catch (error) {
            failed++;
            const errorMsg = `${proxy.host}:${proxy.port} - ${error.message}`;
            errors.push(errorMsg);
            console.log(`❌ Failed to add proxy: ${errorMsg}`);
        }
    }
    console.log(`✅ Import complete: ${added} added, ${duplicates} duplicates, ${failed} failed`);
    return { added, failed, duplicates, errors };
}
/**
 * Move a proxy from `proxies` to `failed_proxies`.
 *
 * Copies the proxy (with its current `failure_count` and the given error) into
 * `failed_proxies`, updating the existing row when one with the same
 * host/port/protocol is already there, then deletes it from `proxies`. Does
 * nothing to the database when no proxy with `proxyId` exists.
 *
 * Any in-memory timeout entry for the proxy is dropped as well, so removed
 * proxies do not accumulate in `proxyTimeouts`.
 *
 * NOTE(review): the INSERT and the DELETE are two separate statements and are
 * not wrapped in a transaction.
 *
 * @param {*} proxyId - `proxies.id` of the proxy to move.
 * @param {string} errorMsg - Stored as `failed_proxies.last_error`.
 * @returns {Promise<void>}
 */
async function moveProxyToFailed(proxyId, errorMsg) {
    // Get proxy details.
    const proxyResult = await migrate_1.pool.query(`
        SELECT host, port, protocol, username, password, failure_count
        FROM proxies
        WHERE id = $1
    `, [proxyId]);
    if (proxyResult.rows.length === 0) {
        // The proxy is already gone; a leftover timeout entry would be stale.
        proxyTimeouts.delete(proxyId);
        return;
    }
    const proxy = proxyResult.rows[0];
    // Insert into (or refresh) the failed_proxies table.
    await migrate_1.pool.query(`
        INSERT INTO failed_proxies (host, port, protocol, username, password, failure_count, last_error)
        VALUES ($1, $2, $3, $4, $5, $6, $7)
        ON CONFLICT (host, port, protocol)
        DO UPDATE SET
            failure_count = $6,
            last_error = $7,
            failed_at = CURRENT_TIMESTAMP
    `, [
        proxy.host,
        proxy.port,
        proxy.protocol,
        proxy.username,
        proxy.password,
        proxy.failure_count,
        errorMsg
    ]);
    // Delete from active proxies.
    await migrate_1.pool.query(`DELETE FROM proxies WHERE id = $1`, [proxyId]);
    // The proxy no longer exists, so its cooldown entry is meaningless.
    proxyTimeouts.delete(proxyId);
    console.log(`🔴 Moved proxy to failed: ${proxy.protocol}://${proxy.host}:${proxy.port} (${proxy.failure_count} failures)`);
}
/**
 * Record one failure for a proxy and retire it after repeated failures.
 *
 * Every call increments `failure_count` and sets `active = false` on the proxy's
 * row. Once the count reaches 3, the proxy is moved to the failed table via
 * `moveProxyToFailed`.
 *
 * @param {*} proxyId - `proxies.id` of the proxy that failed.
 * @param {string} errorMsg - Passed on to `moveProxyToFailed` when the proxy is retired.
 * @returns {Promise<boolean>} `true` if the proxy was moved to the failed table;
 *   `false` if it stays in `proxies` or no such proxy exists.
 */
async function incrementProxyFailure(proxyId, errorMsg) {
    const MAX_FAILURES = 3;
    // Increment the failure count and deactivate the proxy in one statement.
    const result = await migrate_1.pool.query(`
        UPDATE proxies
        SET failure_count = failure_count + 1,
            active = false,
            updated_at = CURRENT_TIMESTAMP
        WHERE id = $1
        RETURNING failure_count, host, port, protocol
    `, [proxyId]);
    if (result.rows.length === 0) {
        // No row was updated: unknown proxy.
        return false;
    }
    const proxy = result.rows[0];
    const failureCount = proxy.failure_count;
    console.log(`⚠️ Proxy failure #${failureCount}: ${proxy.protocol}://${proxy.host}:${proxy.port}`);
    if (failureCount < MAX_FAILURES) {
        // Stays in the proxies table (now with active = false).
        return false;
    }
    // Failed too many times: move to the failed table.
    await moveProxyToFailed(proxyId, errorMsg);
    return true;
}