Files
cannaiq/backend/dist/services/proxy.js
Kelly 66e07b2009 fix(monitor): remove non-existent worker columns from job_run_logs query
The job_run_logs table tracks scheduled job orchestration, not individual
worker jobs. Worker info (worker_id, worker_hostname) belongs on
dispensary_crawl_jobs, not job_run_logs.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-03 18:45:05 -07:00

324 lines
12 KiB
JavaScript

"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.isBotDetectionError = isBotDetectionError;
exports.putProxyInTimeout = putProxyInTimeout;
exports.isProxyInTimeout = isProxyInTimeout;
exports.getActiveProxy = getActiveProxy;
exports.testProxy = testProxy;
exports.saveProxyTestResult = saveProxyTestResult;
exports.testAllProxies = testAllProxies;
exports.addProxy = addProxy;
exports.addProxiesFromList = addProxiesFromList;
exports.moveProxyToFailed = moveProxyToFailed;
exports.incrementProxyFailure = incrementProxyFailure;
const axios_1 = __importDefault(require("axios"));
const socks_proxy_agent_1 = require("socks-proxy-agent");
const https_proxy_agent_1 = require("https-proxy-agent");
const migrate_1 = require("../db/migrate");
// In-memory proxy timeout tracking.
// Maps proxy ID -> Date.now()-based millisecond timestamp at which that proxy's timeout expires.
// Entries are written by putProxyInTimeout(), read by getActiveProxy(), and removed
// lazily by isProxyInTimeout() the first time it sees that an entry has expired.
// The map is plain module state: nothing in this file persists it.
const proxyTimeouts = new Map();
// Duration added to Date.now() by putProxyInTimeout() when a proxy is put in timeout.
const PROXY_TIMEOUT_MS = 35000; // 35 seconds timeout for bot-detected proxies
/**
 * Decide whether an error message looks like an anti-bot block or throttle.
 *
 * Matching is case-insensitive and substring-based; the value is coerced to a
 * string by RegExp#test, so non-string inputs are tolerated.
 *
 * @param {string} errorMsg - Error text to inspect.
 * @returns {boolean} true when at least one known block/challenge phrase occurs.
 */
function isBotDetectionError(errorMsg) {
    const blockSignatures = [
        /bot detection/i,
        /captcha/i,
        /challenge/i,
        /cloudflare/i,
        /access denied/i,
        /rate limit/i,
        /too many requests/i,
        /temporarily blocked/i,
        /suspicious activity/i,
    ];
    for (const signature of blockSignatures) {
        if (signature.test(errorMsg)) {
            return true;
        }
    }
    return false;
}
/**
 * Start a bot-detection cooldown for a proxy.
 *
 * Records `Date.now() + PROXY_TIMEOUT_MS` as the proxy's expiry in the
 * in-memory `proxyTimeouts` map (overwriting any previous expiry) and logs it.
 *
 * @param {*} proxyId - Key identifying the proxy in `proxyTimeouts`.
 * @param {string} reason - Text appended to the log line.
 * @returns {void}
 */
function putProxyInTimeout(proxyId, reason) {
    const cooldownSeconds = PROXY_TIMEOUT_MS / 1000;
    proxyTimeouts.set(proxyId, Date.now() + PROXY_TIMEOUT_MS);
    console.log(`🚫 Proxy ${proxyId} in timeout for ${cooldownSeconds}s: ${reason}`);
}
/**
 * Report whether a proxy is still inside its cooldown window.
 *
 * Side effect: when the stored expiry has been reached, the entry is removed
 * from `proxyTimeouts` and a "back in rotation" line is logged.
 *
 * @param {*} proxyId - Key identifying the proxy in `proxyTimeouts`.
 * @returns {boolean} true while the stored expiry lies in the future; false
 *   when there is no entry or the entry has expired.
 */
function isProxyInTimeout(proxyId) {
    const expiresAt = proxyTimeouts.get(proxyId);
    const hasEntry = Boolean(expiresAt);
    const expired = hasEntry && Date.now() >= expiresAt;
    if (expired) {
        // Lazy cleanup: expired entries are only dropped when someone asks about them.
        proxyTimeouts.delete(proxyId);
        console.log(`✅ Proxy ${proxyId} timeout expired, back in rotation`);
    }
    return hasEntry && !expired;
}
/**
 * Pick a random active proxy that is not currently in a bot-detection timeout.
 *
 * Active rows are fetched in random order and the first one that is not cooling
 * down is returned. When every active proxy is cooling down, the function sleeps
 * until the earliest of THEIR expiries and then queries again.
 *
 * The wait is derived only from the proxies returned by the query. Entries left
 * in `proxyTimeouts` for proxies that are no longer active are ignored; taking
 * them into account would yield a zero wait and a tight query loop.
 *
 * @returns {Promise<object|null>} A row with id, host, port, protocol, username
 *   and password, or null when the proxies table has no active rows.
 */
async function getActiveProxy() {
    // Iterate rather than recurse so repeated waits do not nest promises.
    for (;;) {
        const result = await migrate_1.pool.query(`
SELECT id, host, port, protocol, username, password
FROM proxies
WHERE active = true
ORDER BY RANDOM()
`);
        const candidates = result.rows;
        if (candidates.length === 0) {
            // Nothing to wait for: no timeout expiring can make a proxy appear.
            console.log('⚠️ No active proxies available');
            return null;
        }
        const usable = candidates.find(proxy => !isProxyInTimeout(proxy.id));
        if (usable) {
            return usable;
        }
        // Every candidate is cooling down; find the soonest expiry among them.
        const expiries = candidates
            .map(proxy => proxyTimeouts.get(proxy.id))
            .filter(expiry => typeof expiry === 'number');
        const nextAvailable = expiries.length > 0 ? Math.min(...expiries) : Date.now();
        const waitTime = Math.max(0, nextAvailable - Date.now());
        console.log(`⏳ All proxies in timeout, waiting ${Math.ceil(waitTime / 1000)}s for next available...`);
        await new Promise(resolve => setTimeout(resolve, waitTime));
    }
}
/**
 * Load the proxy-test settings from the `settings` table.
 *
 * Reads the `proxy_timeout_ms` and `proxy_test_url` keys. Missing or empty
 * values fall back to defaults; a timeout that is not a non-negative base-10
 * integer also falls back, so NaN never reaches the HTTP client.
 *
 * @returns {Promise<{timeout: number, testUrl: string}>} Request timeout in
 *   milliseconds (default 3000) and the URL to fetch through the proxy
 *   (default 'https://httpbin.org/ip').
 */
async function getSettings() {
    const DEFAULT_TIMEOUT_MS = 3000;
    const DEFAULT_TEST_URL = 'https://httpbin.org/ip';
    const result = await migrate_1.pool.query(`
SELECT key, value FROM settings
WHERE key IN ('proxy_timeout_ms', 'proxy_test_url')
`);
    const settings = {};
    for (const row of result.rows) {
        settings[row.key] = row.value;
    }
    // Explicit radix: the value is a decimal millisecond count.
    const parsedTimeout = Number.parseInt(settings.proxy_timeout_ms || '', 10);
    const timeoutIsUsable = Number.isFinite(parsedTimeout) && parsedTimeout >= 0;
    return {
        timeout: timeoutIsUsable ? parsedTimeout : DEFAULT_TIMEOUT_MS,
        testUrl: settings.proxy_test_url || DEFAULT_TEST_URL
    };
}
/**
 * Probe a proxy by fetching the configured test URL through it.
 *
 * Never throws: every failure is reported in the returned object.
 *
 * @param {string} host - Proxy host.
 * @param {number|string} port - Proxy port.
 * @param {string} protocol - 'socks5' or 'socks' (SOCKS agent), 'http' or
 *   'https' (HTTP(S) agent). Anything else is rejected without any I/O.
 * @param {string} [username] - Proxy username; used only together with password.
 * @param {string} [password] - Proxy password; used only together with username.
 * @returns {Promise<{success: boolean, responseTimeMs?: number, isAnonymous?: boolean, error?: string}>}
 *   On success: elapsed milliseconds (measured after settings are loaded) and
 *   the isAnonymous flag. On failure: the error message.
 */
async function testProxy(host, port, protocol, username, password) {
    const isSocks = protocol === 'socks5' || protocol === 'socks';
    const isHttp = protocol === 'http' || protocol === 'https';
    // Reject unknown protocols up front so bad input never costs a settings query.
    if (!isSocks && !isHttp) {
        return {
            success: false,
            error: `Unsupported protocol: ${protocol}`
        };
    }
    try {
        const { timeout, testUrl } = await getSettings();
        const startTime = Date.now();
        // Percent-encode credentials: characters such as '@', ':' or '/' would
        // otherwise be parsed as URL structure and corrupt host or auth.
        const credentials = username && password
            ? `${encodeURIComponent(username)}:${encodeURIComponent(password)}@`
            : '';
        const proxyUrl = `${protocol}://${credentials}${host}:${port}`;
        const agent = isSocks
            ? new socks_proxy_agent_1.SocksProxyAgent(proxyUrl)
            : new https_proxy_agent_1.HttpsProxyAgent(proxyUrl);
        // The same agent is installed for both schemes so the test URL may be http or https.
        const response = await axios_1.default.get(testUrl, {
            httpAgent: agent,
            httpsAgent: agent,
            timeout,
        });
        const responseTimeMs = Date.now() - startTime;
        // NOTE: this is not a real anonymity check. The flag is set whenever the
        // response body carries an `origin` field; the reported IP is never
        // compared against our own address.
        const isAnonymous = Boolean(response.data && response.data.origin);
        return {
            success: true,
            responseTimeMs,
            isAnonymous
        };
    }
    catch (error) {
        return {
            success: false,
            error: (error && error.message) || 'Unknown error'
        };
    }
}
/**
 * Persist the outcome of a proxy test onto the proxy's row.
 *
 * Stamps last_tested_at/updated_at, stores 'success' or 'failed', the response
 * time (NULL when absent), the anonymity flag (false when absent), and sets
 * `active` to the test's success value.
 *
 * @param {*} proxyId - Value matched against proxies.id.
 * @param {{success: boolean, responseTimeMs?: number, isAnonymous?: boolean}} result
 *   Outcome object as produced by testProxy().
 * @returns {Promise<void>}
 */
async function saveProxyTestResult(proxyId, result) {
    const outcomeLabel = result.success ? 'success' : 'failed';
    const latencyMs = result.responseTimeMs || null;
    const anonymous = result.isAnonymous || false;
    const values = [outcomeLabel, latencyMs, anonymous, result.success, proxyId];
    const statement = `
UPDATE proxies
SET last_tested_at = CURRENT_TIMESTAMP,
test_result = $1,
response_time_ms = $2,
is_anonymous = $3,
active = $4,
updated_at = CURRENT_TIMESTAMP
WHERE id = $5
`;
    await migrate_1.pool.query(statement, values);
}
/**
 * Test every row of the proxies table, one at a time, and store each outcome.
 *
 * For each proxy: run testProxy(), persist via saveProxyTestResult(), log the
 * outcome, then pause 500 ms before the next one.
 *
 * @returns {Promise<void>}
 */
async function testAllProxies() {
    console.log('🔍 Testing all proxies...');
    const { rows: proxies } = await migrate_1.pool.query(`
SELECT id, host, port, protocol, username, password
FROM proxies
`);
    const pause = (ms) => new Promise(resolve => setTimeout(resolve, ms));
    for (const { id, host, port, protocol, username, password } of proxies) {
        console.log(`Testing proxy: ${protocol}://${host}:${port}`);
        const outcome = await testProxy(host, port, protocol, username, password);
        await saveProxyTestResult(id, outcome);
        const summary = outcome.success
            ? `✅ Proxy OK (${outcome.responseTimeMs}ms, anonymous: ${outcome.isAnonymous})`
            : `❌ Proxy failed: ${outcome.error}`;
        console.log(summary);
        // Small delay between tests
        await pause(500);
    }
    console.log('✅ Proxy testing complete');
}
/**
 * Test a proxy and, only if the test succeeds, insert it into the proxies table.
 *
 * The inserted row carries the test outcome (active, is_anonymous,
 * test_result = 'success', response_time_ms, last_tested_at = now).
 *
 * @param {string} host
 * @param {number|string} port
 * @param {string} protocol
 * @param {string} [username]
 * @param {string} [password]
 * @returns {Promise<*>} The id of the inserted row.
 * @throws {Error} `Proxy test failed: <reason>` when testProxy() reports failure;
 *   database errors from the insert propagate unchanged.
 */
async function addProxy(host, port, protocol, username, password) {
    const probe = await testProxy(host, port, protocol, username, password);
    if (probe.success) {
        const values = [
            host,
            port,
            protocol,
            username,
            password,
            probe.success,
            probe.isAnonymous,
            'success',
            probe.responseTimeMs
        ];
        const { rows } = await migrate_1.pool.query(`
INSERT INTO proxies (host, port, protocol, username, password, active, is_anonymous, test_result, response_time_ms, last_tested_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, CURRENT_TIMESTAMP)
RETURNING id
`, values);
        return rows[0].id;
    }
    // Untested or broken proxies are never stored by this path.
    throw new Error(`Proxy test failed: ${probe.error}`);
}
/**
 * Bulk-import proxies without testing them; each is stored with active = false.
 *
 * One INSERT ... ON CONFLICT DO NOTHING RETURNING id is issued per proxy. A
 * returned row means the proxy was inserted; no row means (host, port,
 * protocol) already existed and it is counted as a duplicate. This avoids
 * inferring "new" from last_tested_at, which miscounts existing untested rows.
 *
 * A failing insert does not abort the import: it is counted, logged and its
 * message is collected.
 *
 * @param {Array<{host: string, port: number|string, protocol: string, username?: string, password?: string}>} proxies
 * @returns {Promise<{added: number, failed: number, duplicates: number, errors: string[]}>}
 *   Counters plus one `host:port - message` string per failed insert.
 */
async function addProxiesFromList(proxies) {
    let added = 0;
    let failed = 0;
    let duplicates = 0;
    const errors = [];
    console.log(`📥 Importing ${proxies.length} proxies without testing...`);
    for (const proxy of proxies) {
        try {
            const result = await migrate_1.pool.query(`
INSERT INTO proxies (host, port, protocol, username, password, active)
VALUES ($1, $2, $3, $4, $5, false)
ON CONFLICT (host, port, protocol) DO NOTHING
RETURNING id
`, [
                proxy.host,
                proxy.port,
                proxy.protocol,
                proxy.username,
                proxy.password
            ]);
            // RETURNING yields a row only when the INSERT actually happened.
            if (result.rows.length > 0) {
                added++;
                if (added % 100 === 0) {
                    console.log(`📥 Imported ${added} proxies...`);
                }
            }
            else {
                duplicates++;
            }
        }
        catch (error) {
            failed++;
            const errorMsg = `${proxy.host}:${proxy.port} - ${error.message}`;
            errors.push(errorMsg);
            console.log(`❌ Failed to add proxy: ${errorMsg}`);
        }
    }
    console.log(`✅ Import complete: ${added} added, ${duplicates} duplicates, ${failed} failed`);
    return { added, failed, duplicates, errors };
}
/**
 * Archive a proxy into failed_proxies and remove it from proxies.
 *
 * Steps: load the row; upsert it into failed_proxies keyed on (host, port,
 * protocol), refreshing failure_count, last_error and failed_at on conflict;
 * delete the row from proxies; log the move. Does nothing when the id is not
 * found.
 *
 * NOTE(review): the upsert and the delete are separate statements, not a
 * transaction — if the delete fails the proxy exists in both tables.
 *
 * @param {*} proxyId - Value matched against proxies.id.
 * @param {string} errorMsg - Stored as failed_proxies.last_error.
 * @returns {Promise<void>}
 */
async function moveProxyToFailed(proxyId, errorMsg) {
    const lookup = await migrate_1.pool.query(`
SELECT host, port, protocol, username, password, failure_count
FROM proxies
WHERE id = $1
`, [proxyId]);
    if (lookup.rows.length === 0) {
        return;
    }
    const [record] = lookup.rows;
    const { host, port, protocol, username, password, failure_count: failureCount } = record;
    const archiveValues = [host, port, protocol, username, password, failureCount, errorMsg];
    await migrate_1.pool.query(`
INSERT INTO failed_proxies (host, port, protocol, username, password, failure_count, last_error)
VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (host, port, protocol)
DO UPDATE SET
failure_count = $6,
last_error = $7,
failed_at = CURRENT_TIMESTAMP
`, archiveValues);
    await migrate_1.pool.query(`DELETE FROM proxies WHERE id = $1`, [proxyId]);
    console.log(`🔴 Moved proxy to failed: ${protocol}://${host}:${port} (${failureCount} failures)`);
}
/**
 * Record one more failure for a proxy and retire it after the third.
 *
 * The UPDATE bumps failure_count and also sets active = false, so the proxy
 * leaves the active rotation on its first recorded failure. Once the new
 * count reaches 3 the proxy is handed to moveProxyToFailed().
 *
 * @param {*} proxyId - Value matched against proxies.id.
 * @param {string} errorMsg - Passed to moveProxyToFailed() as the last error.
 * @returns {Promise<boolean>} true when the proxy was moved to the failed
 *   table; false when it remains in the proxies table or the id was not found.
 */
async function incrementProxyFailure(proxyId, errorMsg) {
    const { rows } = await migrate_1.pool.query(`
UPDATE proxies
SET failure_count = failure_count + 1,
active = false,
updated_at = CURRENT_TIMESTAMP
WHERE id = $1
RETURNING failure_count, host, port, protocol
`, [proxyId]);
    if (rows.length === 0) {
        return false;
    }
    const [{ failure_count: failureCount, host, port, protocol }] = rows;
    console.log(`⚠️ Proxy failure #${failureCount}: ${protocol}://${host}:${port}`);
    // Third strike (or more): archive the proxy.
    const exhausted = failureCount >= 3;
    if (exhausted) {
        await moveProxyToFailed(proxyId, errorMsg);
    }
    return exhausted;
}