feat: Auto-retry failed proxies after cooldown period
- Add last_failed_at column to track failure time - Failed proxies auto-retry after 4 hours (configurable) - Proxies permanently failed after 10 failures - Add /retry-stats and /reenable-failed API endpoints - markProxySuccess() re-enables recovered proxies 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
81
backend/migrations/095_proxy_auto_retry.sql
Normal file
81
backend/migrations/095_proxy_auto_retry.sql
Normal file
@@ -0,0 +1,81 @@
|
||||
-- Migration: Auto-retry failed proxies after cooldown period
|
||||
-- Proxies that fail will be retried after a configurable interval
|
||||
|
||||
-- Add last_failed_at column to track when proxy last failed
|
||||
ALTER TABLE proxies ADD COLUMN IF NOT EXISTS last_failed_at TIMESTAMP;
|
||||
|
||||
-- Add retry settings
|
||||
INSERT INTO settings (key, value, description)
|
||||
VALUES
|
||||
('proxy_retry_interval_hours', '4', 'Hours to wait before retrying a failed proxy'),
|
||||
('proxy_max_failures_before_permanent', '10', 'Max failures before proxy is permanently disabled')
|
||||
ON CONFLICT (key) DO NOTHING;
|
||||
|
||||
-- Create function to get eligible proxies (active OR failed but past retry interval)
|
||||
CREATE OR REPLACE FUNCTION get_eligible_proxy_ids()
|
||||
RETURNS TABLE(proxy_id INT) AS $$
|
||||
DECLARE
|
||||
retry_hours INT;
|
||||
BEGIN
|
||||
-- Get retry interval from settings (default 4 hours)
|
||||
SELECT COALESCE(value::int, 4) INTO retry_hours
|
||||
FROM settings WHERE key = 'proxy_retry_interval_hours';
|
||||
|
||||
RETURN QUERY
|
||||
SELECT p.id
|
||||
FROM proxies p
|
||||
WHERE p.active = true
|
||||
OR (
|
||||
p.active = false
|
||||
AND p.last_failed_at IS NOT NULL
|
||||
AND p.last_failed_at < NOW() - (retry_hours || ' hours')::interval
|
||||
AND p.failure_count < 10 -- Don't retry if too many failures
|
||||
)
|
||||
ORDER BY
|
||||
p.active DESC, -- Prefer active proxies
|
||||
p.failure_count ASC, -- Then prefer proxies with fewer failures
|
||||
RANDOM();
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Create scheduled job to periodically re-enable proxies past their retry window
|
||||
-- This runs every hour and marks proxies as active if they're past retry interval
|
||||
CREATE OR REPLACE FUNCTION auto_reenable_proxies()
|
||||
RETURNS INT AS $$
|
||||
DECLARE
|
||||
retry_hours INT;
|
||||
max_failures INT;
|
||||
reenabled_count INT;
|
||||
BEGIN
|
||||
-- Get settings
|
||||
SELECT COALESCE(value::int, 4) INTO retry_hours
|
||||
FROM settings WHERE key = 'proxy_retry_interval_hours';
|
||||
|
||||
SELECT COALESCE(value::int, 10) INTO max_failures
|
||||
FROM settings WHERE key = 'proxy_max_failures_before_permanent';
|
||||
|
||||
-- Re-enable proxies that have cooled down
|
||||
UPDATE proxies
|
||||
SET active = true,
|
||||
updated_at = NOW()
|
||||
WHERE active = false
|
||||
AND last_failed_at IS NOT NULL
|
||||
AND last_failed_at < NOW() - (retry_hours || ' hours')::interval
|
||||
AND failure_count < max_failures;
|
||||
|
||||
GET DIAGNOSTICS reenabled_count = ROW_COUNT;
|
||||
|
||||
IF reenabled_count > 0 THEN
|
||||
RAISE NOTICE 'Auto-reenabled % proxies after % hour cooldown', reenabled_count, retry_hours;
|
||||
END IF;
|
||||
|
||||
RETURN reenabled_count;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Add index for efficient querying
|
||||
CREATE INDEX IF NOT EXISTS idx_proxies_retry
|
||||
ON proxies(active, last_failed_at, failure_count);
|
||||
|
||||
COMMENT ON COLUMN proxies.last_failed_at IS 'Timestamp of last failure - used for auto-retry logic';
|
||||
COMMENT ON FUNCTION auto_reenable_proxies() IS 'Call periodically to re-enable failed proxies that have cooled down';
|
||||
@@ -288,4 +288,56 @@ router.post('/update-locations', requireRole('superadmin', 'admin'), async (req,
|
||||
}
|
||||
});
|
||||
|
||||
// Get proxy retry stats
|
||||
router.get('/retry-stats', async (req, res) => {
|
||||
try {
|
||||
const stats = await pool.query(`
|
||||
SELECT
|
||||
COUNT(*) FILTER (WHERE active = true) as active_count,
|
||||
COUNT(*) FILTER (WHERE active = false) as inactive_count,
|
||||
COUNT(*) FILTER (WHERE active = false AND last_failed_at IS NOT NULL
|
||||
AND last_failed_at < NOW() - INTERVAL '4 hours' AND failure_count < 10) as ready_for_retry,
|
||||
COUNT(*) FILTER (WHERE failure_count >= 10) as permanently_failed
|
||||
FROM proxies
|
||||
`);
|
||||
|
||||
res.json(stats.rows[0]);
|
||||
} catch (error) {
|
||||
console.error('Error fetching retry stats:', error);
|
||||
res.status(500).json({ error: 'Failed to fetch retry stats' });
|
||||
}
|
||||
});
|
||||
|
||||
// Manually re-enable proxies that have passed their retry interval
|
||||
router.post('/reenable-failed', requireRole('superadmin', 'admin'), async (req, res) => {
|
||||
try {
|
||||
// Get retry interval from settings
|
||||
const settingsResult = await pool.query(`
|
||||
SELECT value::int as hours FROM settings WHERE key = 'proxy_retry_interval_hours'
|
||||
`);
|
||||
const retryHours = settingsResult.rows[0]?.hours || 4;
|
||||
|
||||
// Re-enable proxies
|
||||
const result = await pool.query(`
|
||||
UPDATE proxies
|
||||
SET active = true,
|
||||
updated_at = NOW()
|
||||
WHERE active = false
|
||||
AND last_failed_at IS NOT NULL
|
||||
AND last_failed_at < NOW() - ($1 || ' hours')::interval
|
||||
AND failure_count < 10
|
||||
RETURNING id
|
||||
`, [retryHours]);
|
||||
|
||||
res.json({
|
||||
message: `Re-enabled ${result.rowCount} proxies`,
|
||||
count: result.rowCount,
|
||||
retryIntervalHours: retryHours
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Error re-enabling proxies:', error);
|
||||
res.status(500).json({ error: 'Failed to re-enable proxies' });
|
||||
}
|
||||
});
|
||||
|
||||
export default router;
|
||||
|
||||
@@ -54,18 +54,57 @@ export function isProxyInTimeout(proxyId: number): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Get retry interval from settings (cached for 5 minutes)
|
||||
let cachedRetryHours: number | null = null;
|
||||
let retryHoursCacheTime = 0;
|
||||
const RETRY_CACHE_TTL = 5 * 60 * 1000; // 5 minutes
|
||||
|
||||
async function getRetryIntervalHours(): Promise<number> {
|
||||
const now = Date.now();
|
||||
if (cachedRetryHours !== null && now - retryHoursCacheTime < RETRY_CACHE_TTL) {
|
||||
return cachedRetryHours;
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await pool.query(`
|
||||
SELECT value::int FROM settings WHERE key = 'proxy_retry_interval_hours'
|
||||
`);
|
||||
cachedRetryHours = result.rows[0]?.value || 4;
|
||||
retryHoursCacheTime = now;
|
||||
} catch {
|
||||
cachedRetryHours = 4; // default
|
||||
}
|
||||
return cachedRetryHours as number;
|
||||
}
|
||||
|
||||
// Get active proxy that's not in timeout
|
||||
// Also includes failed proxies that have passed their retry interval
|
||||
export async function getActiveProxy(): Promise<{ id: number; host: string; port: number; protocol: string; username?: string; password?: string } | null> {
|
||||
const retryHours = await getRetryIntervalHours();
|
||||
|
||||
const result = await pool.query(`
|
||||
SELECT id, host, port, protocol, username, password
|
||||
SELECT id, host, port, protocol, username, password, active, failure_count
|
||||
FROM proxies
|
||||
WHERE active = true
|
||||
ORDER BY RANDOM()
|
||||
`);
|
||||
OR (
|
||||
active = false
|
||||
AND last_failed_at IS NOT NULL
|
||||
AND last_failed_at < NOW() - ($1 || ' hours')::interval
|
||||
AND failure_count < 10
|
||||
)
|
||||
ORDER BY
|
||||
active DESC,
|
||||
failure_count ASC,
|
||||
RANDOM()
|
||||
`, [retryHours]);
|
||||
|
||||
// Filter out proxies in timeout
|
||||
for (const proxy of result.rows) {
|
||||
if (!isProxyInTimeout(proxy.id)) {
|
||||
// If this is a retry of a failed proxy, log it
|
||||
if (!proxy.active) {
|
||||
console.log(`🔄 Retrying previously failed proxy ${proxy.id} (${proxy.failure_count} failures)`);
|
||||
}
|
||||
return proxy;
|
||||
}
|
||||
}
|
||||
@@ -341,11 +380,12 @@ export async function moveProxyToFailed(proxyId: number, errorMsg: string): Prom
|
||||
}
|
||||
|
||||
export async function incrementProxyFailure(proxyId: number, errorMsg: string): Promise<boolean> {
|
||||
// Increment failure count
|
||||
// Increment failure count and set last_failed_at
|
||||
const result = await pool.query(`
|
||||
UPDATE proxies
|
||||
SET failure_count = failure_count + 1,
|
||||
active = false,
|
||||
last_failed_at = CURRENT_TIMESTAMP,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
WHERE id = $1
|
||||
RETURNING failure_count, host, port, protocol
|
||||
@@ -358,13 +398,26 @@ export async function incrementProxyFailure(proxyId: number, errorMsg: string):
|
||||
const proxy = result.rows[0];
|
||||
const failureCount = proxy.failure_count;
|
||||
|
||||
console.log(`⚠️ Proxy failure #${failureCount}: ${proxy.protocol}://${proxy.host}:${proxy.port}`);
|
||||
console.log(`⚠️ Proxy failure #${failureCount}: ${proxy.protocol}://${proxy.host}:${proxy.port} (will retry after cooldown)`);
|
||||
|
||||
// If failed 3 times, move to failed table
|
||||
if (failureCount >= 3) {
|
||||
// If failed 10+ times, move to permanently failed table
|
||||
if (failureCount >= 10) {
|
||||
await moveProxyToFailed(proxyId, errorMsg);
|
||||
return true; // Moved to failed
|
||||
}
|
||||
|
||||
return false; // Still in active proxies
|
||||
return false; // Still in proxies table, will be retried after cooldown
|
||||
}
|
||||
|
||||
// Mark proxy as successful (re-enable if it was being retried)
|
||||
export async function markProxySuccess(proxyId: number): Promise<void> {
|
||||
await pool.query(`
|
||||
UPDATE proxies
|
||||
SET active = true,
|
||||
failure_count = GREATEST(0, failure_count - 1),
|
||||
success_count = success_count + 1,
|
||||
last_failed_at = NULL,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
WHERE id = $1
|
||||
`, [proxyId]);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user