Files
cannaiq/backend/populate-proxy-locations.ts
2025-11-28 19:45:44 -07:00

156 lines
4.4 KiB
TypeScript

import { pool } from './src/db/migrate';
import { logger } from './src/services/logger';
/**
 * One geolocation record as returned by ip-api.com. The members mirror the
 * names listed in the `fields=` query parameter used by the fetch helpers in
 * this file.
 */
interface GeoLocation {
/** Lookup outcome; the fetch helpers accept a record only when this is 'success'. */
status: string;
/** Country name; written to the `country` column of `proxies`. */
country: string;
/** Country code; written to the `country_code` column of `proxies`. */
countryCode: string;
/** Region identifier (presumably a short code — not read anywhere in this file). */
region: string;
/** Region name; written to the `state` column of `proxies`. */
regionName: string;
/** City name; written to the `city` column of `proxies`. */
city: string;
/** Latitude; written to the `latitude` column of `proxies`. */
lat: number;
/** Longitude; written to the `longitude` column of `proxies`. */
lon: number;
/** The queried address; used as the map key that is matched against a proxy's `host`. */
query: string;
}
/**
 * Look up geolocation data for a single address via the ip-api.com JSON
 * endpoint (free tier, 45 req/min).
 *
 * @param ip - Address to look up. It is percent-encoded before being placed in
 *   the URL path so unexpected characters cannot alter the request.
 * @returns The record when the API reports `status === 'success'`, otherwise
 *   `null`. Never throws: HTTP errors, failed lookups and network/parse errors
 *   are logged under the 'geo' category and mapped to `null`.
 */
async function fetchGeoLocation(ip: string): Promise<GeoLocation | null> {
  // `message` must be requested explicitly, otherwise a failed lookup carries
  // no reason and the warning below could only ever say "Unknown error".
  const fields = 'status,message,country,countryCode,region,regionName,city,lat,lon,query';
  try {
    const response = await fetch(`http://ip-api.com/json/${encodeURIComponent(ip)}?fields=${fields}`);
    if (!response.ok) {
      // Non-2xx (e.g. rate limiting): the body is not guaranteed to be JSON.
      logger.warn('geo', `Failed to lookup ${ip}: HTTP ${response.status}`);
      return null;
    }
    const data = (await response.json()) as Partial<GeoLocation> & { message?: string };
    if (data.status === 'success') {
      return data as GeoLocation;
    }
    logger.warn('geo', `Failed to lookup ${ip}: ${data.message || 'Unknown error'}`);
    return null;
  } catch (error) {
    logger.error('geo', `Error fetching geolocation for ${ip}: ${error}`);
    return null;
  }
}
/**
 * Look up geolocation data for many addresses with one request to the
 * ip-api.com batch endpoint (up to 100 addresses per call).
 *
 * @param ips - Addresses to look up. An empty list short-circuits without any
 *   network traffic.
 * @returns A map from the queried address (the `query` field of each record)
 *   to its record, containing only lookups the API reported as successful.
 *   Never throws: on HTTP, network or parse errors the problem is logged under
 *   the 'geo' category and an empty map is returned.
 */
async function fetchGeoLocationBatch(ips: string[]): Promise<Map<string, GeoLocation>> {
  const results = new Map<string, GeoLocation>();
  if (ips.length === 0) {
    // Nothing to look up; avoid spending a rate-limited request on an empty body.
    return results;
  }
  try {
    // `message` is requested so failed entries can be logged with their reason.
    const response = await fetch('http://ip-api.com/batch?fields=status,message,country,countryCode,region,regionName,city,lat,lon,query', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(ips),
    });
    if (!response.ok) {
      // Non-2xx (e.g. rate limiting or an oversized batch): body may not be a JSON array.
      logger.error('geo', `Error fetching batch geolocation: HTTP ${response.status}`);
      return results;
    }
    const data: unknown = await response.json();
    if (!Array.isArray(data)) {
      logger.error('geo', 'Error fetching batch geolocation: response was not an array');
      return results;
    }
    for (const item of data as Array<Partial<GeoLocation> & { message?: string }>) {
      if (item?.status === 'success' && typeof item.query === 'string') {
        results.set(item.query, item as GeoLocation);
      } else {
        logger.warn('geo', `Failed to lookup ${item?.query ?? 'unknown'}: ${item?.message || 'Unknown error'}`);
      }
    }
    return results;
  } catch (error) {
    logger.error('geo', `Error fetching batch geolocation: ${error}`);
    return new Map();
  }
}
/**
 * Pause asynchronously.
 *
 * @param ms - Delay in milliseconds handed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Backfill geolocation columns for every row in `proxies` that is missing a
 * city or country.
 *
 * Hosts are looked up through `fetchGeoLocationBatch` in groups of up to 100,
 * each successful lookup is written back with an UPDATE, and a summary plus
 * the 20 most common locations are printed at the end.
 *
 * The shared `pool` is closed exactly once, in `finally`, on every path, so
 * this is intended to run as a one-shot script. Errors from the queries or the
 * loop are logged and reflected in the process exit code rather than rethrown.
 */
async function populateProxyLocations(): Promise<void> {
  // Maximum number of addresses accepted by one batch request.
  const batchSize = 100;
  // ip-api.com documents the batch endpoint as limited to 15 requests per
  // minute (the 45/min figure applies to the single-lookup endpoint), so
  // consecutive batches must be spaced at least 4 seconds apart.
  const batchDelayMs = 4000;

  console.log('🌍 Populating proxy geolocation data...\n');
  try {
    // Get all proxies that don't have location data
    const result = await pool.query(
      'SELECT id, host FROM proxies WHERE city IS NULL OR country IS NULL ORDER BY id'
    );
    const proxies = result.rows;
    console.log(`Found ${proxies.length} proxies without geolocation data\n`);
    if (proxies.length === 0) {
      console.log('✅ All proxies already have geolocation data!');
      // Do not end the pool here: `finally` below closes it, and ending a
      // pool twice makes the second call reject.
      return;
    }

    const totalBatches = Math.ceil(proxies.length / batchSize);
    let processed = 0;
    let updated = 0;
    for (let i = 0; i < proxies.length; i += batchSize) {
      const batch = proxies.slice(i, i + batchSize);
      // Rows may repeat a host; send each distinct host only once so
      // duplicates do not waste slots in the request.
      const ips: string[] = Array.from(new Set<string>(batch.map(p => p.host)));
      console.log(`📍 Processing batch ${Math.floor(i / batchSize) + 1}/${totalBatches} (${ips.length} IPs)...`);
      const geoData = await fetchGeoLocationBatch(ips);

      // Update database for each successful lookup
      for (const proxy of batch) {
        const geo = geoData.get(proxy.host);
        if (geo) {
          await pool.query(
            `UPDATE proxies
SET city = $1, state = $2, country = $3, country_code = $4, latitude = $5, longitude = $6, updated_at = NOW()
WHERE id = $7`,
            [geo.city, geo.regionName, geo.country, geo.countryCode, geo.lat, geo.lon, proxy.id]
          );
          updated++;
          console.log(`${proxy.host} -> ${geo.city}, ${geo.regionName}, ${geo.country}`);
        } else {
          console.log(`${proxy.host} -> Lookup failed`);
        }
        processed++;
      }

      // Rate limiting: pause between batches, but not after the last one.
      if (i + batchSize < proxies.length) {
        console.log(`⏳ Waiting ${batchDelayMs / 1000}s to respect rate limits...\n`);
        await sleep(batchDelayMs);
      }
    }

    console.log(`\n✅ Completed!`);
    console.log(` Processed: ${processed} proxies`);
    console.log(` Updated: ${updated} proxies`);
    console.log(` Failed: ${processed - updated} proxies\n`);

    // Show location distribution
    const locationStats = await pool.query(`
SELECT country, state, city, COUNT(*) as count
FROM proxies
WHERE country IS NOT NULL
GROUP BY country, state, city
ORDER BY count DESC
LIMIT 20
`);
    console.log('📊 Top 20 proxy locations:');
    console.table(locationStats.rows);
  } catch (error) {
    console.error('❌ Error:', error);
    // Let callers of the script (shell, CI) see that the run failed.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
// Script entry point. The returned promise can still reject (for example if
// closing the pool fails), so attach a handler instead of leaving it floating.
populateProxyLocations().catch((error: unknown) => {
  console.error('❌ Unhandled error while populating proxy locations:', error);
  process.exitCode = 1;
});