Initial commit - Dutchie dispensary scraper

This commit is contained in:
Kelly
2025-11-28 19:45:44 -07:00
commit 5757a8e9bd
23375 changed files with 3788799 additions and 0 deletions

View File

@@ -0,0 +1,211 @@
import { pool } from '../db/migrate';
import { testProxy, saveProxyTestResult } from './proxy';
/**
 * Progress snapshot of a single row of the `proxy_test_jobs` table, in the
 * shape selected by the job lookup queries of this module.
 */
interface ProxyTestJob {
  /** Identifier of the job row. */
  id: number;

  /**
   * Lifecycle state of the job. This module writes the values 'pending',
   * 'running', 'completed', 'cancelled' and 'failed'.
   */
  status: string;

  /** Number of rows in `proxies` counted when the job was created. */
  total_proxies: number;

  /** Number of proxies tested so far. */
  tested_proxies: number;

  /** Number of tested proxies whose test result reported success. */
  passed_proxies: number;

  /** Number of tested proxies whose test result did not report success. */
  failed_proxies: number;
}
// In-process registry of jobs that are currently being run, keyed by job id.
// Each entry holds the cooperative cancellation flag for that job. This is a
// simple in-memory structure; it could be replaced with Bull/Bee-Queue for
// production.
const activeJobs: Map<number, { cancelled: boolean }> = new Map();
/**
 * Marks every proxy test job still recorded as 'pending' or 'running' as
 * 'cancelled', stamping `completed_at` and `updated_at` with the current time.
 *
 * Meant to be called on server startup to clean up orphaned jobs.
 *
 * This is best-effort: any error is logged to the console and never thrown.
 */
export async function cleanupOrphanedJobs(): Promise<void> {
  // SQL text is kept flush-left so the statement sent to the database is unchanged.
  const cancelOrphansSql = `
UPDATE proxy_test_jobs
SET status = 'cancelled',
completed_at = CURRENT_TIMESTAMP,
updated_at = CURRENT_TIMESTAMP
WHERE status IN ('pending', 'running')
RETURNING id
`;

  try {
    const { rows } = await pool.query(cancelOrphansSql);
    const orphanCount = rows.length;

    // Stay quiet when there was nothing to clean up.
    if (orphanCount > 0) {
      console.log(`🧹 Cleaned up ${orphanCount} orphaned proxy test jobs`);
    }
  } catch (error) {
    console.error('Error cleaning up orphaned jobs:', error);
  }
}
/**
 * Creates a new proxy test job and starts running it in the background.
 *
 * The job row is inserted with status 'pending' and `total_proxies` set to the
 * number of rows currently in `proxies`. The function returns as soon as the
 * row exists; it does not wait for the tests to finish.
 *
 * @returns The id of the newly inserted `proxy_test_jobs` row.
 * @throws Error if a job in state 'pending' or 'running' already exists.
 */
export async function createProxyTestJob(): Promise<number> {
  // Refuse to start while another job is still pending or running.
  const existingJob = await getActiveProxyTestJob();
  if (existingJob) {
    throw new Error('A proxy test job is already running. Please cancel it first.');
  }

  const countResult = await pool.query(`
SELECT COUNT(*) as count FROM proxies
`);
  // Always parse as base 10 rather than relying on parseInt's prefix detection.
  // NOTE(review): presumably the driver returns COUNT(*) as a string — confirm.
  const totalProxies = parseInt(countResult.rows[0].count, 10);

  const insertResult = await pool.query(`
INSERT INTO proxy_test_jobs (status, total_proxies)
VALUES ('pending', $1)
RETURNING id
`, [totalProxies]);
  const jobId = insertResult.rows[0].id;

  // Fire-and-forget: `void` marks the un-awaited promise as intentional, and
  // the catch handler makes sure a rejection is logged instead of going
  // unhandled.
  void runProxyTestJob(jobId).catch((err: unknown) => {
    console.error(`❌ Proxy test job ${jobId} failed:`, err);
  });

  return jobId;
}
/**
 * Looks up the progress snapshot of one proxy test job.
 *
 * @param jobId Id of the `proxy_test_jobs` row to read.
 * @returns The matching job, or `null` when no row has that id.
 */
export async function getProxyTestJob(jobId: number): Promise<ProxyTestJob | null> {
  // SQL text is kept flush-left so the statement sent to the database is unchanged.
  const selectJobSql = `
SELECT id, status, total_proxies, tested_proxies, passed_proxies, failed_proxies
FROM proxy_test_jobs
WHERE id = $1
`;

  const { rows } = await pool.query(selectJobSql, [jobId]);
  return rows.length === 0 ? null : rows[0];
}
/**
 * Finds the most recently created proxy test job whose status is 'pending'
 * or 'running'.
 *
 * @returns That job, or `null` when no job is in either state.
 */
export async function getActiveProxyTestJob(): Promise<ProxyTestJob | null> {
  // SQL text is kept flush-left so the statement sent to the database is unchanged.
  const selectActiveSql = `
SELECT id, status, total_proxies, tested_proxies, passed_proxies, failed_proxies
FROM proxy_test_jobs
WHERE status IN ('pending', 'running')
ORDER BY created_at DESC
LIMIT 1
`;

  const { rows } = await pool.query(selectActiveSql);
  return rows.length === 0 ? null : rows[0];
}
/**
 * Requests cancellation of a proxy test job.
 *
 * If the job is registered in this process, its in-memory cancellation flag
 * is set so the running loop can stop. The database row is updated to
 * 'cancelled' as well, which also covers orphaned jobs that have no
 * in-memory entry.
 *
 * @param jobId Id of the job to cancel.
 * @returns `true` when a row in state 'pending' or 'running' was switched to
 *          'cancelled'; `false` when no such row matched.
 */
export async function cancelProxyTestJob(jobId: number): Promise<boolean> {
  // SQL text is kept flush-left so the statement sent to the database is unchanged.
  const cancelJobSql = `
UPDATE proxy_test_jobs
SET status = 'cancelled',
completed_at = CURRENT_TIMESTAMP,
updated_at = CURRENT_TIMESTAMP
WHERE id = $1 AND status IN ('pending', 'running')
RETURNING id
`;

  // Signal the in-process runner first, when there is one.
  const inMemoryControl = activeJobs.get(jobId);
  if (inMemoryControl) {
    inMemoryControl.cancelled = true;
  }

  // The database is updated regardless of whether an in-memory entry exists.
  const { rows } = await pool.query(cancelJobSql, [jobId]);
  return rows.length > 0;
}
/**
 * Executes one proxy test job from start to finish.
 *
 * Steps, in order:
 *  1. Registers the job in `activeJobs` and marks its row as 'running'.
 *  2. Loads every row of `proxies` (ordered by id) and tests them one at a
 *     time, saving each result and writing the running totals back to the
 *     job row after every proxy.
 *  3. Stops early when the job's in-memory cancellation flag is set.
 *  4. Marks the row 'cancelled' or 'completed'; on any error, logs it and
 *     marks the row 'failed'.
 *
 * The job is always removed from `activeJobs` at the end.
 *
 * @param jobId Id of the `proxy_test_jobs` row to run.
 */
async function runProxyTestJob(jobId: number): Promise<void> {
  // Make the job reachable for cancellation requests.
  activeJobs.set(jobId, { cancelled: false });

  try {
    // Flip the row to 'running' and stamp the start time.
    await pool.query(`
UPDATE proxy_test_jobs
SET status = 'running',
started_at = CURRENT_TIMESTAMP,
updated_at = CURRENT_TIMESTAMP
WHERE id = $1
`, [jobId]);
    console.log(`🔍 Starting proxy test job ${jobId}...`);

    // Fetch the full proxy list up front.
    const proxyQuery = await pool.query(`
SELECT id, host, port, protocol, username, password
FROM proxies
ORDER BY id
`);
    const proxyRows = proxyQuery.rows;

    let testedCount = 0;
    let passedCount = 0;
    let failedCount = 0;

    for (const candidate of proxyRows) {
      // Cooperative cancellation: checked once before each proxy is tested.
      if (activeJobs.get(jobId)?.cancelled) {
        console.log(`⏸️ Proxy test job ${jobId} cancelled`);
        break;
      }

      // Run the test for this proxy, then persist its result.
      const { host, port, protocol, username, password } = candidate;
      const outcome = await testProxy(host, port, protocol, username, password);
      await saveProxyTestResult(candidate.id, outcome);

      testedCount += 1;
      if (outcome.success) {
        passedCount += 1;
      } else {
        failedCount += 1;
      }

      // Write the running totals back after every proxy so that readers of
      // the job row see live progress.
      await pool.query(`
UPDATE proxy_test_jobs
SET tested_proxies = $1,
passed_proxies = $2,
failed_proxies = $3,
updated_at = CURRENT_TIMESTAMP
WHERE id = $4
`, [testedCount, passedCount, failedCount, jobId]);

      // Console progress is throttled to every tenth proxy.
      if (testedCount % 10 === 0) {
        console.log(`📊 Job ${jobId}: ${testedCount}/${proxyRows.length} proxies tested (${passedCount} passed, ${failedCount} failed)`);
      }
    }

    // The flag is re-read here, so a cancellation that arrived after the
    // last loop check still yields a 'cancelled' final status.
    const wasCancelled = activeJobs.get(jobId)?.cancelled;
    const finalStatus = wasCancelled ? 'cancelled' : 'completed';
    await pool.query(`
UPDATE proxy_test_jobs
SET status = $1,
completed_at = CURRENT_TIMESTAMP,
updated_at = CURRENT_TIMESTAMP
WHERE id = $2
`, [finalStatus, jobId]);
    console.log(`✅ Proxy test job ${jobId} ${finalStatus}: ${testedCount} tested, ${passedCount} passed, ${failedCount} failed`);
  } catch (error) {
    console.error(`❌ Proxy test job ${jobId} error:`, error);
    // Record the failure on the job row. If this query rejects as well, the
    // rejection propagates to the caller.
    await pool.query(`
UPDATE proxy_test_jobs
SET status = 'failed',
completed_at = CURRENT_TIMESTAMP,
updated_at = CURRENT_TIMESTAMP
WHERE id = $1
`, [jobId]);
  } finally {
    // Whatever happened, the job is no longer active in this process.
    activeJobs.delete(jobId);
  }
}