feat: Stealth worker system with mandatory proxy rotation
## Worker System

- Role-agnostic workers that can handle any task type
- Pod-based architecture with StatefulSet (5-15 pods, 5 workers each)
- Custom pod names (Aethelgard, Xylos, Kryll, etc.)
- Worker registry with friendly names and resource monitoring
- Hub-and-spoke visualization on JobQueue page

## Stealth & Anti-Detection (REQUIRED)

- Proxies are MANDATORY - workers fail to start without active proxies
- CrawlRotator initializes on worker startup
- Loads proxies from `proxies` table
- Auto-rotates proxy + fingerprint on 403 errors
- 12 browser fingerprints (Chrome, Firefox, Safari, Edge)
- Locale/timezone matching for geographic consistency

## Task System

- Renamed product_resync → product_refresh
- Task chaining: store_discovery → entry_point → product_discovery
- Priority-based claiming with FOR UPDATE SKIP LOCKED
- Heartbeat and stale task recovery

## UI Updates

- JobQueue: Pod visualization, resource monitoring on hover
- WorkersDashboard: Simplified worker list
- Removed unused filters from task list

## Other

- IP2Location service for visitor analytics
- Findagram consumer features scaffolding
- Documentation updates

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -14,7 +14,7 @@ export type TaskRole =
|
||||
| 'store_discovery'
|
||||
| 'entry_point_discovery'
|
||||
| 'product_discovery'
|
||||
| 'product_resync'
|
||||
| 'product_refresh'
|
||||
| 'analytics_refresh';
|
||||
|
||||
export type TaskStatus =
|
||||
@@ -29,6 +29,8 @@ export interface WorkerTask {
|
||||
id: number;
|
||||
role: TaskRole;
|
||||
dispensary_id: number | null;
|
||||
dispensary_name?: string; // JOINed from dispensaries
|
||||
dispensary_slug?: string; // JOINed from dispensaries
|
||||
platform: string | null;
|
||||
status: TaskStatus;
|
||||
priority: number;
|
||||
@@ -128,13 +130,42 @@ class TaskService {
|
||||
|
||||
/**
|
||||
* Claim a task atomically for a worker
|
||||
* Uses the claim_task SQL function for proper locking when a role is given
|
||||
* If role is null, claims ANY available task (role-agnostic worker)
|
||||
*/
|
||||
async claimTask(role: TaskRole, workerId: string): Promise<WorkerTask | null> {
|
||||
const result = await pool.query(
|
||||
`SELECT * FROM claim_task($1, $2)`,
|
||||
[role, workerId]
|
||||
);
|
||||
async claimTask(role: TaskRole | null, workerId: string): Promise<WorkerTask | null> {
|
||||
if (role) {
|
||||
// Role-specific claiming - use the SQL function
|
||||
const result = await pool.query(
|
||||
`SELECT * FROM claim_task($1, $2)`,
|
||||
[role, workerId]
|
||||
);
|
||||
return (result.rows[0] as WorkerTask) || null;
|
||||
}
|
||||
|
||||
// Role-agnostic claiming - claim ANY pending task
|
||||
const result = await pool.query(`
|
||||
UPDATE worker_tasks
|
||||
SET
|
||||
status = 'claimed',
|
||||
worker_id = $1,
|
||||
claimed_at = NOW()
|
||||
WHERE id = (
|
||||
SELECT id FROM worker_tasks
|
||||
WHERE status = 'pending'
|
||||
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
|
||||
-- Exclude stores that already have an active task
|
||||
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
|
||||
SELECT dispensary_id FROM worker_tasks
|
||||
WHERE status IN ('claimed', 'running')
|
||||
AND dispensary_id IS NOT NULL
|
||||
))
|
||||
ORDER BY priority DESC, created_at ASC
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING *
|
||||
`, [workerId]);
|
||||
|
||||
return (result.rows[0] as WorkerTask) || null;
|
||||
}
|
||||
|
||||
@@ -206,27 +237,27 @@ class TaskService {
|
||||
let paramIndex = 1;
|
||||
|
||||
if (filter.role) {
|
||||
conditions.push(`role = $${paramIndex++}`);
|
||||
conditions.push(`t.role = $${paramIndex++}`);
|
||||
params.push(filter.role);
|
||||
}
|
||||
|
||||
if (filter.status) {
|
||||
if (Array.isArray(filter.status)) {
|
||||
conditions.push(`status = ANY($${paramIndex++})`);
|
||||
conditions.push(`t.status = ANY($${paramIndex++})`);
|
||||
params.push(filter.status);
|
||||
} else {
|
||||
conditions.push(`status = $${paramIndex++}`);
|
||||
conditions.push(`t.status = $${paramIndex++}`);
|
||||
params.push(filter.status);
|
||||
}
|
||||
}
|
||||
|
||||
if (filter.dispensary_id) {
|
||||
conditions.push(`dispensary_id = $${paramIndex++}`);
|
||||
conditions.push(`t.dispensary_id = $${paramIndex++}`);
|
||||
params.push(filter.dispensary_id);
|
||||
}
|
||||
|
||||
if (filter.worker_id) {
|
||||
conditions.push(`worker_id = $${paramIndex++}`);
|
||||
conditions.push(`t.worker_id = $${paramIndex++}`);
|
||||
params.push(filter.worker_id);
|
||||
}
|
||||
|
||||
@@ -235,9 +266,14 @@ class TaskService {
|
||||
const offset = filter.offset ?? 0;
|
||||
|
||||
const result = await pool.query(
|
||||
`SELECT * FROM worker_tasks
|
||||
`SELECT
|
||||
t.*,
|
||||
d.name as dispensary_name,
|
||||
d.slug as dispensary_slug
|
||||
FROM worker_tasks t
|
||||
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
|
||||
${whereClause}
|
||||
ORDER BY created_at DESC
|
||||
ORDER BY t.created_at DESC
|
||||
LIMIT ${limit} OFFSET ${offset}`,
|
||||
params
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user