feat: Stealth worker system with mandatory proxy rotation

## Worker System
- Role-agnostic workers that can handle any task type
- Pod-based architecture with StatefulSet (5-15 pods, 5 workers each)
- Custom pod names (Aethelgard, Xylos, Kryll, etc.)
- Worker registry with friendly names and resource monitoring
- Hub-and-spoke visualization on JobQueue page

## Stealth & Anti-Detection (REQUIRED)
- Proxies are MANDATORY - workers fail to start without active proxies
- CrawlRotator initializes on worker startup
- Loads proxies from `proxies` table
- Auto-rotates proxy + fingerprint on 403 errors
- 12 browser fingerprints (Chrome, Firefox, Safari, Edge)
- Locale/timezone matching for geographic consistency

## Task System
- Renamed product_resync → product_refresh
- Task chaining: store_discovery → entry_point_discovery → product_discovery
- Priority-based claiming with FOR UPDATE SKIP LOCKED
- Heartbeat and stale task recovery

## UI Updates
- JobQueue: Pod visualization, resource monitoring on hover
- WorkersDashboard: Simplified worker list
- Removed unused filters from task list

## Other
- IP2Location service for visitor analytics
- Findagram consumer features scaffolding
- Documentation updates

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-10 00:44:59 -07:00
parent 0295637ed6
commit 56cc171287
61 changed files with 8591 additions and 2076 deletions

View File

@@ -14,7 +14,7 @@ export type TaskRole =
| 'store_discovery'
| 'entry_point_discovery'
| 'product_discovery'
| 'product_resync'
| 'product_refresh'
| 'analytics_refresh';
export type TaskStatus =
@@ -29,6 +29,8 @@ export interface WorkerTask {
id: number;
role: TaskRole;
dispensary_id: number | null;
dispensary_name?: string; // JOINed from dispensaries
dispensary_slug?: string; // JOINed from dispensaries
platform: string | null;
status: TaskStatus;
priority: number;
@@ -128,13 +130,42 @@ class TaskService {
/**
* Claim a task atomically for a worker
* If a role is given, uses the claim_task SQL function for proper locking
* If role is null, claims ANY available pending task (role-agnostic worker)
* via an inline UPDATE guarded by FOR UPDATE SKIP LOCKED
*/
async claimTask(role: TaskRole, workerId: string): Promise<WorkerTask | null> {
const result = await pool.query(
`SELECT * FROM claim_task($1, $2)`,
[role, workerId]
);
async claimTask(role: TaskRole | null, workerId: string): Promise<WorkerTask | null> {
if (role) {
// Role-specific claiming - use the SQL function
const result = await pool.query(
`SELECT * FROM claim_task($1, $2)`,
[role, workerId]
);
return (result.rows[0] as WorkerTask) || null;
}
// Role-agnostic claiming - claim ANY pending task
const result = await pool.query(`
UPDATE worker_tasks
SET
status = 'claimed',
worker_id = $1,
claimed_at = NOW()
WHERE id = (
SELECT id FROM worker_tasks
WHERE status = 'pending'
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
-- Exclude stores that already have an active task
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
SELECT dispensary_id FROM worker_tasks
WHERE status IN ('claimed', 'running')
AND dispensary_id IS NOT NULL
))
ORDER BY priority DESC, created_at ASC
LIMIT 1
FOR UPDATE SKIP LOCKED
)
RETURNING *
`, [workerId]);
return (result.rows[0] as WorkerTask) || null;
}
@@ -206,27 +237,27 @@ class TaskService {
let paramIndex = 1;
if (filter.role) {
conditions.push(`role = $${paramIndex++}`);
conditions.push(`t.role = $${paramIndex++}`);
params.push(filter.role);
}
if (filter.status) {
if (Array.isArray(filter.status)) {
conditions.push(`status = ANY($${paramIndex++})`);
conditions.push(`t.status = ANY($${paramIndex++})`);
params.push(filter.status);
} else {
conditions.push(`status = $${paramIndex++}`);
conditions.push(`t.status = $${paramIndex++}`);
params.push(filter.status);
}
}
if (filter.dispensary_id) {
conditions.push(`dispensary_id = $${paramIndex++}`);
conditions.push(`t.dispensary_id = $${paramIndex++}`);
params.push(filter.dispensary_id);
}
if (filter.worker_id) {
conditions.push(`worker_id = $${paramIndex++}`);
conditions.push(`t.worker_id = $${paramIndex++}`);
params.push(filter.worker_id);
}
@@ -235,9 +266,14 @@ class TaskService {
const offset = filter.offset ?? 0;
const result = await pool.query(
`SELECT * FROM worker_tasks
`SELECT
t.*,
d.name as dispensary_name,
d.slug as dispensary_slug
FROM worker_tasks t
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
${whereClause}
ORDER BY created_at DESC
ORDER BY t.created_at DESC
LIMIT ${limit} OFFSET ${offset}`,
params
);