feat(tasks): Consolidate schedule management into task_schedules
- Add schedule CRUD endpoints to /api/tasks/schedules - Add Schedules section to TasksDashboard with edit/delete/bulk actions - Deprecate job_schedules table (entries disabled in DB) - Mark CrawlSchedulePage as deprecated (removed from menu) - Add deprecation comments to legacy schedule methods in api.ts - Add migration comments to workers.ts explaining consolidation Key changes: - Schedule management now at /admin/tasks instead of /admin/schedule - task_schedules uses interval_hours (simpler than base_interval_minutes + jitter) - All schedule routes placed before /:id to avoid Express route conflicts 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,24 @@
|
||||
*
|
||||
* Endpoints for managing worker tasks, viewing capacity metrics,
|
||||
* and generating batch tasks.
|
||||
*
|
||||
* SCHEDULE MANAGEMENT (added 2025-12-12):
|
||||
* This file now contains the canonical schedule management endpoints.
|
||||
* The job_schedules table has been deprecated and all schedule management
|
||||
* is now consolidated into task_schedules.
|
||||
*
|
||||
* Schedule endpoints:
|
||||
* GET /api/tasks/schedules - List all schedules
|
||||
* POST /api/tasks/schedules - Create new schedule
|
||||
* GET /api/tasks/schedules/:id - Get schedule by ID
|
||||
* PUT /api/tasks/schedules/:id - Update schedule
|
||||
* DELETE /api/tasks/schedules/:id - Delete schedule
|
||||
* DELETE /api/tasks/schedules - Bulk delete schedules
|
||||
* POST /api/tasks/schedules/:id/run-now - Trigger schedule immediately
|
||||
* POST /api/tasks/schedules/:id/toggle - Toggle schedule enabled/disabled
|
||||
*
|
||||
* Note: Schedule routes are defined BEFORE /:id to avoid route conflicts
|
||||
* (Express matches routes in order, and "schedules" would match /:id otherwise)
|
||||
*/
|
||||
|
||||
import { Router, Request, Response } from 'express';
|
||||
@@ -131,6 +149,366 @@ router.get('/capacity/:role', async (req: Request, res: Response) => {
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// SCHEDULE MANAGEMENT ROUTES
|
||||
// (Must be before /:id to avoid route conflicts)
|
||||
// ============================================================
|
||||
|
||||
/**
 * GET /api/tasks/schedules
 * List task schedules, ordered by name.
 *
 * Query:
 *   - enabled: when exactly "true", only enabled schedules are returned;
 *              any other value (or none) returns every schedule
 */
router.get('/schedules', async (req: Request, res: Response) => {
  try {
    // Only the literal string "true" activates the filter.
    const enabledFilter = req.query.enabled === 'true' ? ` WHERE enabled = true` : '';

    const sql = [
      `
      SELECT id, name, role, description, enabled, interval_hours,
             priority, state_code, platform, last_run_at, next_run_at,
             last_task_count, last_error, created_at, updated_at
      FROM task_schedules
    `,
      enabledFilter,
      ` ORDER BY name`,
    ].join('');

    const { rows } = await pool.query(sql);
    res.json({ schedules: rows });
  } catch (error: unknown) {
    console.error('Error listing schedules:', error);
    res.status(500).json({ error: 'Failed to list schedules' });
  }
});
|
||||
|
||||
/**
 * DELETE /api/tasks/schedules
 * Bulk delete schedules.
 *
 * Body:
 *   - ids: array of schedule IDs to delete (required unless all=true).
 *          Each entry must be an integer or a string of digits.
 *   - all: boolean (optional) - if exactly true, delete all schedules (ids ignored)
 *
 * Responses:
 *   - 200 { success, deleted_count, deleted, message }
 *   - 400 when neither a non-empty ids array nor all=true is supplied,
 *         or when ids contains a value that is not an integer ID
 *   - 500 on database failure
 */
router.delete('/schedules', async (req: Request, res: Response) => {
  try {
    // DELETE requests frequently arrive without a parsed body; treat a
    // missing body as "no options" so it yields a 400 instead of a TypeError.
    const { ids, all } = req.body || {};

    let result;

    if (all === true) {
      // Delete all schedules
      result = await pool.query(`
        DELETE FROM task_schedules RETURNING id, name
      `);
    } else if (Array.isArray(ids) && ids.length > 0) {
      // Accept digit-only strings for backward compatibility, then require
      // every entry to be a safe integer so bad input is rejected here
      // rather than surfacing as a Postgres cast error (500).
      const scheduleIds: unknown[] = ids.map((id: unknown) =>
        typeof id === 'string' && /^\d+$/.test(id) ? Number(id) : id
      );
      const allIntegers = scheduleIds.every(
        (id) => typeof id === 'number' && Number.isSafeInteger(id)
      );
      if (!allIntegers) {
        return res.status(400).json({
          error: 'ids must be an array of integer schedule IDs',
        });
      }

      // Delete specific schedules by IDs
      result = await pool.query(`
        DELETE FROM task_schedules WHERE id = ANY($1) RETURNING id, name
      `, [scheduleIds]);
    } else {
      return res.status(400).json({
        error: 'Either provide ids array or set all=true',
      });
    }

    res.json({
      success: true,
      deleted_count: result.rowCount,
      deleted: result.rows,
      message: `Deleted ${result.rowCount} schedule(s)`,
    });
  } catch (error: unknown) {
    console.error('Error bulk deleting schedules:', error);
    res.status(500).json({ error: 'Failed to delete schedules' });
  }
});
|
||||
|
||||
/**
 * POST /api/tasks/schedules
 * Create a new schedule.
 *
 * Body:
 *   - name: string (required, unique)
 *   - role: TaskRole (required)
 *   - description: string (optional)
 *   - enabled: boolean (default true)
 *   - interval_hours: positive number (required; numeric strings are accepted)
 *   - priority: number (default 0)
 *   - state_code: string (optional)
 *   - platform: string (optional)
 *
 * Responses:
 *   - 201 with the created schedule row
 *   - 400 when a required field is missing or interval_hours is not a positive number
 *   - 409 when the name collides with an existing schedule (Postgres 23505)
 *   - 500 on any other failure
 */
router.post('/schedules', async (req: Request, res: Response) => {
  try {
    const {
      name,
      role,
      description,
      enabled = true,
      interval_hours,
      priority = 0,
      state_code,
      platform,
    } = req.body || {};

    if (!name || !role || !interval_hours) {
      return res.status(400).json({
        error: 'name, role, and interval_hours are required',
      });
    }

    // Truthiness alone lets values such as "abc" or -5 through, which would
    // produce an Invalid Date or a next_run_at in the past. Normalise numeric
    // strings and insist on a positive finite number.
    const intervalHours: unknown =
      typeof interval_hours === 'string' ? Number(interval_hours) : interval_hours;
    if (
      typeof intervalHours !== 'number' ||
      !Number.isFinite(intervalHours) ||
      intervalHours <= 0
    ) {
      return res.status(400).json({
        error: 'interval_hours must be a positive number',
      });
    }

    // First run is scheduled one full interval from now.
    const nextRunAt = new Date(Date.now() + intervalHours * 60 * 60 * 1000);

    const result = await pool.query(`
      INSERT INTO task_schedules
        (name, role, description, enabled, interval_hours, priority, state_code, platform, next_run_at)
      VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
      RETURNING id, name, role, description, enabled, interval_hours,
                priority, state_code, platform, last_run_at, next_run_at,
                last_task_count, last_error, created_at, updated_at
    `, [name, role, description, enabled, intervalHours, priority, state_code, platform, nextRunAt]);

    res.status(201).json(result.rows[0]);
  } catch (error: unknown) {
    // Database errors carry a SQLSTATE in `code`; read it without assuming the error's shape.
    const sqlState =
      typeof error === 'object' && error !== null
        ? (error as { code?: unknown }).code
        : undefined;

    if (sqlState === '23505') {
      // Unique constraint violation
      return res.status(409).json({ error: 'A schedule with this name already exists' });
    }
    console.error('Error creating schedule:', error);
    res.status(500).json({ error: 'Failed to create schedule' });
  }
});
|
||||
|
||||
/**
 * GET /api/tasks/schedules/:id
 * Get a specific schedule by ID.
 *
 * Responses:
 *   - 200 with the schedule row
 *   - 400 when :id is not numeric
 *   - 404 when no schedule has that ID
 *   - 500 on database failure
 */
router.get('/schedules/:id', async (req: Request, res: Response) => {
  try {
    const scheduleId = parseInt(req.params.id, 10);

    // A non-numeric :id parses to NaN; reject it here instead of letting the
    // query fail on the integer cast and report a misleading 500.
    if (Number.isNaN(scheduleId)) {
      return res.status(400).json({ error: 'Invalid schedule ID' });
    }

    const result = await pool.query(`
      SELECT id, name, role, description, enabled, interval_hours,
             priority, state_code, platform, last_run_at, next_run_at,
             last_task_count, last_error, created_at, updated_at
      FROM task_schedules
      WHERE id = $1
    `, [scheduleId]);

    if (result.rows.length === 0) {
      return res.status(404).json({ error: 'Schedule not found' });
    }

    res.json(result.rows[0]);
  } catch (error: unknown) {
    console.error('Error getting schedule:', error);
    res.status(500).json({ error: 'Failed to get schedule' });
  }
});
|
||||
|
||||
/**
 * PUT /api/tasks/schedules/:id
 * Update an existing schedule. Only fields present in the body are changed.
 *
 * Body (all optional, at least one required):
 *   name, role, description, enabled, interval_hours, priority,
 *   state_code, platform
 *
 * Notes:
 *   - Supplying interval_hours also resets next_run_at to now + interval.
 *   - Falsy state_code / platform values (e.g. "") are stored as NULL.
 *
 * Responses:
 *   - 200 with the updated schedule row
 *   - 400 when :id is not numeric, interval_hours is not a positive number,
 *         or the body contains no updatable field
 *   - 404 when no schedule has that ID
 *   - 409 when the new name collides with another schedule (Postgres 23505)
 *   - 500 on any other failure
 */
router.put('/schedules/:id', async (req: Request, res: Response) => {
  try {
    const scheduleId = parseInt(req.params.id, 10);

    // Reject a non-numeric :id up front rather than failing in the database.
    if (Number.isNaN(scheduleId)) {
      return res.status(400).json({ error: 'Invalid schedule ID' });
    }

    const {
      name,
      role,
      description,
      enabled,
      interval_hours,
      priority,
      state_code,
      platform,
    } = req.body || {};

    // Validate the interval before building the query so bad input never
    // produces an Invalid Date or a next_run_at in the past.
    let intervalHours: number | undefined;
    if (interval_hours !== undefined) {
      const parsed: unknown =
        typeof interval_hours === 'string' ? Number(interval_hours) : interval_hours;
      if (typeof parsed !== 'number' || !Number.isFinite(parsed) || parsed <= 0) {
        return res.status(400).json({
          error: 'interval_hours must be a positive number',
        });
      }
      intervalHours = parsed;
    }

    // Build the dynamic SET clause. The placeholder number is derived from
    // the values array so the two can never drift apart.
    const updates: string[] = [];
    const values: unknown[] = [];
    const setColumn = (column: string, value: unknown): void => {
      values.push(value);
      updates.push(`${column} = $${values.length}`);
    };

    if (name !== undefined) setColumn('name', name);
    if (role !== undefined) setColumn('role', role);
    if (description !== undefined) setColumn('description', description);
    if (enabled !== undefined) setColumn('enabled', enabled);
    if (intervalHours !== undefined) {
      setColumn('interval_hours', intervalHours);
      // Recalculate next_run_at whenever the interval is supplied.
      setColumn('next_run_at', new Date(Date.now() + intervalHours * 60 * 60 * 1000));
    }
    if (priority !== undefined) setColumn('priority', priority);
    if (state_code !== undefined) setColumn('state_code', state_code || null);
    if (platform !== undefined) setColumn('platform', platform || null);

    if (updates.length === 0) {
      return res.status(400).json({ error: 'No fields to update' });
    }

    updates.push('updated_at = NOW()');
    values.push(scheduleId); // last placeholder is the WHERE id

    const result = await pool.query(`
      UPDATE task_schedules
      SET ${updates.join(', ')}
      WHERE id = $${values.length}
      RETURNING id, name, role, description, enabled, interval_hours,
                priority, state_code, platform, last_run_at, next_run_at,
                last_task_count, last_error, created_at, updated_at
    `, values);

    if (result.rows.length === 0) {
      return res.status(404).json({ error: 'Schedule not found' });
    }

    res.json(result.rows[0]);
  } catch (error: unknown) {
    // Database errors carry a SQLSTATE in `code`; read it without assuming the error's shape.
    const sqlState =
      typeof error === 'object' && error !== null
        ? (error as { code?: unknown }).code
        : undefined;

    if (sqlState === '23505') {
      return res.status(409).json({ error: 'A schedule with this name already exists' });
    }
    console.error('Error updating schedule:', error);
    res.status(500).json({ error: 'Failed to update schedule' });
  }
});
|
||||
|
||||
/**
 * DELETE /api/tasks/schedules/:id
 * Delete a single schedule.
 *
 * Responses:
 *   - 200 { success, message } naming the deleted schedule
 *   - 400 when :id is not numeric
 *   - 404 when no schedule has that ID
 *   - 500 on database failure
 */
router.delete('/schedules/:id', async (req: Request, res: Response) => {
  try {
    const scheduleId = parseInt(req.params.id, 10);

    // A non-numeric :id parses to NaN; answer 400 rather than letting the
    // integer cast fail in Postgres and bubble up as a 500.
    if (Number.isNaN(scheduleId)) {
      return res.status(400).json({ error: 'Invalid schedule ID' });
    }

    const result = await pool.query(`
      DELETE FROM task_schedules WHERE id = $1 RETURNING id, name
    `, [scheduleId]);

    if (result.rows.length === 0) {
      return res.status(404).json({ error: 'Schedule not found' });
    }

    res.json({
      success: true,
      message: `Schedule "${result.rows[0].name}" deleted`,
    });
  } catch (error: unknown) {
    console.error('Error deleting schedule:', error);
    res.status(500).json({ error: 'Failed to delete schedule' });
  }
});
|
||||
|
||||
/**
 * POST /api/tasks/schedules/:id/run-now
 * Manually trigger a scheduled task to run immediately.
 *
 * Creates one task from the schedule's role and platform with priority
 * boosted by 10, then stamps last_run_at = NOW() and pushes next_run_at
 * one interval into the future.
 *
 * Responses:
 *   - 200 { success, message, task }
 *   - 400 when :id is not numeric
 *   - 404 when no schedule has that ID
 *   - 500 on any other failure
 */
router.post('/schedules/:id/run-now', async (req: Request, res: Response) => {
  try {
    const scheduleId = parseInt(req.params.id, 10);

    // Reject a non-numeric :id up front rather than failing in the database.
    if (Number.isNaN(scheduleId)) {
      return res.status(400).json({ error: 'Invalid schedule ID' });
    }

    // Get the schedule
    const scheduleResult = await pool.query(`
      SELECT id, name, role, state_code, platform, priority
      FROM task_schedules WHERE id = $1
    `, [scheduleId]);

    if (scheduleResult.rows.length === 0) {
      return res.status(404).json({ error: 'Schedule not found' });
    }

    const schedule = scheduleResult.rows[0];

    // Create a task based on the schedule.
    // NOTE(review): state_code is selected above but not forwarded to
    // createTask — confirm whether manual runs should be scoped by state.
    const task = await taskService.createTask({
      role: schedule.role,
      platform: schedule.platform,
      priority: schedule.priority + 10, // Boost priority for manual runs
    });

    // Record the run and schedule the next one a full interval from now.
    await pool.query(`
      UPDATE task_schedules
      SET last_run_at = NOW(),
          next_run_at = NOW() + (interval_hours || ' hours')::interval,
          updated_at = NOW()
      WHERE id = $1
    `, [scheduleId]);

    res.json({
      success: true,
      message: `Schedule "${schedule.name}" triggered`,
      task,
    });
  } catch (error: unknown) {
    console.error('Error running schedule:', error);
    res.status(500).json({ error: 'Failed to run schedule' });
  }
});
|
||||
|
||||
/**
 * POST /api/tasks/schedules/:id/toggle
 * Flip a schedule's enabled flag in a single UPDATE.
 *
 * Responses:
 *   - 200 { success, schedule: { id, name, enabled }, message }
 *   - 400 when :id is not numeric
 *   - 404 when no schedule has that ID
 *   - 500 on database failure
 */
router.post('/schedules/:id/toggle', async (req: Request, res: Response) => {
  try {
    const scheduleId = parseInt(req.params.id, 10);

    // A non-numeric :id parses to NaN; answer 400 rather than letting the
    // integer cast fail in Postgres and bubble up as a 500.
    if (Number.isNaN(scheduleId)) {
      return res.status(400).json({ error: 'Invalid schedule ID' });
    }

    const result = await pool.query(`
      UPDATE task_schedules
      SET enabled = NOT enabled,
          updated_at = NOW()
      WHERE id = $1
      RETURNING id, name, enabled
    `, [scheduleId]);

    if (result.rows.length === 0) {
      return res.status(404).json({ error: 'Schedule not found' });
    }

    res.json({
      success: true,
      schedule: result.rows[0],
      message: result.rows[0].enabled
        ? `Schedule "${result.rows[0].name}" enabled`
        : `Schedule "${result.rows[0].name}" disabled`,
    });
  } catch (error: unknown) {
    console.error('Error toggling schedule:', error);
    res.status(500).json({ error: 'Failed to toggle schedule' });
  }
});
|
||||
|
||||
// ============================================================
|
||||
// TASK-SPECIFIC ROUTES (with :id parameter)
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* GET /api/tasks/:id
|
||||
* Get a specific task by ID
|
||||
|
||||
@@ -4,10 +4,25 @@
|
||||
* Provider-agnostic worker management and job monitoring.
|
||||
* Replaces legacy /api/dutchie-az/admin/schedules and /api/dutchie-az/monitor/* routes.
|
||||
*
|
||||
* DEPRECATION NOTE (2025-12-12):
|
||||
* This file still queries job_schedules for backwards compatibility with
|
||||
* the /api/workers endpoints that display worker status. However, the
|
||||
* job_schedules table is DEPRECATED - all entries have been disabled.
|
||||
*
|
||||
* Schedule management has been consolidated into task_schedules:
|
||||
* - Use /api/tasks/schedules for schedule CRUD operations
|
||||
* - Use TasksDashboard.tsx (/admin/tasks) for schedule management UI
|
||||
* - task_schedules uses interval_hours (simpler than base_interval_minutes + jitter)
|
||||
*
|
||||
* The /api/workers endpoints remain useful for:
|
||||
* - Monitoring active workers and job status
|
||||
* - K8s scaling controls
|
||||
* - Job history and logs
|
||||
*
|
||||
* Endpoints:
|
||||
* GET /api/workers - List all workers/schedules
|
||||
* GET /api/workers/active - List currently active workers
|
||||
* GET /api/workers/schedule - Get all job schedules
|
||||
* GET /api/workers/schedule - Get all job schedules (DEPRECATED - use /api/tasks/schedules)
|
||||
* GET /api/workers/:workerName - Get specific worker details
|
||||
* GET /api/workers/:workerName/scope - Get worker's scope (states, etc.)
|
||||
* GET /api/workers/:workerName/stats - Get worker statistics
|
||||
|
||||
@@ -11,10 +11,17 @@
|
||||
* - Workers report heartbeats to worker_registry
|
||||
* - Workers are ROLE-AGNOSTIC by default (can handle any task type)
|
||||
*
|
||||
* Stealth & Anti-Detection:
|
||||
* PROXIES ARE REQUIRED - workers will fail to start if no proxies available.
|
||||
* Stealth & Anti-Detection (LAZY INITIALIZATION):
|
||||
* Workers start IMMEDIATELY without waiting for proxies.
|
||||
* Stealth systems (proxies, fingerprints, preflights) are initialized
|
||||
* on first task claim, not at worker startup.
|
||||
*
|
||||
* On startup, workers initialize the CrawlRotator which provides:
|
||||
* This allows workers to:
|
||||
* - Register and send heartbeats immediately
|
||||
* - Wait in main loop without blocking on proxy availability
|
||||
* - Initialize proxies/preflights only when tasks are actually available
|
||||
*
|
||||
* On first task claim attempt, workers initialize the CrawlRotator which provides:
|
||||
* - Proxy rotation: Loads proxies from `proxies` table, ALL requests use proxy
|
||||
* - User-Agent rotation: Cycles through realistic browser fingerprints
|
||||
* - Fingerprint rotation: Changes browser profile on blocks
|
||||
@@ -34,11 +41,16 @@
|
||||
*
|
||||
* Environment:
|
||||
* WORKER_ROLE - Which task role to process (optional, null = any task)
|
||||
* WORKER_ID - Optional custom worker ID (auto-generated if not provided)
|
||||
* POD_NAME - Kubernetes pod name (optional)
|
||||
* POD_NAME - K8s StatefulSet pod name (PRIMARY - use this for persistent identity)
|
||||
* WORKER_ID - Custom worker ID (fallback if POD_NAME not set)
|
||||
* POLL_INTERVAL_MS - How often to check for tasks (default: 5000)
|
||||
* HEARTBEAT_INTERVAL_MS - How often to update heartbeat (default: 30000)
|
||||
* API_BASE_URL - Backend API URL for registration (default: http://localhost:3010)
|
||||
*
|
||||
* Worker Identity:
|
||||
* Workers use POD_NAME as their worker_id for persistent identity across restarts.
|
||||
* In K8s StatefulSet, POD_NAME = "scraper-worker-0" through "scraper-worker-7".
|
||||
* This ensures workers re-register with the same ID instead of creating new entries.
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
@@ -209,6 +221,16 @@ export class TaskWorker {
|
||||
private preflightCurlResult: CurlPreflightResult | null = null;
|
||||
private preflightHttpResult: PuppeteerPreflightResult | null = null;
|
||||
|
||||
// ==========================================================================
|
||||
// LAZY INITIALIZATION FLAGS
|
||||
// ==========================================================================
|
||||
// Stealth/proxy initialization is deferred until first task claim.
|
||||
// Workers register immediately and enter main loop without blocking.
|
||||
// ==========================================================================
|
||||
private stealthInitialized: boolean = false;
|
||||
private preflightsCompleted: boolean = false;
|
||||
private initializingPromise: Promise<void> | null = null;
|
||||
|
||||
constructor(role: TaskRole | null = null, workerId?: string) {
|
||||
this.pool = getPool();
|
||||
this.role = role;
|
||||
@@ -293,9 +315,9 @@ export class TaskWorker {
|
||||
|
||||
/**
|
||||
* Initialize stealth systems (proxy rotation, fingerprints)
|
||||
* Called once on worker startup before processing any tasks.
|
||||
* Called LAZILY on first task claim attempt (NOT at worker startup).
|
||||
*
|
||||
* IMPORTANT: Proxies are REQUIRED. Workers will wait until proxies are available.
|
||||
* IMPORTANT: Proxies are REQUIRED to claim tasks. This method waits until proxies are available.
|
||||
* Workers listen for PostgreSQL NOTIFY 'proxy_added' to wake up immediately when proxies are added.
|
||||
*/
|
||||
private async initializeStealth(): Promise<void> {
|
||||
@@ -482,6 +504,51 @@ export class TaskWorker {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Bring up the stealth systems on demand.
 *
 * Runs initializeStealth() followed by runDualPreflights() the first time it
 * is needed. A call that arrives while an initialization is already in
 * flight awaits that same attempt instead of starting another.
 *
 * Failures are logged and swallowed; both readiness flags are reset so a
 * later call can try again.
 *
 * @returns true when both stealth init and preflights have completed,
 *          false otherwise.
 */
private async ensureStealthInitialized(): Promise<boolean> {
  const isReady = (): boolean => this.stealthInitialized && this.preflightsCompleted;

  // Fast path: nothing left to do.
  if (isReady()) {
    return true;
  }

  // Another caller is mid-initialization; piggyback on its attempt.
  if (this.initializingPromise) {
    await this.initializingPromise;
    return isReady();
  }

  console.log(`[TaskWorker] ${this.friendlyName} lazy-initializing stealth systems (first task claim)...`);

  const runInit = async (): Promise<void> => {
    try {
      // Proxy / fingerprint rotation first...
      await this.initializeStealth();
      this.stealthInitialized = true;

      // ...then the dual-transport preflights.
      await this.runDualPreflights();
      this.preflightsCompleted = true;

      const preflightMsg = `curl=${this.preflightCurlPassed ? '✓' : '✗'} http=${this.preflightHttpPassed ? '✓' : '✗'}`;
      console.log(`[TaskWorker] ${this.friendlyName} stealth ready (${preflightMsg})`);
    } catch (err: any) {
      console.error(`[TaskWorker] ${this.friendlyName} stealth init failed: ${err.message}`);
      this.stealthInitialized = false;
      this.preflightsCompleted = false;
    }
  };

  // Publish the in-flight attempt before awaiting so concurrent callers see it.
  this.initializingPromise = runInit();
  await this.initializingPromise;
  this.initializingPromise = null;

  return isReady();
}
|
||||
|
||||
/**
|
||||
* Register worker with the registry (get friendly name)
|
||||
*/
|
||||
@@ -615,25 +682,22 @@ export class TaskWorker {
|
||||
|
||||
/**
|
||||
* Start the worker loop
|
||||
*
|
||||
* Workers start IMMEDIATELY without blocking on proxy/preflight init.
|
||||
* Stealth systems are lazy-initialized on first task claim.
|
||||
* This allows workers to register and send heartbeats even when proxies aren't ready.
|
||||
*/
|
||||
async start(): Promise<void> {
|
||||
this.isRunning = true;
|
||||
|
||||
// Initialize stealth systems (proxy rotation, fingerprints)
|
||||
await this.initializeStealth();
|
||||
|
||||
// Register with the API to get a friendly name
|
||||
// Register with the API to get a friendly name (non-blocking)
|
||||
await this.register();
|
||||
|
||||
// Run dual-transport preflights
|
||||
await this.runDualPreflights();
|
||||
|
||||
// Start registry heartbeat
|
||||
// Start registry heartbeat immediately
|
||||
this.startRegistryHeartbeat();
|
||||
|
||||
const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)';
|
||||
const preflightMsg = `curl=${this.preflightCurlPassed ? '✓' : '✗'} http=${this.preflightHttpPassed ? '✓' : '✗'}`;
|
||||
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (${preflightMsg}, max ${this.maxConcurrentTasks} concurrent tasks)`);
|
||||
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (stealth=lazy, max ${this.maxConcurrentTasks} concurrent tasks)`);
|
||||
|
||||
while (this.isRunning) {
|
||||
try {
|
||||
@@ -687,6 +751,20 @@ export class TaskWorker {
|
||||
|
||||
// Try to claim more tasks if we have capacity
|
||||
if (this.canAcceptMoreTasks()) {
|
||||
// =================================================================
|
||||
// LAZY INITIALIZATION - Initialize stealth on first task claim
|
||||
// Workers start immediately and init proxies only when needed
|
||||
// =================================================================
|
||||
if (!this.stealthInitialized) {
|
||||
const initSuccess = await this.ensureStealthInitialized();
|
||||
if (!initSuccess) {
|
||||
// Init failed - wait and retry next loop
|
||||
console.log(`[TaskWorker] ${this.friendlyName} stealth init failed, waiting before retry...`);
|
||||
await this.sleep(30000);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Pass preflight capabilities to only claim compatible tasks
|
||||
const task = await taskService.claimTask(
|
||||
this.role,
|
||||
@@ -922,7 +1000,10 @@ async function main(): Promise<void> {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const workerId = process.env.WORKER_ID;
|
||||
// Use POD_NAME for persistent identity in K8s StatefulSet
|
||||
// This ensures workers keep the same ID across restarts
|
||||
// Falls back to WORKER_ID, then generates UUID if neither is set
|
||||
const workerId = process.env.POD_NAME || process.env.WORKER_ID;
|
||||
// Pass null for role-agnostic, or the specific role
|
||||
const worker = new TaskWorker(role || null, workerId);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user