diff --git a/CLAUDE.md b/CLAUDE.md index 508e267b..579de0e2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -939,7 +939,8 @@ export default defineConfig({ 20) **Crawler Architecture** - **Scraper pod (1 replica)**: Runs the Express API server + scheduler. - - **Scraper-worker pods (5 replicas)**: Each worker runs `dist/dutchie-az/services/worker.js`, polling the job queue. + - **Scraper-worker pods (25 replicas)**: Each runs `dist/tasks/task-worker.js`, polling the job queue. + - **Worker naming**: Pods use fantasy names (Aethelgard, Xylos, Kryll, Coriolis, etc.) - see `k8s/scraper-worker.yaml` ConfigMap. Worker IDs: `{PodName}-worker-{n}` - **Job types**: `menu_detection`, `menu_detection_single`, `dutchie_product_crawl` - **Job schedules** (managed in `job_schedules` table): - `dutchie_az_menu_detection`: Runs daily with 60-min jitter diff --git a/backend/src/routes/worker-registry.ts b/backend/src/routes/worker-registry.ts index 264ed3d1..bfea79b7 100644 --- a/backend/src/routes/worker-registry.ts +++ b/backend/src/routes/worker-registry.ts @@ -70,21 +70,20 @@ router.post('/register', async (req: Request, res: Response) => { ); if (existing.rows.length > 0) { - // Re-activate existing worker + // Re-activate existing worker - keep existing pod_name (fantasy name), don't overwrite with K8s name const { rows } = await pool.query(` UPDATE worker_registry SET status = 'active', role = $1, - pod_name = $2, - hostname = $3, - ip_address = $4, + hostname = $2, + ip_address = $3, last_heartbeat_at = NOW(), started_at = NOW(), - metadata = $5, + metadata = $4, updated_at = NOW() - WHERE worker_id = $6 - RETURNING id, worker_id, friendly_name, role - `, [role, pod_name, finalHostname, clientIp, metadata, finalWorkerId]); + WHERE worker_id = $5 + RETURNING id, worker_id, friendly_name, pod_name, role + `, [role, finalHostname, clientIp, metadata, finalWorkerId]); const worker = rows[0]; const roleMsg = role ? `for ${role}` : 'as role-agnostic'; @@ -105,13 +104,13 @@ router.post('/register', async (req: Request, res: Response) => { const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]); const friendlyName = nameResult.rows[0].name; - // Register the worker + // Register the worker - use friendlyName as pod_name (not K8s name) const { rows } = await pool.query(` INSERT INTO worker_registry ( worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata ) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7) - RETURNING id, worker_id, friendly_name, role - `, [finalWorkerId, friendlyName, role, pod_name, finalHostname, clientIp, metadata]); + RETURNING id, worker_id, friendly_name, pod_name, role + `, [finalWorkerId, friendlyName, role, friendlyName, finalHostname, clientIp, metadata]); const worker = rows[0]; const roleMsg = role ? `for ${role}` : 'as role-agnostic';