Compare commits
23 Commits
fix/ci-fil ... fix/ci-rem
| Author | SHA1 | Date |
|---|---|---|
| | 90567511dd | |
| | fc7fc5ea85 | |
| | ab8956b14b | |
| | 1d9c90641f | |
| | 6126b907f2 | |
| | cc93d2d483 | |
| | 7642c17ec0 | |
| | cb60dcf352 | |
| | 5ffe05d519 | |
| | 8e2f07c941 | |
| | 0b6e615075 | |
| | be251c6fb3 | |
| | efb1e89e33 | |
| | 529c447413 | |
| | 1eaf95c06b | |
| | 138ed17d8b | |
| | a880c41d89 | |
| | 2a9ae61dce | |
| | 1f21911fa1 | |
| | 6f0a58f5d2 | |
| | 8206dce821 | |
| | ced1afaa8a | |
| | f5214da54c | |
.woodpecker.yml (new file, 191 lines)

@@ -0,0 +1,191 @@

```yaml
steps:
  # ===========================================
  # PR VALIDATION: Parallel type checks (PRs only)
  # ===========================================
  typecheck-backend:
    image: code.cannabrands.app/creationshop/node:20
    commands:
      - cd backend
      - npm ci --prefer-offline
      - npx tsc --noEmit
    depends_on: []
    when:
      event: pull_request

  typecheck-cannaiq:
    image: code.cannabrands.app/creationshop/node:20
    commands:
      - cd cannaiq
      - npm ci --prefer-offline
      - npx tsc --noEmit
    depends_on: []
    when:
      event: pull_request

  typecheck-findadispo:
    image: code.cannabrands.app/creationshop/node:20
    commands:
      - cd findadispo/frontend
      - npm ci --prefer-offline
      - npx tsc --noEmit 2>/dev/null || true
    depends_on: []
    when:
      event: pull_request

  typecheck-findagram:
    image: code.cannabrands.app/creationshop/node:20
    commands:
      - cd findagram/frontend
      - npm ci --prefer-offline
      - npx tsc --noEmit 2>/dev/null || true
    depends_on: []
    when:
      event: pull_request

  # ===========================================
  # AUTO-MERGE: Merge PR after all checks pass
  # ===========================================
  auto-merge:
    image: alpine:latest
    environment:
      GITEA_TOKEN:
        from_secret: gitea_token
    commands:
      - apk add --no-cache curl
      - |
        echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
        curl -s -X POST \
          -H "Authorization: token $GITEA_TOKEN" \
          -H "Content-Type: application/json" \
          -d '{"Do":"merge"}' \
          "https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
    depends_on:
      - typecheck-backend
      - typecheck-cannaiq
      - typecheck-findadispo
      - typecheck-findagram
    when:
      event: pull_request

  # ===========================================
  # MASTER DEPLOY: Parallel Docker builds
  # ===========================================
  docker-backend:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/dispensary-scraper
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: backend/Dockerfile
      context: backend
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
      build_args:
        APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
        APP_GIT_SHA: ${CI_COMMIT_SHA}
        APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
        CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
    depends_on: []
    when:
      branch: master
      event: push

  docker-cannaiq:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/cannaiq-frontend
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: cannaiq/Dockerfile
      context: cannaiq
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
    depends_on: []
    when:
      branch: master
      event: push

  docker-findadispo:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/findadispo-frontend
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: findadispo/frontend/Dockerfile
      context: findadispo/frontend
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
    depends_on: []
    when:
      branch: master
      event: push

  docker-findagram:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/findagram-frontend
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: findagram/frontend/Dockerfile
      context: findagram/frontend
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
    depends_on: []
    when:
      branch: master
      event: push

  # ===========================================
  # STAGE 3: Deploy and Run Migrations
  # ===========================================
  deploy:
    image: bitnami/kubectl:latest
    environment:
      KUBECONFIG_CONTENT:
        from_secret: kubeconfig_data
    commands:
      - mkdir -p ~/.kube
      - echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
      - chmod 600 ~/.kube/config
      # Deploy backend first
      - kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
      # Note: Migrations run automatically at startup via auto-migrate
      # Deploy remaining services
      - kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
    depends_on:
      - docker-backend
      - docker-cannaiq
      - docker-findadispo
      - docker-findagram
    when:
      branch: master
      event: push
```
```diff
@@ -160,32 +160,7 @@ steps:
       event: push
 
-  # ===========================================
-  # STAGE 3: Run Database Migrations (before deploy)
-  # ===========================================
-  migrate:
-    image: code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8}
-    environment:
-      CANNAIQ_DB_HOST:
-        from_secret: db_host
-      CANNAIQ_DB_PORT:
-        from_secret: db_port
-      CANNAIQ_DB_NAME:
-        from_secret: db_name
-      CANNAIQ_DB_USER:
-        from_secret: db_user
-      CANNAIQ_DB_PASS:
-        from_secret: db_pass
-    commands:
-      - cd /app
-      - node dist/db/migrate.js
-    depends_on:
-      - docker-backend
-    when:
-      branch: master
-      event: push
-
   # ===========================================
-  # STAGE 4: Deploy (after migrations)
+  # STAGE 3: Deploy and Run Migrations
   # ===========================================
   deploy:
     image: bitnami/kubectl:latest
@@ -196,15 +171,18 @@ steps:
       - mkdir -p ~/.kube
       - echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
       - chmod 600 ~/.kube/config
+      # Deploy backend first
       - kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
+      - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
+      # Note: Migrations run automatically at startup via auto-migrate
+      # Deploy remaining services
       - kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
       - kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
       - kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
       - kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
-      - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
+      - kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
     depends_on:
-      - migrate
       - docker-backend
       - docker-cannaiq
       - docker-findadispo
       - docker-findagram
```
@@ -362,6 +362,148 @@

```sql
SET status = 'pending', retry_count = retry_count + 1
WHERE status = 'failed' AND retry_count < max_retries;
```

## Concurrent Task Processing (Added 2024-12)

Workers can now process multiple tasks concurrently within a single worker instance. This improves throughput by making efficient use of async I/O.

### Architecture

```
┌─────────────────────────────────────────────────────────────┐
│ Pod (K8s)                                                   │
│                                                             │
│  ┌─────────────────────────────────────────────────────┐   │
│  │ TaskWorker                                          │   │
│  │                                                     │   │
│  │  ┌─────────┐  ┌─────────┐  ┌─────────┐             │   │
│  │  │ Task 1  │  │ Task 2  │  │ Task 3  │ (concurrent)│   │
│  │  └─────────┘  └─────────┘  └─────────┘             │   │
│  │                                                     │   │
│  │  Resource Monitor                                   │   │
│  │  ├── Memory: 65% (threshold: 85%)                   │   │
│  │  ├── CPU: 45% (threshold: 90%)                      │   │
│  │  └── Status: Normal                                 │   │
│  └─────────────────────────────────────────────────────┘   │
└─────────────────────────────────────────────────────────────┘
```

### Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `MAX_CONCURRENT_TASKS` | 3 | Maximum tasks a worker will run concurrently |
| `MEMORY_BACKOFF_THRESHOLD` | 0.85 | Back off when heap memory usage exceeds 85% |
| `CPU_BACKOFF_THRESHOLD` | 0.90 | Back off when CPU usage exceeds 90% |
| `BACKOFF_DURATION_MS` | 10000 | How long to wait when backing off (10 s) |

### How It Works

1. **Main Loop**: The worker continuously tries to fill up to `MAX_CONCURRENT_TASKS` slots
2. **Resource Monitoring**: Before claiming a new task, the worker checks memory and CPU
3. **Backoff**: If resources exceed the thresholds, the worker pauses and stops claiming new tasks
4. **Concurrent Execution**: Tasks run in parallel as independent `Promise`s, so they don't block each other
5. **Graceful Shutdown**: On SIGTERM/decommission, the worker stops claiming new tasks but waits for active ones to finish (see the sketch below)
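
A condensed sketch of the claim-and-track pattern (simplified from `mainLoop()` in `src/tasks/task-worker.ts`, which appears in full later in this diff; the `Task`, `claim`, and `execute` names here are illustrative, and the backoff/decommission checks are omitted):

```typescript
// Minimal sketch of the concurrent claim loop -- not the shipped code.
type Task = { id: number };

const MAX_CONCURRENT_TASKS = 3;
const POLL_INTERVAL_MS = 5000;
const activeTasks = new Map<number, Task>();
const taskPromises = new Map<number, Promise<void>>();

async function claimLoop(
  claim: () => Promise<Task | null>,    // e.g. taskService.claimTask(...)
  execute: (t: Task) => Promise<void>,  // runs one task to completion
): Promise<void> {
  while (true) {
    if (activeTasks.size < MAX_CONCURRENT_TASKS) {
      const task = await claim();
      if (task) {
        activeTasks.set(task.id, task);
        // Deliberately not awaited: the task runs in the background so the
        // loop can immediately try to fill the next slot.
        const p = execute(task).finally(() => {
          activeTasks.delete(task.id);
          taskPromises.delete(task.id);
        });
        taskPromises.set(task.id, p);
        continue;
      }
    }
    // No task available or at capacity: wait before polling again.
    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
  }
}
```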

### Resource Monitoring

```typescript
// ResourceStats interface
interface ResourceStats {
  memoryPercent: number;        // Current heap usage as a decimal (0.0-1.0)
  memoryMb: number;             // Current heap used in MB
  memoryTotalMb: number;        // Total heap available in MB
  cpuPercent: number;           // CPU usage as a percentage (0-100)
  isBackingOff: boolean;        // True if the worker is in backoff state
  backoffReason: string | null; // Why the worker is backing off (null when healthy)
}
```
### Heartbeat Data

Workers report the following in their heartbeat:

```json
{
  "worker_id": "worker-abc123",
  "current_task_id": 456,
  "current_task_ids": [456, 457, 458],
  "active_task_count": 3,
  "max_concurrent_tasks": 3,
  "status": "active",
  "resources": {
    "memory_mb": 256,
    "memory_total_mb": 512,
    "memory_rss_mb": 320,
    "memory_percent": 50,
    "cpu_user_ms": 12500,
    "cpu_system_ms": 3200,
    "cpu_percent": 45,
    "is_backing_off": false,
    "backoff_reason": null
  }
}
```
### Backoff Behavior

When resources exceed thresholds:

1. The worker logs the backoff reason:
   ```
   [TaskWorker] MyWorker backing off: Memory at 87.3% (threshold: 85%)
   ```
2. The worker stops claiming new tasks but continues its existing ones
3. After `BACKOFF_DURATION_MS`, the worker rechecks resources
4. When resources return to normal:
   ```
   [TaskWorker] MyWorker resuming normal operation
   ```
### UI Display

The Workers Dashboard shows:

- **Tasks Column**: `2/3 tasks` (active / max concurrent)
- **Resources Column**: Memory % and CPU % with color coding (see the sketch below):
  - Green: < 50%
  - Yellow: 50-74%
  - Amber: 75-89%
  - Red: 90%+
- **Backing Off**: Orange warning badge when the worker is in backoff state
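
A minimal sketch of that color mapping, assuming Tailwind-style classes (the helper name and exact classes are illustrative; the real component lives in `cannaiq/src/pages/WorkersDashboard.tsx`):

```typescript
// Illustrative only -- maps a resource percentage to a text color class.
function resourceColor(percent: number): string {
  if (percent >= 90) return 'text-red-600';    // Red: 90%+
  if (percent >= 75) return 'text-amber-600';  // Amber: 75-89%
  if (percent >= 50) return 'text-yellow-600'; // Yellow: 50-74%
  return 'text-green-600';                     // Green: < 50%
}
```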

### Task Count Badge Details

```
┌─────────────────────────────────────────────┐
│ Worker: "MyWorker"                          │
│ Tasks: 2/3 tasks  #456, #457                │
│ Resources: 🧠 65%  💻 45%                   │
│ Status: ● Active                            │
└─────────────────────────────────────────────┘
```

### Best Practices

1. **Start Conservative**: Use `MAX_CONCURRENT_TASKS=3` initially
2. **Monitor Resources**: Watch for frequent backoffs in the logs
3. **Tune Per Workload**: I/O-bound tasks benefit from higher concurrency
4. **Scale Horizontally**: Add more pods rather than cranking concurrency too high

### Code References

| File | Purpose |
|------|---------|
| `src/tasks/task-worker.ts:68-71` | Concurrency environment variables |
| `src/tasks/task-worker.ts:104-111` | ResourceStats interface |
| `src/tasks/task-worker.ts:149-179` | getResourceStats() method |
| `src/tasks/task-worker.ts:184-196` | shouldBackOff() method |
| `src/tasks/task-worker.ts:462-516` | mainLoop() with concurrent claiming |
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |

## Monitoring

### Logs
backend/migrations/074_worker_commands.sql (new file, 27 lines)

@@ -0,0 +1,27 @@

```sql
-- Migration: Worker Commands Table
-- Purpose: Store commands for workers (decommission, etc.)
-- Workers poll this table after each task to check for commands

CREATE TABLE IF NOT EXISTS worker_commands (
  id SERIAL PRIMARY KEY,
  worker_id TEXT NOT NULL,
  command TEXT NOT NULL,          -- 'decommission', 'pause', 'resume'
  reason TEXT,
  issued_by TEXT,
  issued_at TIMESTAMPTZ DEFAULT NOW(),
  acknowledged_at TIMESTAMPTZ,
  executed_at TIMESTAMPTZ,
  status TEXT DEFAULT 'pending'   -- 'pending', 'acknowledged', 'executed', 'cancelled'
);

-- Index for worker lookups
CREATE INDEX IF NOT EXISTS idx_worker_commands_worker_id ON worker_commands(worker_id);
CREATE INDEX IF NOT EXISTS idx_worker_commands_pending ON worker_commands(worker_id, status) WHERE status = 'pending';

-- Add decommission_requested column to worker_registry for quick checks
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested BOOLEAN DEFAULT FALSE;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_reason TEXT;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested_at TIMESTAMPTZ;

-- Comment
COMMENT ON TABLE worker_commands IS 'Commands issued to workers (decommission after task, pause, etc.)';
```
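
As a rough illustration of the polling flow those comments describe, a worker could acknowledge pending commands in one round trip (a sketch assuming a `pg` `Pool`; the shipped worker actually checks `worker_registry.decommission_requested` instead, as `checkDecommission()` later in this diff shows):

```typescript
import { Pool } from 'pg';

// Hypothetical poll against worker_commands -- not part of this diff.
async function pollPendingCommands(pool: Pool, workerId: string) {
  const { rows } = await pool.query(
    `UPDATE worker_commands
        SET status = 'acknowledged', acknowledged_at = NOW()
      WHERE worker_id = $1 AND status = 'pending'
      RETURNING id, command, reason`,
    [workerId]
  );
  return rows; // e.g. [{ id: 1, command: 'decommission', reason: '...' }]
}
```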

```diff
@@ -146,6 +146,7 @@ import tasksRoutes from './routes/tasks';
 import workerRegistryRoutes from './routes/worker-registry';
 // Per TASK_WORKFLOW_2024-12-10.md: Raw payload access API
 import payloadsRoutes from './routes/payloads';
+import k8sRoutes from './routes/k8s';
 
 // Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com)
 // These domains can access the API without authentication
@@ -230,6 +231,10 @@ console.log('[WorkerRegistry] Routes registered at /api/worker-registry');
 app.use('/api/payloads', payloadsRoutes);
 console.log('[Payloads] Routes registered at /api/payloads');
 
+// K8s control routes - worker scaling from admin UI
+app.use('/api/k8s', k8sRoutes);
+console.log('[K8s] Routes registered at /api/k8s');
+
 // Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation
 try {
   const analyticsV2Router = createAnalyticsV2Router(getPool());
```
```diff
@@ -47,4 +47,27 @@ router.post('/refresh', authMiddleware, async (req: AuthRequest, res) => {
   res.json({ token });
 });
 
+// Verify password for sensitive actions (requires current user to be authenticated)
+router.post('/verify-password', authMiddleware, async (req: AuthRequest, res) => {
+  try {
+    const { password } = req.body;
+
+    if (!password) {
+      return res.status(400).json({ error: 'Password required' });
+    }
+
+    // Re-authenticate the current user with the provided password
+    const user = await authenticateUser(req.user!.email, password);
+
+    if (!user) {
+      return res.status(401).json({ error: 'Invalid password', verified: false });
+    }
+
+    res.json({ verified: true });
+  } catch (error) {
+    console.error('Password verification error:', error);
+    res.status(500).json({ error: 'Internal server error' });
+  }
+});
+
 export default router;
```
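
For reference, a direct call to the new endpoint might look like this (a sketch: the mount path `/api/auth` and the bearer-token header are assumptions, since neither is shown in this diff; the UI goes through `api.verifyPassword()` in the `PasswordConfirmModal` component below):

```typescript
// Hypothetical client-side check before a sensitive action.
async function verifyPassword(baseUrl: string, token: string, password: string): Promise<boolean> {
  const res = await fetch(`${baseUrl}/api/auth/verify-password`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${token}`, // assumed auth scheme
    },
    body: JSON.stringify({ password }),
  });
  const body = await res.json();
  return res.ok && body.verified === true;
}
```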

backend/src/routes/k8s.ts (new file, 140 lines)

@@ -0,0 +1,140 @@

```typescript
/**
 * Kubernetes Control Routes
 *
 * Provides admin UI control over k8s resources like worker scaling.
 * Uses in-cluster config when running in k8s, or kubeconfig locally.
 */

import { Router, Request, Response } from 'express';
import * as k8s from '@kubernetes/client-node';

const router = Router();

// K8s client setup - lazy initialization
let appsApi: k8s.AppsV1Api | null = null;
let k8sError: string | null = null;

function getK8sClient(): k8s.AppsV1Api | null {
  if (appsApi) return appsApi;
  if (k8sError) return null;

  try {
    const kc = new k8s.KubeConfig();

    // Try in-cluster config first (when running in k8s)
    try {
      kc.loadFromCluster();
      console.log('[K8s] Loaded in-cluster config');
    } catch {
      // Fall back to default kubeconfig (local dev)
      try {
        kc.loadFromDefault();
        console.log('[K8s] Loaded default kubeconfig');
      } catch (e) {
        k8sError = 'No k8s config available';
        console.log('[K8s] No config available - k8s routes disabled');
        return null;
      }
    }

    appsApi = kc.makeApiClient(k8s.AppsV1Api);
    return appsApi;
  } catch (e: any) {
    k8sError = e.message;
    console.error('[K8s] Failed to initialize client:', e.message);
    return null;
  }
}

const NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
const WORKER_DEPLOYMENT = 'scraper-worker';

/**
 * GET /api/k8s/workers
 * Get current worker deployment status
 */
router.get('/workers', async (_req: Request, res: Response) => {
  const client = getK8sClient();

  if (!client) {
    return res.json({
      success: true,
      available: false,
      error: k8sError || 'K8s not available',
      replicas: 0,
      readyReplicas: 0,
    });
  }

  try {
    const deployment = await client.readNamespacedDeployment({
      name: WORKER_DEPLOYMENT,
      namespace: NAMESPACE,
    });

    res.json({
      success: true,
      available: true,
      replicas: deployment.spec?.replicas || 0,
      readyReplicas: deployment.status?.readyReplicas || 0,
      availableReplicas: deployment.status?.availableReplicas || 0,
      updatedReplicas: deployment.status?.updatedReplicas || 0,
    });
  } catch (e: any) {
    console.error('[K8s] Error getting deployment:', e.message);
    res.status(500).json({
      success: false,
      error: e.message,
    });
  }
});

/**
 * POST /api/k8s/workers/scale
 * Scale worker deployment
 * Body: { replicas: number }
 */
router.post('/workers/scale', async (req: Request, res: Response) => {
  const client = getK8sClient();

  if (!client) {
    return res.status(503).json({
      success: false,
      error: k8sError || 'K8s not available',
    });
  }

  const { replicas } = req.body;

  if (typeof replicas !== 'number' || replicas < 0 || replicas > 50) {
    return res.status(400).json({
      success: false,
      error: 'replicas must be a number between 0 and 50',
    });
  }

  try {
    // Patch the deployment to set replicas
    await client.patchNamespacedDeploymentScale({
      name: WORKER_DEPLOYMENT,
      namespace: NAMESPACE,
      body: { spec: { replicas } },
    });

    console.log(`[K8s] Scaled ${WORKER_DEPLOYMENT} to ${replicas} replicas`);

    res.json({
      success: true,
      replicas,
      message: `Scaled to ${replicas} workers`,
    });
  } catch (e: any) {
    console.error('[K8s] Error scaling deployment:', e.message);
    res.status(500).json({
      success: false,
      error: e.message,
    });
  }
});

export default router;
```
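
A quick way to exercise the scale endpoint from a script (a sketch; it assumes the router is mounted at `/api/k8s` as the index.ts hunk above shows, and that no extra auth header is required):

```typescript
// Hypothetical smoke test for POST /api/k8s/workers/scale.
async function scaleWorkers(baseUrl: string, replicas: number): Promise<void> {
  const res = await fetch(`${baseUrl}/api/k8s/workers/scale`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ replicas }),
  });
  const body = await res.json();
  if (!body.success) throw new Error(body.error);
  console.log(body.message); // e.g. "Scaled to 5 workers"
}
```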

```diff
@@ -138,17 +138,36 @@ router.post('/register', async (req: Request, res: Response) => {
  *
  * Body:
  * - worker_id: string (required)
- * - current_task_id: number (optional) - task currently being processed
+ * - current_task_id: number (optional) - task currently being processed (primary task)
+ * - current_task_ids: number[] (optional) - all tasks currently being processed (concurrent)
+ * - active_task_count: number (optional) - number of tasks currently running
+ * - max_concurrent_tasks: number (optional) - max concurrent tasks this worker can handle
  * - status: string (optional) - 'active', 'idle'
  * - resources: object (optional) - memory_mb, cpu_user_ms, cpu_system_ms, etc.
  */
 router.post('/heartbeat', async (req: Request, res: Response) => {
   try {
-    const { worker_id, current_task_id, status = 'active', resources } = req.body;
+    const {
+      worker_id,
+      current_task_id,
+      current_task_ids,
+      active_task_count,
+      max_concurrent_tasks,
+      status = 'active',
+      resources
+    } = req.body;
 
     if (!worker_id) {
       return res.status(400).json({ success: false, error: 'worker_id is required' });
     }
 
+    // Build metadata object with all the new fields
+    const metadata: Record<string, unknown> = {};
+    if (resources) Object.assign(metadata, resources);
+    if (current_task_ids) metadata.current_task_ids = current_task_ids;
+    if (active_task_count !== undefined) metadata.active_task_count = active_task_count;
+    if (max_concurrent_tasks !== undefined) metadata.max_concurrent_tasks = max_concurrent_tasks;
+
     // Store resources in metadata jsonb column
     const { rows } = await pool.query(`
       UPDATE worker_registry
@@ -159,7 +178,7 @@ router.post('/heartbeat', async (req: Request, res: Response) => {
         updated_at = NOW()
       WHERE worker_id = $3
       RETURNING id, friendly_name, status
-    `, [current_task_id || null, status, worker_id, resources ? JSON.stringify(resources) : null]);
+    `, [current_task_id || null, status, worker_id, Object.keys(metadata).length > 0 ? JSON.stringify(metadata) : null]);
 
     if (rows.length === 0) {
       return res.status(404).json({ success: false, error: 'Worker not found - please register first' });
```
```diff
@@ -330,12 +349,21 @@ router.get('/workers', async (req: Request, res: Response) => {
         tasks_completed,
         tasks_failed,
         current_task_id,
+        -- Concurrent task fields from metadata
+        (metadata->>'current_task_ids')::jsonb as current_task_ids,
+        (metadata->>'active_task_count')::int as active_task_count,
+        (metadata->>'max_concurrent_tasks')::int as max_concurrent_tasks,
+        -- Decommission fields
+        COALESCE(decommission_requested, false) as decommission_requested,
+        decommission_reason,
+        -- Full metadata for resources
+        metadata,
         EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat,
         CASE
           WHEN status = 'offline' OR status = 'terminated' THEN status
           WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
           WHEN current_task_id IS NOT NULL THEN 'busy'
+          WHEN (metadata->>'active_task_count')::int > 0 THEN 'busy'
           ELSE 'ready'
         END as health_status,
         created_at
```
```diff
@@ -672,4 +700,163 @@ router.get('/capacity', async (_req: Request, res: Response) => {
   }
 });
 
+// ============================================================
+// WORKER LIFECYCLE MANAGEMENT
+// ============================================================
+
+/**
+ * POST /api/worker-registry/workers/:workerId/decommission
+ * Request graceful decommission of a worker (will stop after current task)
+ */
+router.post('/workers/:workerId/decommission', async (req: Request, res: Response) => {
+  try {
+    const { workerId } = req.params;
+    const { reason, issued_by } = req.body;
+
+    // Update worker_registry to flag for decommission
+    const result = await pool.query(
+      `UPDATE worker_registry
+          SET decommission_requested = true,
+              decommission_reason = $2,
+              decommission_requested_at = NOW()
+        WHERE worker_id = $1
+        RETURNING friendly_name, status, current_task_id`,
+      [workerId, reason || 'Manual decommission from admin']
+    );
+
+    if (result.rows.length === 0) {
+      return res.status(404).json({ success: false, error: 'Worker not found' });
+    }
+
+    const worker = result.rows[0];
+
+    // Also log to worker_commands for audit trail
+    await pool.query(
+      `INSERT INTO worker_commands (worker_id, command, reason, issued_by)
+       VALUES ($1, 'decommission', $2, $3)
+       ON CONFLICT DO NOTHING`,
+      [workerId, reason || 'Manual decommission', issued_by || 'admin']
+    ).catch(() => {
+      // Table might not exist yet - ignore
+    });
+
+    res.json({
+      success: true,
+      message: worker.current_task_id
+        ? `Worker ${worker.friendly_name} will stop after completing task #${worker.current_task_id}`
+        : `Worker ${worker.friendly_name} will stop on next poll`,
+      worker: {
+        friendly_name: worker.friendly_name,
+        status: worker.status,
+        current_task_id: worker.current_task_id,
+        decommission_requested: true
+      }
+    });
+  } catch (error: any) {
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+/**
+ * POST /api/worker-registry/workers/:workerId/cancel-decommission
+ * Cancel a pending decommission request
+ */
+router.post('/workers/:workerId/cancel-decommission', async (req: Request, res: Response) => {
+  try {
+    const { workerId } = req.params;
+
+    const result = await pool.query(
+      `UPDATE worker_registry
+          SET decommission_requested = false,
+              decommission_reason = NULL,
+              decommission_requested_at = NULL
+        WHERE worker_id = $1
+        RETURNING friendly_name`,
+      [workerId]
+    );
+
+    if (result.rows.length === 0) {
+      return res.status(404).json({ success: false, error: 'Worker not found' });
+    }
+
+    res.json({
+      success: true,
+      message: `Decommission cancelled for ${result.rows[0].friendly_name}`
+    });
+  } catch (error: any) {
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+/**
+ * POST /api/worker-registry/spawn
+ * Spawn a new worker in the current pod (only works in multi-worker-per-pod mode)
+ * For now, this is a placeholder - actual spawning requires the pod supervisor
+ */
+router.post('/spawn', async (req: Request, res: Response) => {
+  try {
+    const { pod_name, role } = req.body;
+
+    // For now, we can't actually spawn workers from the API.
+    // This would require a supervisor process in each pod that listens for
+    // spawn commands. Instead, return instructions for how to scale.
+    res.json({
+      success: false,
+      error: 'Direct worker spawning not yet implemented',
+      instructions: 'To add workers, scale the K8s deployment: kubectl scale deployment/scraper-worker --replicas=N'
+    });
+  } catch (error: any) {
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+/**
+ * GET /api/worker-registry/pods
+ * Get workers grouped by pod
+ */
+router.get('/pods', async (_req: Request, res: Response) => {
+  try {
+    const { rows } = await pool.query(`
+      SELECT
+        COALESCE(pod_name, 'Unknown') as pod_name,
+        COUNT(*) as worker_count,
+        COUNT(*) FILTER (WHERE current_task_id IS NOT NULL) as busy_count,
+        COUNT(*) FILTER (WHERE current_task_id IS NULL) as idle_count,
+        SUM(tasks_completed) as total_completed,
+        SUM(tasks_failed) as total_failed,
+        SUM((metadata->>'memory_rss_mb')::int) as total_memory_mb,
+        array_agg(json_build_object(
+          'worker_id', worker_id,
+          'friendly_name', friendly_name,
+          'status', status,
+          'current_task_id', current_task_id,
+          'tasks_completed', tasks_completed,
+          'tasks_failed', tasks_failed,
+          'decommission_requested', COALESCE(decommission_requested, false),
+          'last_heartbeat_at', last_heartbeat_at
+        )) as workers
+      FROM worker_registry
+      WHERE status NOT IN ('offline', 'terminated')
+      GROUP BY pod_name
+      ORDER BY pod_name
+    `);
+
+    res.json({
+      success: true,
+      pods: rows.map(row => ({
+        pod_name: row.pod_name,
+        worker_count: parseInt(row.worker_count),
+        busy_count: parseInt(row.busy_count),
+        idle_count: parseInt(row.idle_count),
+        total_completed: parseInt(row.total_completed) || 0,
+        total_failed: parseInt(row.total_failed) || 0,
+        total_memory_mb: parseInt(row.total_memory_mb) || 0,
+        workers: row.workers
+      }))
+    });
+  } catch (error: any) {
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
 export default router;
```
```diff
@@ -35,7 +35,7 @@ const router = Router();
 // ============================================================
 
 const K8S_NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
-const K8S_STATEFULSET_NAME = process.env.K8S_WORKER_STATEFULSET || 'scraper-worker';
+const K8S_DEPLOYMENT_NAME = process.env.K8S_WORKER_DEPLOYMENT || 'scraper-worker';
 
 // Initialize K8s client - uses in-cluster config when running in K8s,
 // or kubeconfig when running locally
@@ -70,7 +70,7 @@ function getK8sClient(): k8s.AppsV1Api | null {
 
 /**
  * GET /api/workers/k8s/replicas - Get current worker replica count
- * Returns current and desired replica counts from the StatefulSet
+ * Returns current and desired replica counts from the Deployment
  */
 router.get('/k8s/replicas', async (_req: Request, res: Response) => {
   const client = getK8sClient();
@@ -84,21 +84,21 @@ router.get('/k8s/replicas', async (_req: Request, res: Response) => {
   }
 
   try {
-    const response = await client.readNamespacedStatefulSet({
-      name: K8S_STATEFULSET_NAME,
+    const response = await client.readNamespacedDeployment({
+      name: K8S_DEPLOYMENT_NAME,
       namespace: K8S_NAMESPACE,
     });
 
-    const statefulSet = response;
+    const deployment = response;
     res.json({
       success: true,
       replicas: {
-        current: statefulSet.status?.readyReplicas || 0,
-        desired: statefulSet.spec?.replicas || 0,
-        available: statefulSet.status?.availableReplicas || 0,
-        updated: statefulSet.status?.updatedReplicas || 0,
+        current: deployment.status?.readyReplicas || 0,
+        desired: deployment.spec?.replicas || 0,
+        available: deployment.status?.availableReplicas || 0,
+        updated: deployment.status?.updatedReplicas || 0,
       },
-      statefulset: K8S_STATEFULSET_NAME,
+      deployment: K8S_DEPLOYMENT_NAME,
       namespace: K8S_NAMESPACE,
     });
   } catch (err: any) {
@@ -112,7 +112,7 @@ router.get('/k8s/replicas', async (_req: Request, res: Response) => {
 
 /**
  * POST /api/workers/k8s/scale - Scale worker replicas
- * Body: { replicas: number } - desired replica count (1-20)
+ * Body: { replicas: number } - desired replica count (0-20)
  */
 router.post('/k8s/scale', async (req: Request, res: Response) => {
   const client = getK8sClient();
@@ -136,21 +136,21 @@ router.post('/k8s/scale', async (req: Request, res: Response) => {
 
   try {
     // Get current state first
-    const currentResponse = await client.readNamespacedStatefulSetScale({
-      name: K8S_STATEFULSET_NAME,
+    const currentResponse = await client.readNamespacedDeploymentScale({
+      name: K8S_DEPLOYMENT_NAME,
       namespace: K8S_NAMESPACE,
     });
     const currentReplicas = currentResponse.spec?.replicas || 0;
 
-    // Update scale using replaceNamespacedStatefulSetScale
-    await client.replaceNamespacedStatefulSetScale({
-      name: K8S_STATEFULSET_NAME,
+    // Update scale using replaceNamespacedDeploymentScale
+    await client.replaceNamespacedDeploymentScale({
+      name: K8S_DEPLOYMENT_NAME,
       namespace: K8S_NAMESPACE,
       body: {
         apiVersion: 'autoscaling/v1',
         kind: 'Scale',
         metadata: {
-          name: K8S_STATEFULSET_NAME,
+          name: K8S_DEPLOYMENT_NAME,
           namespace: K8S_NAMESPACE,
         },
         spec: {
@@ -159,14 +159,14 @@ router.post('/k8s/scale', async (req: Request, res: Response) => {
       },
     });
 
-    console.log(`[Workers] Scaled ${K8S_STATEFULSET_NAME} from ${currentReplicas} to ${replicas} replicas`);
+    console.log(`[Workers] Scaled ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${replicas} replicas`);
 
     res.json({
       success: true,
       message: `Scaled from ${currentReplicas} to ${replicas} replicas`,
       previous: currentReplicas,
       desired: replicas,
-      statefulset: K8S_STATEFULSET_NAME,
+      deployment: K8S_DEPLOYMENT_NAME,
       namespace: K8S_NAMESPACE,
     });
   } catch (err: any) {
@@ -178,6 +178,73 @@ router.post('/k8s/scale', async (req: Request, res: Response) => {
   }
 });
 
+/**
+ * POST /api/workers/k8s/scale-up - Scale up worker replicas by 1
+ * Convenience endpoint for adding a single worker
+ */
+router.post('/k8s/scale-up', async (_req: Request, res: Response) => {
+  const client = getK8sClient();
+
+  if (!client) {
+    return res.status(503).json({
+      success: false,
+      error: 'K8s client not available (not running in cluster or no kubeconfig)',
+    });
+  }
+
+  try {
+    // Get current replica count
+    const currentResponse = await client.readNamespacedDeploymentScale({
+      name: K8S_DEPLOYMENT_NAME,
+      namespace: K8S_NAMESPACE,
+    });
+    const currentReplicas = currentResponse.spec?.replicas || 0;
+    const newReplicas = currentReplicas + 1;
+
+    // Cap at 20 replicas
+    if (newReplicas > 20) {
+      return res.status(400).json({
+        success: false,
+        error: 'Maximum replica count (20) reached',
+      });
+    }
+
+    // Scale up by 1
+    await client.replaceNamespacedDeploymentScale({
+      name: K8S_DEPLOYMENT_NAME,
+      namespace: K8S_NAMESPACE,
+      body: {
+        apiVersion: 'autoscaling/v1',
+        kind: 'Scale',
+        metadata: {
+          name: K8S_DEPLOYMENT_NAME,
+          namespace: K8S_NAMESPACE,
+        },
+        spec: {
+          replicas: newReplicas,
+        },
+      },
+    });
+
+    console.log(`[Workers] Scaled up ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${newReplicas} replicas`);
+
+    res.json({
+      success: true,
+      message: `Added worker (${currentReplicas} → ${newReplicas} replicas)`,
+      previous: currentReplicas,
+      desired: newReplicas,
+      deployment: K8S_DEPLOYMENT_NAME,
+      namespace: K8S_NAMESPACE,
+    });
+  } catch (err: any) {
+    console.error('[Workers] K8s scale-up error:', err.body?.message || err.message);
+    res.status(500).json({
+      success: false,
+      error: err.body?.message || err.message,
+    });
+  }
+});
+
 // ============================================================
 // STATIC ROUTES (must come before parameterized routes)
 // ============================================================
```
```diff
@@ -64,6 +64,33 @@ const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '5000');
 const HEARTBEAT_INTERVAL_MS = parseInt(process.env.HEARTBEAT_INTERVAL_MS || '30000');
 const API_BASE_URL = process.env.API_BASE_URL || 'http://localhost:3010';
 
+// =============================================================================
+// CONCURRENT TASK PROCESSING SETTINGS
+// =============================================================================
+// Workers can process multiple tasks simultaneously using async I/O.
+// This improves throughput for I/O-bound tasks (network calls, DB queries).
+//
+// Resource thresholds trigger "backoff" - the worker stops claiming new tasks
+// but continues processing existing ones until resources return to normal.
+//
+// See: docs/WORKER_TASK_ARCHITECTURE.md#concurrent-task-processing
+// =============================================================================
+
+// Maximum number of tasks this worker will run concurrently
+// Tune based on workload: I/O-bound tasks benefit from higher concurrency
+const MAX_CONCURRENT_TASKS = parseInt(process.env.MAX_CONCURRENT_TASKS || '3');
+
+// When heap memory usage exceeds this threshold (as decimal 0.0-1.0), stop claiming new tasks
+// Default 85% - gives headroom before OOM
+const MEMORY_BACKOFF_THRESHOLD = parseFloat(process.env.MEMORY_BACKOFF_THRESHOLD || '0.85');
+
+// When CPU usage exceeds this threshold (as decimal 0.0-1.0), stop claiming new tasks
+// Default 90% - allows some burst capacity
+const CPU_BACKOFF_THRESHOLD = parseFloat(process.env.CPU_BACKOFF_THRESHOLD || '0.90');
+
+// How long to wait (ms) when in backoff state before rechecking resources
+const BACKOFF_DURATION_MS = parseInt(process.env.BACKOFF_DURATION_MS || '10000');
+
 export interface TaskContext {
   pool: Pool;
   workerId: string;
@@ -94,6 +121,25 @@ const TASK_HANDLERS: Record<TaskRole, TaskHandler> = {
   analytics_refresh: handleAnalyticsRefresh,
 };
 
+/**
+ * Resource usage stats reported to the registry and used for backoff decisions.
+ * These values are included in worker heartbeats and displayed in the UI.
+ */
+interface ResourceStats {
+  /** Current heap memory usage as decimal (0.0 to 1.0) */
+  memoryPercent: number;
+  /** Current heap used in MB */
+  memoryMb: number;
+  /** Total heap available in MB */
+  memoryTotalMb: number;
+  /** CPU usage percentage since last check (0 to 100) */
+  cpuPercent: number;
+  /** True if worker is currently in backoff state */
+  isBackingOff: boolean;
+  /** Reason for backoff (e.g., "Memory at 87.3% (threshold: 85%)") */
+  backoffReason: string | null;
+}
+
 export class TaskWorker {
   private pool: Pool;
   private workerId: string;
@@ -102,14 +148,106 @@ export class TaskWorker {
   private isRunning: boolean = false;
   private heartbeatInterval: NodeJS.Timeout | null = null;
   private registryHeartbeatInterval: NodeJS.Timeout | null = null;
-  private currentTask: WorkerTask | null = null;
   private crawlRotator: CrawlRotator;
 
+  // ==========================================================================
+  // CONCURRENT TASK TRACKING
+  // ==========================================================================
+  // activeTasks: Map of task ID -> task object for all currently running tasks
+  // taskPromises: Map of task ID -> Promise for cleanup when task completes
+  // maxConcurrentTasks: How many tasks this worker will run in parallel
+  // ==========================================================================
+  private activeTasks: Map<number, WorkerTask> = new Map();
+  private taskPromises: Map<number, Promise<void>> = new Map();
+  private maxConcurrentTasks: number = MAX_CONCURRENT_TASKS;
+
+  // ==========================================================================
+  // RESOURCE MONITORING FOR BACKOFF
+  // ==========================================================================
+  // CPU tracking uses differential measurement - we track last values and
+  // calculate percentage based on elapsed time since last check.
+  // ==========================================================================
+  private lastCpuUsage: { user: number; system: number } = { user: 0, system: 0 };
+  private lastCpuCheck: number = Date.now();
+  private isBackingOff: boolean = false;
+  private backoffReason: string | null = null;
+
   constructor(role: TaskRole | null = null, workerId?: string) {
     this.pool = getPool();
     this.role = role;
     this.workerId = workerId || `worker-${uuidv4().slice(0, 8)}`;
     this.crawlRotator = new CrawlRotator(this.pool);
+
+    // Initialize CPU tracking
+    const cpuUsage = process.cpuUsage();
+    this.lastCpuUsage = { user: cpuUsage.user, system: cpuUsage.system };
+    this.lastCpuCheck = Date.now();
   }
+
+  /**
+   * Get current resource usage
+   */
+  private getResourceStats(): ResourceStats {
+    const memUsage = process.memoryUsage();
+    const heapUsedMb = memUsage.heapUsed / 1024 / 1024;
+    const heapTotalMb = memUsage.heapTotal / 1024 / 1024;
+    const memoryPercent = heapUsedMb / heapTotalMb;
+
+    // Calculate CPU usage since last check
+    const cpuUsage = process.cpuUsage();
+    const now = Date.now();
+    const elapsed = now - this.lastCpuCheck;
+
+    let cpuPercent = 0;
+    if (elapsed > 0) {
+      const userDiff = (cpuUsage.user - this.lastCpuUsage.user) / 1000; // microseconds to ms
+      const systemDiff = (cpuUsage.system - this.lastCpuUsage.system) / 1000;
+      cpuPercent = ((userDiff + systemDiff) / elapsed) * 100;
+    }
+
+    // Update last values
+    this.lastCpuUsage = { user: cpuUsage.user, system: cpuUsage.system };
+    this.lastCpuCheck = now;
+
+    return {
+      memoryPercent,
+      memoryMb: Math.round(heapUsedMb),
+      memoryTotalMb: Math.round(heapTotalMb),
+      cpuPercent: Math.min(100, cpuPercent), // Cap at 100%
+      isBackingOff: this.isBackingOff,
+      backoffReason: this.backoffReason,
+    };
+  }
+
+  /**
+   * Check if we should back off from taking new tasks
+   */
+  private shouldBackOff(): { backoff: boolean; reason: string | null } {
+    const stats = this.getResourceStats();
+
+    if (stats.memoryPercent > MEMORY_BACKOFF_THRESHOLD) {
+      return { backoff: true, reason: `Memory at ${(stats.memoryPercent * 100).toFixed(1)}% (threshold: ${MEMORY_BACKOFF_THRESHOLD * 100}%)` };
+    }
+
+    if (stats.cpuPercent > CPU_BACKOFF_THRESHOLD * 100) {
+      return { backoff: true, reason: `CPU at ${stats.cpuPercent.toFixed(1)}% (threshold: ${CPU_BACKOFF_THRESHOLD * 100}%)` };
+    }
+
+    return { backoff: false, reason: null };
+  }
+
+  /**
+   * Get count of currently running tasks
+   */
+  get activeTaskCount(): number {
+    return this.activeTasks.size;
+  }
+
+  /**
+   * Check if we can accept more tasks
+   */
+  private canAcceptMoreTasks(): boolean {
+    return this.activeTasks.size < this.maxConcurrentTasks;
+  }
 
   /**
@@ -252,21 +390,32 @@ export class TaskWorker {
     const memUsage = process.memoryUsage();
     const cpuUsage = process.cpuUsage();
     const proxyLocation = this.crawlRotator.getProxyLocation();
+    const resourceStats = this.getResourceStats();
+
+    // Get array of active task IDs
+    const activeTaskIds = Array.from(this.activeTasks.keys());
 
     await fetch(`${API_BASE_URL}/api/worker-registry/heartbeat`, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
       body: JSON.stringify({
         worker_id: this.workerId,
-        current_task_id: this.currentTask?.id || null,
-        status: this.currentTask ? 'active' : 'idle',
+        current_task_id: activeTaskIds[0] || null, // Primary task for backwards compat
+        current_task_ids: activeTaskIds, // All active tasks
+        active_task_count: this.activeTasks.size,
+        max_concurrent_tasks: this.maxConcurrentTasks,
+        status: this.activeTasks.size > 0 ? 'active' : 'idle',
         resources: {
           memory_mb: Math.round(memUsage.heapUsed / 1024 / 1024),
           memory_total_mb: Math.round(memUsage.heapTotal / 1024 / 1024),
           memory_rss_mb: Math.round(memUsage.rss / 1024 / 1024),
+          memory_percent: Math.round(resourceStats.memoryPercent * 100),
           cpu_user_ms: Math.round(cpuUsage.user / 1000),
           cpu_system_ms: Math.round(cpuUsage.system / 1000),
+          cpu_percent: Math.round(resourceStats.cpuPercent),
           proxy_location: proxyLocation,
+          is_backing_off: this.isBackingOff,
+          backoff_reason: this.backoffReason,
         }
       })
     });
@@ -328,20 +477,85 @@ export class TaskWorker {
     this.startRegistryHeartbeat();
 
     const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)';
-    console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg}`);
+    console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (max ${this.maxConcurrentTasks} concurrent tasks)`);
 
     while (this.isRunning) {
       try {
-        await this.processNextTask();
+        await this.mainLoop();
       } catch (error: any) {
        console.error(`[TaskWorker] Loop error:`, error.message);
        await this.sleep(POLL_INTERVAL_MS);
      }
    }
 
+    // Wait for any remaining tasks to complete
+    if (this.taskPromises.size > 0) {
+      console.log(`[TaskWorker] Waiting for ${this.taskPromises.size} active tasks to complete...`);
+      await Promise.allSettled(this.taskPromises.values());
+    }
+
     console.log(`[TaskWorker] Worker ${this.workerId} stopped`);
   }
 
+  /**
+   * Main loop - tries to fill up to maxConcurrentTasks
+   */
+  private async mainLoop(): Promise<void> {
+    // Check resource usage and backoff if needed
+    const { backoff, reason } = this.shouldBackOff();
+    if (backoff) {
+      if (!this.isBackingOff) {
+        console.log(`[TaskWorker] ${this.friendlyName} backing off: ${reason}`);
+      }
+      this.isBackingOff = true;
+      this.backoffReason = reason;
+      await this.sleep(BACKOFF_DURATION_MS);
+      return;
+    }
+
+    // Clear backoff state
+    if (this.isBackingOff) {
+      console.log(`[TaskWorker] ${this.friendlyName} resuming normal operation`);
+      this.isBackingOff = false;
+      this.backoffReason = null;
+    }
+
+    // Check for decommission signal
+    const shouldDecommission = await this.checkDecommission();
+    if (shouldDecommission) {
+      console.log(`[TaskWorker] ${this.friendlyName} received decommission signal - waiting for ${this.activeTasks.size} tasks to complete`);
+      // Stop accepting new tasks, wait for current to finish
+      this.isRunning = false;
+      return;
+    }
+
+    // Try to claim more tasks if we have capacity
+    if (this.canAcceptMoreTasks()) {
+      const task = await taskService.claimTask(this.role, this.workerId);
+
+      if (task) {
+        console.log(`[TaskWorker] ${this.friendlyName} claimed task ${task.id} (${task.role}) [${this.activeTasks.size + 1}/${this.maxConcurrentTasks}]`);
+        this.activeTasks.set(task.id, task);
+
+        // Start task in background (don't await)
+        const taskPromise = this.executeTask(task);
+        this.taskPromises.set(task.id, taskPromise);
+
+        // Clean up when done
+        taskPromise.finally(() => {
+          this.activeTasks.delete(task.id);
+          this.taskPromises.delete(task.id);
+        });
+
+        // Immediately try to claim more tasks (don't wait for poll interval)
+        return;
+      }
+    }
+
+    // No task claimed or at capacity - wait before next poll
+    await this.sleep(POLL_INTERVAL_MS);
+  }
+
   /**
    * Stop the worker
    */
@@ -354,23 +568,10 @@ export class TaskWorker {
   }
 
   /**
-   * Process the next available task
+   * Execute a single task (runs concurrently with other tasks)
    */
-  private async processNextTask(): Promise<void> {
-    // Try to claim a task
-    const task = await taskService.claimTask(this.role, this.workerId);
-
-    if (!task) {
-      // No tasks available, wait and retry
-      await this.sleep(POLL_INTERVAL_MS);
-      return;
-    }
-
-    this.currentTask = task;
-    console.log(`[TaskWorker] Claimed task ${task.id} (${task.role}) for dispensary ${task.dispensary_id || 'N/A'}`);
-
-    // Start heartbeat
-    this.startHeartbeat(task.id);
+  private async executeTask(task: WorkerTask): Promise<void> {
+    console.log(`[TaskWorker] ${this.friendlyName} starting task ${task.id} (${task.role}) for dispensary ${task.dispensary_id || 'N/A'}`);
 
     try {
       // Mark as running
@@ -399,7 +600,7 @@ export class TaskWorker {
       // Mark as completed
       await taskService.completeTask(task.id, result);
       await this.reportTaskCompletion(true);
-      console.log(`[TaskWorker] ${this.friendlyName} completed task ${task.id}`);
+      console.log(`[TaskWorker] ${this.friendlyName} completed task ${task.id} [${this.activeTasks.size}/${this.maxConcurrentTasks} active]`);
 
       // Chain next task if applicable
       const chainedTask = await taskService.chainNextTask({
@@ -421,9 +622,35 @@ export class TaskWorker {
       await taskService.failTask(task.id, error.message);
       await this.reportTaskCompletion(false);
       console.error(`[TaskWorker] ${this.friendlyName} task ${task.id} error:`, error.message);
-    } finally {
-      this.stopHeartbeat();
-      this.currentTask = null;
     }
+    // Note: cleanup (removing from activeTasks) is handled in mainLoop's finally block
   }
 
+  /**
+   * Check if this worker has been flagged for decommission
+   * Returns true if worker should stop after current task
+   */
+  private async checkDecommission(): Promise<boolean> {
+    try {
+      // Check worker_registry for decommission flag
+      const result = await this.pool.query(
+        `SELECT decommission_requested, decommission_reason
+           FROM worker_registry
+          WHERE worker_id = $1`,
+        [this.workerId]
+      );
+
+      if (result.rows.length > 0 && result.rows[0].decommission_requested) {
+        const reason = result.rows[0].decommission_reason || 'No reason provided';
+        console.log(`[TaskWorker] Decommission requested: ${reason}`);
+        return true;
+      }
+
+      return false;
+    } catch (error: any) {
+      // If we can't check, continue running
+      console.warn(`[TaskWorker] Could not check decommission status: ${error.message}`);
+      return false;
+    }
+  }
 
@@ -460,12 +687,25 @@ export class TaskWorker {
   /**
    * Get worker info
    */
-  getInfo(): { workerId: string; role: TaskRole | null; isRunning: boolean; currentTaskId: number | null } {
+  getInfo(): {
+    workerId: string;
+    role: TaskRole | null;
+    isRunning: boolean;
+    activeTaskIds: number[];
+    activeTaskCount: number;
+    maxConcurrentTasks: number;
+    isBackingOff: boolean;
+    backoffReason: string | null;
+  } {
     return {
       workerId: this.workerId,
       role: this.role,
       isRunning: this.isRunning,
-      currentTaskId: this.currentTask?.id || null,
+      activeTaskIds: Array.from(this.activeTasks.keys()),
+      activeTaskCount: this.activeTasks.size,
+      maxConcurrentTasks: this.maxConcurrentTasks,
+      isBackingOff: this.isBackingOff,
+      backoffReason: this.backoffReason,
     };
   }
 }
```
@@ -1,5 +1,5 @@
|
||||
import { ReactNode, useEffect, useState } from 'react';
|
||||
import { useNavigate, useLocation } from 'react-router-dom';
|
||||
import { ReactNode, useEffect, useState, useRef } from 'react';
|
||||
import { useNavigate, useLocation, Link } from 'react-router-dom';
|
||||
import { useAuthStore } from '../store/authStore';
|
||||
import { api } from '../lib/api';
|
||||
import { StateSelector } from './StateSelector';
|
||||
@@ -48,8 +48,8 @@ interface NavLinkProps {
|
||||
|
||||
function NavLink({ to, icon, label, isActive }: NavLinkProps) {
|
||||
return (
|
||||
<a
|
||||
href={to}
|
||||
<Link
|
||||
to={to}
|
||||
className={`flex items-center gap-3 px-3 py-2 rounded-lg text-sm font-medium transition-colors ${
|
||||
isActive
|
||||
? 'bg-emerald-50 text-emerald-700'
|
||||
@@ -58,7 +58,7 @@ function NavLink({ to, icon, label, isActive }: NavLinkProps) {
|
||||
>
|
||||
<span className={`flex-shrink-0 ${isActive ? 'text-emerald-600' : 'text-gray-400'}`}>{icon}</span>
|
||||
<span>{label}</span>
|
||||
</a>
|
||||
</Link>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -86,6 +86,8 @@ export function Layout({ children }: LayoutProps) {
|
||||
const { user, logout } = useAuthStore();
|
||||
const [versionInfo, setVersionInfo] = useState<VersionInfo | null>(null);
|
||||
const [sidebarOpen, setSidebarOpen] = useState(false);
|
||||
const navRef = useRef<HTMLElement>(null);
|
||||
const scrollPositionRef = useRef<number>(0);
|
||||
|
||||
useEffect(() => {
|
||||
const fetchVersion = async () => {
|
||||
@@ -111,9 +113,27 @@ export function Layout({ children }: LayoutProps) {
     return location.pathname.startsWith(path);
   };

-  // Close sidebar on route change (mobile)
+  // Save scroll position before route change
+  useEffect(() => {
+    const nav = navRef.current;
+    if (nav) {
+      const handleScroll = () => {
+        scrollPositionRef.current = nav.scrollTop;
+      };
+      nav.addEventListener('scroll', handleScroll);
+      return () => nav.removeEventListener('scroll', handleScroll);
+    }
+  }, []);
+
+  // Restore scroll position after route change and close mobile sidebar
   useEffect(() => {
     setSidebarOpen(false);
+    // Restore scroll position after render
+    requestAnimationFrame(() => {
+      if (navRef.current) {
+        navRef.current.scrollTop = scrollPositionRef.current;
+      }
+    });
   }, [location.pathname]);

   const sidebarContent = (
@@ -145,7 +165,7 @@ export function Layout({ children }: LayoutProps) {
         </div>

         {/* Navigation */}
-        <nav className="flex-1 px-3 py-4 space-y-6 overflow-y-auto">
+        <nav ref={navRef} className="flex-1 px-3 py-4 space-y-6 overflow-y-auto">
           <NavSection title="Main">
             <NavLink to="/dashboard" icon={<LayoutDashboard className="w-4 h-4" />} label="Dashboard" isActive={isActive('/dashboard', true)} />
             <NavLink to="/dispensaries" icon={<Building2 className="w-4 h-4" />} label="Dispensaries" isActive={isActive('/dispensaries')} />

138
cannaiq/src/components/PasswordConfirmModal.tsx
Normal file
@@ -0,0 +1,138 @@
import { useState, useEffect, useRef } from 'react';
import { api } from '../lib/api';
import { Shield, X, Loader2 } from 'lucide-react';

interface PasswordConfirmModalProps {
  isOpen: boolean;
  onClose: () => void;
  onConfirm: () => void;
  title: string;
  description: string;
}

export function PasswordConfirmModal({
  isOpen,
  onClose,
  onConfirm,
  title,
  description,
}: PasswordConfirmModalProps) {
  const [password, setPassword] = useState('');
  const [error, setError] = useState('');
  const [loading, setLoading] = useState(false);
  const inputRef = useRef<HTMLInputElement>(null);

  useEffect(() => {
    if (isOpen) {
      setPassword('');
      setError('');
      // Focus the input when modal opens
      setTimeout(() => inputRef.current?.focus(), 100);
    }
  }, [isOpen]);

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    if (!password.trim()) {
      setError('Password is required');
      return;
    }

    setLoading(true);
    setError('');

    try {
      const result = await api.verifyPassword(password);
      if (result.verified) {
        onConfirm();
        onClose();
      } else {
        setError('Invalid password');
      }
    } catch (err: any) {
      setError(err.message || 'Verification failed');
    } finally {
      setLoading(false);
    }
  };

  if (!isOpen) return null;

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center">
      {/* Backdrop */}
      <div
        className="absolute inset-0 bg-black bg-opacity-50"
        onClick={onClose}
      />

      {/* Modal */}
      <div className="relative bg-white rounded-lg shadow-xl max-w-md w-full mx-4">
        {/* Header */}
        <div className="flex items-center justify-between px-6 py-4 border-b border-gray-200">
          <div className="flex items-center gap-3">
            <div className="p-2 bg-amber-100 rounded-lg">
              <Shield className="w-5 h-5 text-amber-600" />
            </div>
            <h3 className="text-lg font-semibold text-gray-900">{title}</h3>
          </div>
          <button
            onClick={onClose}
            className="p-1 hover:bg-gray-100 rounded-lg transition-colors"
          >
            <X className="w-5 h-5 text-gray-500" />
          </button>
        </div>

        {/* Body */}
        <form onSubmit={handleSubmit}>
          <div className="px-6 py-4">
            <p className="text-gray-600 mb-4">{description}</p>

            <div className="space-y-2">
              <label
                htmlFor="password"
                className="block text-sm font-medium text-gray-700"
              >
                Enter your password to continue
              </label>
              <input
                ref={inputRef}
                type="password"
                id="password"
                value={password}
                onChange={(e) => setPassword(e.target.value)}
                className="w-full px-4 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-emerald-500 focus:border-emerald-500"
                placeholder="Password"
                disabled={loading}
              />
              {error && (
                <p className="text-sm text-red-600">{error}</p>
              )}
            </div>
          </div>

          {/* Footer */}
          <div className="flex justify-end gap-3 px-6 py-4 border-t border-gray-200 bg-gray-50 rounded-b-lg">
            <button
              type="button"
              onClick={onClose}
              disabled={loading}
              className="px-4 py-2 text-gray-700 hover:bg-gray-100 rounded-lg transition-colors"
            >
              Cancel
            </button>
            <button
              type="submit"
              disabled={loading}
              className="px-4 py-2 bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 transition-colors disabled:opacity-50 flex items-center gap-2"
            >
              {loading && <Loader2 className="w-4 h-4 animate-spin" />}
              Confirm
            </button>
          </div>
        </form>
      </div>
    </div>
  );
}
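For context, a sketch of how a caller might gate a destructive action behind this modal. The page and handler below are hypothetical; only the props match the component above:

import { useState } from 'react';
import { PasswordConfirmModal } from '../components/PasswordConfirmModal';

// Hypothetical caller: the action runs only after the modal's password
// check succeeds, because onConfirm() fires only on a verified password.
export function DangerZoneExample({ onWipe }: { onWipe: () => void }) {
  const [confirmOpen, setConfirmOpen] = useState(false);

  return (
    <>
      <button onClick={() => setConfirmOpen(true)}>Wipe data</button>
      <PasswordConfirmModal
        isOpen={confirmOpen}
        onClose={() => setConfirmOpen(false)}
        onConfirm={onWipe}
        title="Confirm wipe"
        description="This permanently deletes data. Confirm your password to continue."
      />
    </>
  );
}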
@@ -84,6 +84,13 @@ class ApiClient {
     });
   }

+  async verifyPassword(password: string) {
+    return this.request<{ verified: boolean; error?: string }>('/api/auth/verify-password', {
+      method: 'POST',
+      body: JSON.stringify({ password }),
+    });
+  }
+
   async getMe() {
     return this.request<{ user: any }>('/api/auth/me');
   }
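The client above expects `/api/auth/verify-password` to return `{ verified: boolean }`. One plausible server-side counterpart, assuming the backend keeps bcrypt hashes in a `users.password_hash` column and that auth middleware sets `req.user`; both are assumptions, and only the route path and response shape come from this diff:

import { Router } from 'express';
import bcrypt from 'bcryptjs';
import { Pool } from 'pg';

// Hypothetical handler for the verify-password endpoint the client calls.
export function authVerifyRoute(pool: Pool): Router {
  const router = Router();

  router.post('/verify-password', async (req: any, res) => {
    const { password } = req.body || {};
    if (!password) {
      return res.status(400).json({ verified: false, error: 'Password is required' });
    }
    // Assumes upstream auth middleware attached the logged-in user.
    const result = await pool.query(
      'SELECT password_hash FROM users WHERE id = $1',
      [req.user.id]
    );
    if (result.rows.length === 0) {
      return res.status(401).json({ verified: false, error: 'Unknown user' });
    }
    const verified = await bcrypt.compare(password, result.rows[0].password_hash);
    res.json({ verified });
  });

  return router;
}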
@@ -2950,6 +2957,25 @@ class ApiClient {
       { method: 'POST' }
     );
   }
+
+  // K8s Worker Control
+  async getK8sWorkers() {
+    return this.request<{
+      success: boolean;
+      available: boolean;
+      replicas: number;
+      readyReplicas: number;
+      availableReplicas?: number;
+      error?: string;
+    }>('/api/k8s/workers');
+  }
+
+  async scaleK8sWorkers(replicas: number) {
+    return this.request<{ success: boolean; replicas: number; message?: string; error?: string }>(
+      '/api/k8s/workers/scale',
+      { method: 'POST', body: JSON.stringify({ replicas }) }
+    );
+  }
 }

 export const api = new ApiClient(API_URL);
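A quick usage sketch for the two new client methods; the call site is hypothetical, but the 0 to 20 replica bounds mirror the clamping the dashboard applies elsewhere in this changeset:

import { api } from '../lib/api';

// Hypothetical helper: read the current replica count, then scale by a delta.
async function bumpWorkers(delta: number): Promise<void> {
  const status = await api.getK8sWorkers();
  if (!status.available) {
    console.warn(status.error || 'K8s worker control not available');
    return;
  }
  const target = Math.max(0, Math.min(20, status.replicas + delta)); // clamp to 0-20
  const result = await api.scaleK8sWorkers(target);
  console.log(result.message || `Requested ${result.replicas} worker replicas`);
}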
@@ -11,7 +11,6 @@ import {
   ChevronRight,
   Users,
   Inbox,
   Zap,
   Timer,
   Plus,
   X,
@@ -566,122 +565,6 @@ function PriorityBadge({ priority }: { priority: number }) {
   );
 }

-// Pod visualization - shows pod as hub with worker nodes radiating out
-function PodVisualization({ podName, workers }: { podName: string; workers: Worker[] }) {
-  const busyCount = workers.filter(w => w.current_task_id !== null).length;
-  const allBusy = busyCount === workers.length;
-  const allIdle = busyCount === 0;
-
-  // Aggregate resource stats for the pod
-  const totalMemoryMb = workers.reduce((sum, w) => sum + (w.metadata?.memory_rss_mb || 0), 0);
-  const totalCpuUserMs = workers.reduce((sum, w) => sum + (w.metadata?.cpu_user_ms || 0), 0);
-  const totalCpuSystemMs = workers.reduce((sum, w) => sum + (w.metadata?.cpu_system_ms || 0), 0);
-  const totalCompleted = workers.reduce((sum, w) => sum + w.tasks_completed, 0);
-  const totalFailed = workers.reduce((sum, w) => sum + w.tasks_failed, 0);
-
-  // Format CPU time
-  const formatCpuTime = (ms: number) => {
-    if (ms < 1000) return `${ms}ms`;
-    if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`;
-    return `${(ms / 60000).toFixed(1)}m`;
-  };
-
-  // Pod color based on worker status
-  const podColor = allBusy ? 'bg-blue-500' : allIdle ? 'bg-emerald-500' : 'bg-yellow-500';
-  const podBorder = allBusy ? 'border-blue-400' : allIdle ? 'border-emerald-400' : 'border-yellow-400';
-  const podGlow = allBusy ? 'shadow-blue-200' : allIdle ? 'shadow-emerald-200' : 'shadow-yellow-200';
-
-  // Build pod tooltip
-  const podTooltip = [
-    `Pod: ${podName}`,
-    `Workers: ${busyCount}/${workers.length} busy`,
-    `Memory: ${totalMemoryMb} MB (RSS)`,
-    `CPU: ${formatCpuTime(totalCpuUserMs)} user, ${formatCpuTime(totalCpuSystemMs)} system`,
-    `Tasks: ${totalCompleted} completed, ${totalFailed} failed`,
-  ].join('\n');
-
-  return (
-    <div className="flex flex-col items-center p-4">
-      {/* Pod hub */}
-      <div className="relative">
-        {/* Center pod circle */}
-        <div
-          className={`w-20 h-20 rounded-full ${podColor} border-4 ${podBorder} shadow-lg ${podGlow} flex items-center justify-center text-white font-bold text-xs text-center leading-tight z-10 relative cursor-help`}
-          title={podTooltip}
-        >
-          <span className="px-1">{podName}</span>
-        </div>
-
-        {/* Worker nodes radiating out */}
-        {workers.map((worker, index) => {
-          const angle = (index * 360) / workers.length - 90; // Start from top
-          const radians = (angle * Math.PI) / 180;
-          const radius = 55; // Distance from center
-          const x = Math.cos(radians) * radius;
-          const y = Math.sin(radians) * radius;
-
-          const isBusy = worker.current_task_id !== null;
-          const workerColor = isBusy ? 'bg-blue-500' : 'bg-emerald-500';
-          const workerBorder = isBusy ? 'border-blue-300' : 'border-emerald-300';
-
-          // Line from center to worker
-          const lineLength = radius - 10;
-          const lineX = Math.cos(radians) * (lineLength / 2 + 10);
-          const lineY = Math.sin(radians) * (lineLength / 2 + 10);
-
-          return (
-            <div key={worker.id}>
-              {/* Connection line */}
-              <div
-                className={`absolute w-0.5 ${isBusy ? 'bg-blue-300' : 'bg-emerald-300'}`}
-                style={{
-                  height: `${lineLength}px`,
-                  left: '50%',
-                  top: '50%',
-                  transform: `translate(-50%, -50%) translate(${lineX}px, ${lineY}px) rotate(${angle + 90}deg)`,
-                  transformOrigin: 'center',
-                }}
-              />
-              {/* Worker node */}
-              <div
-                className={`absolute w-6 h-6 rounded-full ${workerColor} border-2 ${workerBorder} flex items-center justify-center text-white text-xs font-bold cursor-pointer hover:scale-110 transition-transform`}
-                style={{
-                  left: '50%',
-                  top: '50%',
-                  transform: `translate(-50%, -50%) translate(${x}px, ${y}px)`,
-                }}
-                title={`${worker.friendly_name}\nStatus: ${isBusy ? `Working on task #${worker.current_task_id}` : 'Idle - waiting for tasks'}\nMemory: ${worker.metadata?.memory_rss_mb || 0} MB\nCPU: ${formatCpuTime(worker.metadata?.cpu_user_ms || 0)} user, ${formatCpuTime(worker.metadata?.cpu_system_ms || 0)} sys\nCompleted: ${worker.tasks_completed} | Failed: ${worker.tasks_failed}\nLast heartbeat: ${new Date(worker.last_heartbeat_at).toLocaleTimeString()}`}
-              >
-                {index + 1}
-              </div>
-            </div>
-          );
-        })}
-      </div>
-
-      {/* Pod stats */}
-      <div className="mt-12 text-center">
-        <p className="text-xs text-gray-500">
-          {busyCount}/{workers.length} busy
-        </p>
-      </div>
-    </div>
-  );
-}
-
-// Group workers by pod
-function groupWorkersByPod(workers: Worker[]): Map<string, Worker[]> {
-  const pods = new Map<string, Worker[]>();
-  for (const worker of workers) {
-    const podName = worker.pod_name || 'Unknown';
-    if (!pods.has(podName)) {
-      pods.set(podName, []);
-    }
-    pods.get(podName)!.push(worker);
-  }
-  return pods;
-}

 export function JobQueue() {
   const [workers, setWorkers] = useState<Worker[]>([]);
   const [tasks, setTasks] = useState<Task[]>([]);
@@ -768,7 +651,6 @@ export function JobQueue() {

   // Get active workers (for display)
   const activeWorkers = workers.filter(w => w.status !== 'offline' && w.status !== 'terminated');
-  const busyWorkers = workers.filter(w => w.current_task_id !== null);

   if (loading) {
     return (
@@ -874,46 +756,6 @@ export function JobQueue() {
         </div>
       )}

-      {/* Pods & Workers Section */}
-      <div className="bg-white rounded-lg border border-gray-200 overflow-hidden">
-        <div className="px-4 py-3 border-b border-gray-200 bg-gray-50">
-          <div className="flex items-center justify-between">
-            <div>
-              <h3 className="text-sm font-semibold text-gray-900 flex items-center gap-2">
-                <Zap className="w-4 h-4 text-emerald-500" />
-                Worker Pods ({Array.from(groupWorkersByPod(workers)).length} pods, {activeWorkers.length} workers)
-              </h3>
-              <p className="text-xs text-gray-500 mt-0.5">
-                <span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-emerald-500"></span> idle</span>
-                <span className="mx-2">|</span>
-                <span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-blue-500"></span> busy</span>
-                <span className="mx-2">|</span>
-                <span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-yellow-500"></span> mixed</span>
-              </p>
-            </div>
-            <div className="text-sm text-gray-500">
-              {busyWorkers.length} busy, {activeWorkers.length - busyWorkers.length} idle
-            </div>
-          </div>
-        </div>
-
-        {workers.length === 0 ? (
-          <div className="px-4 py-12 text-center text-gray-500">
-            <Users className="w-12 h-12 mx-auto mb-3 text-gray-300" />
-            <p className="font-medium">No worker pods running</p>
-            <p className="text-xs mt-1">Start pods to process tasks from the queue</p>
-          </div>
-        ) : (
-          <div className="p-6">
-            <div className="flex flex-wrap justify-center gap-8">
-              {Array.from(groupWorkersByPod(workers)).map(([podName, podWorkers]) => (
-                <PodVisualization key={podName} podName={podName} workers={podWorkers} />
-              ))}
-            </div>
-          </div>
-        )}
-      </div>

       {/* Task Pool Section */}
       <div className="bg-white rounded-lg border border-gray-200 overflow-hidden">
         <div className="px-4 py-3 border-b border-gray-200 bg-gray-50">
@@ -275,7 +275,7 @@ export default function NationalDashboard() {
         <>
           <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
             <MetricCard
-              title="States"
+              title="Regions (US + CA)"
               value={summary.activeStates}
               icon={Globe}
             />

@@ -14,7 +14,6 @@ import {
   ChevronUp,
   Gauge,
   Users,
   Power,
   Play,
   Square,
 } from 'lucide-react';
@@ -225,21 +224,22 @@ export default function TasksDashboard() {
   return (
     <Layout>
       <div className="space-y-6">
-        {/* Header */}
-        <div className="flex flex-col sm:flex-row sm:items-center sm:justify-between gap-4">
-          <div>
-            <h1 className="text-2xl font-bold text-gray-900 flex items-center gap-2">
-              <ListChecks className="w-7 h-7 text-emerald-600" />
-              Task Queue
-            </h1>
-            <p className="text-gray-500 mt-1">
-              {totalActive} active, {totalPending} pending tasks
-            </p>
-          </div>
+        {/* Sticky Header */}
+        <div className="sticky top-0 z-10 bg-white pb-4 -mx-6 px-6 pt-2 border-b border-gray-200 shadow-sm">
+          <div className="flex flex-col sm:flex-row sm:items-center sm:justify-between gap-4">
+            <div>
+              <h1 className="text-2xl font-bold text-gray-900 flex items-center gap-2">
+                <ListChecks className="w-7 h-7 text-emerald-600" />
+                Task Queue
+              </h1>
+              <p className="text-gray-500 mt-1">
+                {totalActive} active, {totalPending} pending tasks
+              </p>
+            </div>

-          <div className="flex items-center gap-4">
-            {/* Pool Toggle */}
-            <button
+            <div className="flex items-center gap-4">
+              {/* Pool Toggle */}
+              <button
                 onClick={togglePool}
                 disabled={poolLoading}
                 className={`flex items-center gap-2 px-4 py-2 rounded-lg font-medium transition-colors ${
@@ -261,6 +261,7 @@ export default function TasksDashboard() {
               )}
             </button>
+            <span className="text-sm text-gray-400">Auto-refreshes every 15s</span>
           </div>
         </div>
       </div>

@@ -18,9 +18,11 @@ import {
   Server,
   MapPin,
   Trash2,
   PowerOff,
   Undo2,
   Plus,
   Minus,
   Loader2,
+  MemoryStick,
+  AlertTriangle,
 } from 'lucide-react';

 // Worker from registry
@@ -39,16 +41,25 @@ interface Worker {
   tasks_completed: number;
   tasks_failed: number;
   current_task_id: number | null;
+  current_task_ids?: number[]; // Multiple concurrent tasks
+  active_task_count?: number;
+  max_concurrent_tasks?: number;
   health_status: string;
   seconds_since_heartbeat: number;
+  decommission_requested?: boolean;
+  decommission_reason?: string;
   metadata: {
     cpu?: number;
     memory?: number;
     memoryTotal?: number;
     memory_mb?: number;
     memory_total_mb?: number;
+    memory_percent?: number; // NEW: memory as percentage
     cpu_user_ms?: number;
     cpu_system_ms?: number;
+    cpu_percent?: number; // NEW: CPU percentage
+    is_backing_off?: boolean; // NEW: resource backoff state
+    backoff_reason?: string; // NEW: why backing off
     proxy_location?: {
       city?: string;
       state?: string;
@@ -72,14 +83,6 @@ interface Task {
   worker_id: string | null;
 }

-// K8s replica info (added 2024-12-10)
-interface K8sReplicas {
-  current: number;
-  desired: number;
-  available: number;
-  updated: number;
-}
-
 function formatRelativeTime(dateStr: string | null): string {
   if (!dateStr) return '-';
   const date = new Date(dateStr);
@@ -220,69 +223,257 @@ function HealthBadge({ status, healthStatus }: { status: string; healthStatus: s
   );
 }

+// Format CPU time for display
+function formatCpuTime(ms: number): string {
+  if (ms < 1000) return `${ms}ms`;
+  if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`;
+  return `${(ms / 60000).toFixed(1)}m`;
+}
+
+// Resource usage badge showing memory%, CPU%, and backoff status
+function ResourceBadge({ worker }: { worker: Worker }) {
+  const memPercent = worker.metadata?.memory_percent;
+  const cpuPercent = worker.metadata?.cpu_percent;
+  const isBackingOff = worker.metadata?.is_backing_off;
+  const backoffReason = worker.metadata?.backoff_reason;
+
+  if (isBackingOff) {
+    return (
+      <div className="flex items-center gap-1.5" title={backoffReason || 'Backing off due to resource pressure'}>
+        <AlertTriangle className="w-4 h-4 text-amber-500 animate-pulse" />
+        <span className="text-xs text-amber-600 font-medium">Backing off</span>
+      </div>
+    );
+  }
+
+  // No data yet
+  if (memPercent === undefined && cpuPercent === undefined) {
+    return <span className="text-gray-400 text-xs">-</span>;
+  }
+
+  // Color based on usage level
+  const getColor = (pct: number) => {
+    if (pct >= 90) return 'text-red-600';
+    if (pct >= 75) return 'text-amber-600';
+    if (pct >= 50) return 'text-yellow-600';
+    return 'text-emerald-600';
+  };
+
+  return (
+    <div className="flex flex-col gap-0.5 text-xs">
+      {memPercent !== undefined && (
+        <div className="flex items-center gap-1" title={`Memory: ${worker.metadata?.memory_mb || 0}MB / ${worker.metadata?.memory_total_mb || 0}MB`}>
+          <MemoryStick className={`w-3 h-3 ${getColor(memPercent)}`} />
+          <span className={getColor(memPercent)}>{memPercent}%</span>
+        </div>
+      )}
+      {cpuPercent !== undefined && (
+        <div className="flex items-center gap-1">
+          <Cpu className={`w-3 h-3 ${getColor(cpuPercent)}`} />
+          <span className={getColor(cpuPercent)}>{cpuPercent}%</span>
+        </div>
+      )}
+    </div>
+  );
+}
+
+// Task count badge showing active/max concurrent tasks
+function TaskCountBadge({ worker, tasks }: { worker: Worker; tasks: Task[] }) {
+  const activeCount = worker.active_task_count ?? (worker.current_task_id ? 1 : 0);
+  const maxCount = worker.max_concurrent_tasks ?? 1;
+  const taskIds = worker.current_task_ids ?? (worker.current_task_id ? [worker.current_task_id] : []);
+
+  if (activeCount === 0) {
+    return <span className="text-gray-400 text-sm">Idle</span>;
+  }
+
+  // Get task names for tooltip
+  const taskNames = taskIds.map(id => {
+    const task = tasks.find(t => t.id === id);
+    return task ? `#${id}: ${task.role}${task.dispensary_name ? ` (${task.dispensary_name})` : ''}` : `#${id}`;
+  }).join('\n');
+
+  return (
+    <div className="flex items-center gap-2" title={taskNames}>
+      <span className="text-sm font-medium text-blue-600">
+        {activeCount}/{maxCount} tasks
+      </span>
+      {taskIds.length === 1 && (
+        <span className="text-xs text-gray-500">#{taskIds[0]}</span>
+      )}
+    </div>
+  );
+}
+
+// Pod visualization - shows pod as hub with worker nodes radiating out
+function PodVisualization({
+  podName,
+  workers,
+  isSelected = false,
+  onSelect
+}: {
+  podName: string;
+  workers: Worker[];
+  isSelected?: boolean;
+  onSelect?: () => void;
+}) {
+  const busyCount = workers.filter(w => w.current_task_id !== null).length;
+  const allBusy = busyCount === workers.length;
+  const allIdle = busyCount === 0;
+
+  // Aggregate resource stats for the pod
+  const totalMemoryMb = workers.reduce((sum, w) => sum + (w.metadata?.memory_mb || 0), 0);
+  const totalCpuUserMs = workers.reduce((sum, w) => sum + (w.metadata?.cpu_user_ms || 0), 0);
+  const totalCpuSystemMs = workers.reduce((sum, w) => sum + (w.metadata?.cpu_system_ms || 0), 0);
+  const totalCompleted = workers.reduce((sum, w) => sum + w.tasks_completed, 0);
+  const totalFailed = workers.reduce((sum, w) => sum + w.tasks_failed, 0);
+
+  // Pod color based on worker status
+  const podColor = allBusy ? 'bg-blue-500' : allIdle ? 'bg-emerald-500' : 'bg-yellow-500';
+  const podBorder = allBusy ? 'border-blue-400' : allIdle ? 'border-emerald-400' : 'border-yellow-400';
+  const podGlow = allBusy ? 'shadow-blue-200' : allIdle ? 'shadow-emerald-200' : 'shadow-yellow-200';
+
+  // Selection ring
+  const selectionRing = isSelected ? 'ring-4 ring-purple-400 ring-offset-2' : '';
+
+  // Build pod tooltip
+  const podTooltip = [
+    `Pod: ${podName}`,
+    `Workers: ${busyCount}/${workers.length} busy`,
+    `Memory: ${totalMemoryMb} MB (RSS)`,
+    `CPU: ${formatCpuTime(totalCpuUserMs)} user, ${formatCpuTime(totalCpuSystemMs)} system`,
+    `Tasks: ${totalCompleted} completed, ${totalFailed} failed`,
+    'Click to select',
+  ].join('\n');
+
+  return (
+    <div className="flex flex-col items-center p-4">
+      {/* Pod hub */}
+      <div className="relative">
+        {/* Center pod circle */}
+        <div
+          className={`w-20 h-20 rounded-full ${podColor} border-4 ${podBorder} shadow-lg ${podGlow} ${selectionRing} flex items-center justify-center text-white font-bold text-xs text-center leading-tight z-10 relative cursor-pointer hover:scale-105 transition-all`}
+          title={podTooltip}
+          onClick={onSelect}
+        >
+          <span className="px-1">{podName}</span>
+        </div>
+
+        {/* Worker nodes radiating out */}
+        {workers.map((worker, index) => {
+          const angle = (index * 360) / workers.length - 90; // Start from top
+          const radians = (angle * Math.PI) / 180;
+          const radius = 55; // Distance from center
+          const x = Math.cos(radians) * radius;
+          const y = Math.sin(radians) * radius;
+
+          const isBusy = worker.current_task_id !== null;
+          const isDecommissioning = worker.decommission_requested;
+          const workerColor = isDecommissioning ? 'bg-orange-500' : isBusy ? 'bg-blue-500' : 'bg-emerald-500';
+          const workerBorder = isDecommissioning ? 'border-orange-300' : isBusy ? 'border-blue-300' : 'border-emerald-300';
+
+          // Line from center to worker
+          const lineLength = radius - 10;
+          const lineX = Math.cos(radians) * (lineLength / 2 + 10);
+          const lineY = Math.sin(radians) * (lineLength / 2 + 10);
+
+          return (
+            <div key={worker.id}>
+              {/* Connection line */}
+              <div
+                className={`absolute w-0.5 ${isDecommissioning ? 'bg-orange-300' : isBusy ? 'bg-blue-300' : 'bg-emerald-300'}`}
+                style={{
+                  height: `${lineLength}px`,
+                  left: '50%',
+                  top: '50%',
+                  transform: `translate(-50%, -50%) translate(${lineX}px, ${lineY}px) rotate(${angle + 90}deg)`,
+                  transformOrigin: 'center',
+                }}
+              />
+              {/* Worker node */}
+              <div
+                className={`absolute w-6 h-6 rounded-full ${workerColor} border-2 ${workerBorder} flex items-center justify-center text-white text-xs font-bold cursor-pointer hover:scale-110 transition-transform`}
+                style={{
+                  left: '50%',
+                  top: '50%',
+                  transform: `translate(-50%, -50%) translate(${x}px, ${y}px)`,
+                }}
+                title={`${worker.friendly_name}\nStatus: ${isDecommissioning ? 'Stopping after current task' : isBusy ? `Working on task #${worker.current_task_id}` : 'Idle - waiting for tasks'}\nMemory: ${worker.metadata?.memory_mb || 0} MB\nCPU: ${formatCpuTime(worker.metadata?.cpu_user_ms || 0)} user, ${formatCpuTime(worker.metadata?.cpu_system_ms || 0)} sys\nCompleted: ${worker.tasks_completed} | Failed: ${worker.tasks_failed}\nLast heartbeat: ${new Date(worker.last_heartbeat_at).toLocaleTimeString()}`}
+              >
+                {index + 1}
+              </div>
+            </div>
+          );
+        })}
+      </div>

+      {/* Pod stats */}
+      <div className="mt-12 text-center">
+        <p className="text-xs text-gray-500">
+          {busyCount}/{workers.length} busy
+        </p>
+        {isSelected && (
+          <p className="text-xs text-purple-600 font-medium mt-1">Selected</p>
+        )}
+      </div>
+    </div>
+  );
+}
+
+// Group workers by pod
+function groupWorkersByPod(workers: Worker[]): Map<string, Worker[]> {
+  const pods = new Map<string, Worker[]>();
+  for (const worker of workers) {
+    const podName = worker.pod_name || 'Unknown';
+    if (!pods.has(podName)) {
+      pods.set(podName, []);
+    }
+    pods.get(podName)!.push(worker);
+  }
+  return pods;
+}
+
+// Format estimated time remaining
+function formatEstimatedTime(hours: number): string {
+  if (hours < 1) {
+    return `${Math.round(hours * 60)} minutes`;
+  }
+  if (hours < 24) {
+    return `${hours.toFixed(1)} hours`;
+  }
+  const days = hours / 24;
+  if (days < 7) {
+    return `${days.toFixed(1)} days`;
+  }
+  return `${(days / 7).toFixed(1)} weeks`;
+}
+
 export function WorkersDashboard() {
   const [workers, setWorkers] = useState<Worker[]>([]);
   const [tasks, setTasks] = useState<Task[]>([]);
+  const [pendingTaskCount, setPendingTaskCount] = useState<number>(0);
   const [loading, setLoading] = useState(true);
   const [error, setError] = useState<string | null>(null);

-  // K8s scaling state (added 2024-12-10)
-  const [k8sReplicas, setK8sReplicas] = useState<K8sReplicas | null>(null);
-  const [k8sError, setK8sError] = useState<string | null>(null);
-  const [scaling, setScaling] = useState(false);
-  const [targetReplicas, setTargetReplicas] = useState<number | null>(null);
+  // Pod selection state
+  const [selectedPod, setSelectedPod] = useState<string | null>(null);

   // Pagination
   const [page, setPage] = useState(0);
   const workersPerPage = 15;

-  // Fetch K8s replica count (added 2024-12-10)
-  const fetchK8sReplicas = useCallback(async () => {
-    try {
-      const res = await api.get('/api/workers/k8s/replicas');
-      if (res.data.success && res.data.replicas) {
-        setK8sReplicas(res.data.replicas);
-        if (targetReplicas === null) {
-          setTargetReplicas(res.data.replicas.desired);
-        }
-        setK8sError(null);
-      }
-    } catch (err: any) {
-      // K8s not available (local dev or no RBAC)
-      setK8sError(err.response?.data?.error || 'K8s not available');
-      setK8sReplicas(null);
-    }
-  }, [targetReplicas]);
-
-  // Scale workers (added 2024-12-10)
-  const handleScale = useCallback(async (replicas: number) => {
-    if (replicas < 0 || replicas > 20) return;
-    setScaling(true);
-    try {
-      const res = await api.post('/api/workers/k8s/scale', { replicas });
-      if (res.data.success) {
-        setTargetReplicas(replicas);
-        // Refresh after a short delay to see the change
-        setTimeout(fetchK8sReplicas, 1000);
-      }
-    } catch (err: any) {
-      console.error('Scale error:', err);
-      setK8sError(err.response?.data?.error || 'Failed to scale');
-    } finally {
-      setScaling(false);
-    }
-  }, [fetchK8sReplicas]);
-
   const fetchData = useCallback(async () => {
     try {
-      // Fetch workers from registry
-      const workersRes = await api.get('/api/worker-registry/workers');
-
-      // Fetch running tasks to get current task details
-      const tasksRes = await api.get('/api/tasks?status=running&limit=100');
+      // Fetch workers from registry, running tasks, and task counts
+      const [workersRes, tasksRes, countsRes] = await Promise.all([
+        api.get('/api/worker-registry/workers'),
+        api.get('/api/tasks?status=running&limit=100'),
+        api.get('/api/tasks/counts'),
+      ]);

       setWorkers(workersRes.data.workers || []);
       setTasks(tasksRes.data.tasks || []);
+      setPendingTaskCount(countsRes.data?.pending || 0);
       setError(null);
     } catch (err: any) {
       console.error('Fetch error:', err);
@@ -303,16 +494,51 @@ export function WorkersDashboard() {
     }
   };

+  // Decommission a worker (graceful shutdown after current task)
+  const handleDecommissionWorker = async (workerId: string, friendlyName: string) => {
+    if (!confirm(`Decommission ${friendlyName}? Worker will stop after completing its current task.`)) return;
+    try {
+      const res = await api.post(`/api/worker-registry/workers/${workerId}/decommission`, {
+        reason: 'Manual decommission from admin UI'
+      });
+      if (res.data.success) {
+        fetchData();
+      }
+    } catch (err: any) {
+      console.error('Decommission error:', err);
+      alert(err.response?.data?.error || 'Failed to decommission worker');
+    }
+  };
+
+  // Cancel decommission
+  const handleCancelDecommission = async (workerId: string) => {
+    try {
+      await api.post(`/api/worker-registry/workers/${workerId}/cancel-decommission`);
+      fetchData();
+    } catch (err: any) {
+      console.error('Cancel decommission error:', err);
+    }
+  };
+
+  // Add a worker by scaling up the K8s deployment
+  const handleAddWorker = async () => {
+    try {
+      const res = await api.post('/api/workers/k8s/scale-up');
+      if (res.data.success) {
+        // Refresh after a short delay to see the new worker
+        setTimeout(fetchData, 2000);
+      }
+    } catch (err: any) {
+      console.error('Add worker error:', err);
+      alert(err.response?.data?.error || 'Failed to add worker. K8s scaling may not be available.');
+    }
+  };
+
   useEffect(() => {
     fetchData();
-    fetchK8sReplicas(); // Added 2024-12-10
     const interval = setInterval(fetchData, 5000);
-    const k8sInterval = setInterval(fetchK8sReplicas, 10000); // K8s refresh every 10s
-    return () => {
-      clearInterval(interval);
-      clearInterval(k8sInterval);
-    };
-  }, [fetchData, fetchK8sReplicas]);
+    return () => clearInterval(interval);
+  }, [fetchData]);

   // Paginated workers
   const paginatedWorkers = workers.slice(
@@ -352,15 +578,9 @@ export function WorkersDashboard() {
           <h1 className="text-2xl font-bold text-gray-900">Workers</h1>
           <p className="text-gray-500 mt-1">
             {workers.length} registered workers ({busyWorkers.length} busy, {idleWorkers.length} idle)
+            <span className="text-xs text-gray-400 ml-2">(auto-refresh 5s)</span>
           </p>
         </div>
-        <button
-          onClick={() => fetchData()}
-          className="flex items-center gap-2 px-4 py-2 bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 transition-colors"
-        >
-          <RefreshCw className="w-4 h-4" />
-          Refresh
-        </button>
       </div>

       {error && (
@@ -369,68 +589,6 @@ export function WorkersDashboard() {
         </div>
       )}

-      {/* K8s Scaling Card (added 2024-12-10) */}
-      {k8sReplicas && (
-        <div className="bg-white rounded-lg border border-gray-200 p-4">
-          <div className="flex items-center justify-between">
-            <div className="flex items-center gap-3">
-              <div className="w-10 h-10 bg-purple-100 rounded-lg flex items-center justify-center">
-                <Server className="w-5 h-5 text-purple-600" />
-              </div>
-              <div>
-                <p className="text-sm text-gray-500">K8s Worker Pods</p>
-                <p className="text-xl font-semibold">
-                  {k8sReplicas.current} / {k8sReplicas.desired}
-                  {k8sReplicas.current !== k8sReplicas.desired && (
-                    <span className="text-sm font-normal text-yellow-600 ml-2">scaling...</span>
-                  )}
-                </p>
-              </div>
-            </div>
-            <div className="flex items-center gap-2">
-              <button
-                onClick={() => handleScale((targetReplicas || k8sReplicas.desired) - 1)}
-                disabled={scaling || (targetReplicas || k8sReplicas.desired) <= 0}
-                className="w-8 h-8 flex items-center justify-center bg-gray-100 text-gray-700 rounded-lg hover:bg-gray-200 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
-                title="Scale down"
-              >
-                <Minus className="w-4 h-4" />
-              </button>
-              <input
-                type="number"
-                min="0"
-                max="20"
-                value={targetReplicas ?? k8sReplicas.desired}
-                onChange={(e) => setTargetReplicas(Math.max(0, Math.min(20, parseInt(e.target.value) || 0)))}
-                onBlur={() => {
-                  if (targetReplicas !== null && targetReplicas !== k8sReplicas.desired) {
-                    handleScale(targetReplicas);
-                  }
-                }}
-                onKeyDown={(e) => {
-                  if (e.key === 'Enter' && targetReplicas !== null && targetReplicas !== k8sReplicas.desired) {
-                    handleScale(targetReplicas);
-                  }
-                }}
-                className="w-16 text-center border border-gray-300 rounded-lg px-2 py-1 text-lg font-semibold"
-              />
-              <button
-                onClick={() => handleScale((targetReplicas || k8sReplicas.desired) + 1)}
-                disabled={scaling || (targetReplicas || k8sReplicas.desired) >= 20}
-                className="w-8 h-8 flex items-center justify-center bg-gray-100 text-gray-700 rounded-lg hover:bg-gray-200 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
-                title="Scale up"
-              >
-                <Plus className="w-4 h-4" />
-              </button>
-              {scaling && <Loader2 className="w-4 h-4 text-purple-600 animate-spin ml-2" />}
-            </div>
-          </div>
-          {k8sError && (
-            <p className="text-xs text-red-500 mt-2">{k8sError}</p>
-          )}
-        </div>
-      )}

       {/* Stats Cards */}
       <div className="grid grid-cols-5 gap-4">
         <div className="bg-white rounded-lg border border-gray-200 p-4">
@@ -490,6 +648,197 @@ export function WorkersDashboard() {
         </div>
       </div>

+      {/* Estimated Completion Time Card */}
+      {pendingTaskCount > 0 && activeWorkers.length > 0 && (() => {
+        // Calculate average task rate across all workers
+        const totalHoursUp = activeWorkers.reduce((sum, w) => {
+          if (!w.started_at) return sum;
+          const start = new Date(w.started_at);
+          const now = new Date();
+          return sum + (now.getTime() - start.getTime()) / (1000 * 60 * 60);
+        }, 0);
+
+        const totalTasksDone = totalCompleted + totalFailed;
+        const avgTasksPerHour = totalHoursUp > 0.1 ? totalTasksDone / totalHoursUp : 0;
+        const estimatedHours = avgTasksPerHour > 0 ? pendingTaskCount / avgTasksPerHour : null;
+
+        return (
+          <div className="bg-gradient-to-r from-amber-50 to-orange-50 rounded-lg border border-amber-200 p-4">
+            <div className="flex items-center justify-between">
+              <div className="flex items-center gap-3">
+                <div className="w-10 h-10 bg-amber-100 rounded-lg flex items-center justify-center">
+                  <Clock className="w-5 h-5 text-amber-600" />
+                </div>
+                <div>
+                  <p className="text-sm text-amber-700 font-medium">Estimated Time to Complete Queue</p>
+                  <p className="text-2xl font-bold text-amber-900">
+                    {estimatedHours !== null ? formatEstimatedTime(estimatedHours) : 'Calculating...'}
+                  </p>
+                </div>
+              </div>
+              <div className="text-right text-sm text-amber-700">
+                <p><span className="font-semibold">{pendingTaskCount}</span> pending tasks</p>
+                <p><span className="font-semibold">{activeWorkers.length}</span> active workers</p>
+                {avgTasksPerHour > 0 && (
+                  <p className="text-xs text-amber-600 mt-1">
+                    ~{avgTasksPerHour.toFixed(1)} tasks/hour
+                  </p>
+                )}
+              </div>
+            </div>
+          </div>
+        );
+      })()}
+
+      {/* Worker Pods Visualization */}
+      <div className="bg-white rounded-lg border border-gray-200 overflow-hidden">
+        <div className="px-4 py-3 border-b border-gray-200 bg-gray-50">
+          <div className="flex items-center justify-between">
+            <div>
+              <h3 className="text-sm font-semibold text-gray-900 flex items-center gap-2">
+                <Zap className="w-4 h-4 text-emerald-500" />
+                Worker Pods ({Array.from(groupWorkersByPod(workers)).length} pods, {activeWorkers.length} workers)
+              </h3>
+              <p className="text-xs text-gray-500 mt-0.5">
+                <span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-emerald-500"></span> idle</span>
+                <span className="mx-2">|</span>
+                <span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-blue-500"></span> busy</span>
+                <span className="mx-2">|</span>
+                <span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-yellow-500"></span> mixed</span>
+                <span className="mx-2">|</span>
+                <span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-orange-500"></span> stopping</span>
+              </p>
+            </div>
+            <div className="text-sm text-gray-500">
+              {busyWorkers.length} busy, {activeWorkers.length - busyWorkers.length} idle
+              {selectedPod && (
+                <button
+                  onClick={() => setSelectedPod(null)}
+                  className="ml-3 text-xs text-purple-600 hover:text-purple-800 underline"
+                >
+                  Clear selection
+                </button>
+              )}
+            </div>
+          </div>
+        </div>
+
+        {workers.length === 0 ? (
+          <div className="px-4 py-12 text-center text-gray-500">
+            <Users className="w-12 h-12 mx-auto mb-3 text-gray-300" />
+            <p className="font-medium">No worker pods running</p>
+            <p className="text-xs mt-1">Start pods to process tasks from the queue</p>
+          </div>
+        ) : (
+          <div className="p-6">
+            <div className="flex flex-wrap justify-center gap-8">
+              {Array.from(groupWorkersByPod(workers)).map(([podName, podWorkers]) => (
+                <PodVisualization
+                  key={podName}
+                  podName={podName}
+                  workers={podWorkers}
+                  isSelected={selectedPod === podName}
+                  onSelect={() => setSelectedPod(selectedPod === podName ? null : podName)}
+                />
+              ))}
+            </div>
+
+            {/* Selected Pod Control Panel */}
+            {selectedPod && (() => {
+              const podWorkers = groupWorkersByPod(workers).get(selectedPod) || [];
+              const busyInPod = podWorkers.filter(w => w.current_task_id !== null).length;
+              const idleInPod = podWorkers.filter(w => w.current_task_id === null && !w.decommission_requested).length;
+              const stoppingInPod = podWorkers.filter(w => w.decommission_requested).length;
+
+              return (
+                <div className="mt-6 border-t border-gray-200 pt-6">
+                  <div className="bg-purple-50 rounded-lg border border-purple-200 p-4">
+                    <div className="flex items-center justify-between mb-4">
+                      <div className="flex items-center gap-3">
+                        <div className="w-10 h-10 bg-purple-100 rounded-lg flex items-center justify-center">
+                          <Server className="w-5 h-5 text-purple-600" />
+                        </div>
+                        <div>
+                          <h4 className="font-semibold text-purple-900">{selectedPod}</h4>
+                          <p className="text-xs text-purple-600">
+                            {podWorkers.length} workers: {busyInPod} busy, {idleInPod} idle{stoppingInPod > 0 && `, ${stoppingInPod} stopping`}
+                          </p>
+                        </div>
+                      </div>
+                    </div>
+
+                    {/* Worker list in selected pod */}
+                    <div className="space-y-2">
+                      {podWorkers.map((worker) => {
+                        const isBusy = worker.current_task_id !== null;
+                        const isDecommissioning = worker.decommission_requested;
+
+                        return (
+                          <div key={worker.id} className="flex items-center justify-between bg-white rounded-lg px-3 py-2 border border-purple-100">
+                            <div className="flex items-center gap-3">
+                              <div className={`w-8 h-8 rounded-full flex items-center justify-center text-white text-sm font-bold ${
+                                isDecommissioning ? 'bg-orange-500' :
+                                isBusy ? 'bg-blue-500' : 'bg-emerald-500'
+                              }`}>
+                                {worker.friendly_name?.charAt(0) || '?'}
+                              </div>
+                              <div>
+                                <p className="text-sm font-medium text-gray-900">{worker.friendly_name}</p>
+                                <p className="text-xs text-gray-500">
+                                  {isDecommissioning ? (
+                                    <span className="text-orange-600">Stopping after current task...</span>
+                                  ) : isBusy ? (
+                                    <span className="text-blue-600">Working on task #{worker.current_task_id}</span>
+                                  ) : (
+                                    <span className="text-emerald-600">Idle - ready for tasks</span>
+                                  )}
+                                </p>
+                              </div>
+                            </div>
+                            <div className="flex items-center gap-2">
+                              {isDecommissioning ? (
+                                <button
+                                  onClick={() => handleCancelDecommission(worker.worker_id)}
+                                  className="flex items-center gap-1.5 px-3 py-1.5 text-sm bg-white border border-gray-300 text-gray-700 rounded-lg hover:bg-gray-50 transition-colors"
+                                  title="Cancel decommission"
+                                >
+                                  <Undo2 className="w-4 h-4" />
+                                  Cancel
+                                </button>
+                              ) : (
+                                <button
+                                  onClick={() => handleDecommissionWorker(worker.worker_id, worker.friendly_name)}
+                                  className="flex items-center gap-1.5 px-3 py-1.5 text-sm bg-orange-100 text-orange-700 rounded-lg hover:bg-orange-200 transition-colors"
+                                  title={isBusy ? 'Worker will stop after completing current task' : 'Remove idle worker'}
+                                >
+                                  <PowerOff className="w-4 h-4" />
+                                  {isBusy ? 'Stop after task' : 'Remove'}
+                                </button>
+                              )}
+                            </div>
+                          </div>
+                        );
+                      })}
+                    </div>
+
+                    {/* Add Worker button */}
+                    <div className="mt-4 pt-4 border-t border-purple-200">
+                      <button
+                        onClick={handleAddWorker}
+                        className="flex items-center gap-1.5 px-3 py-2 text-sm bg-emerald-100 text-emerald-700 rounded-lg hover:bg-emerald-200 transition-colors"
+                      >
+                        <Plus className="w-4 h-4" />
+                        Add Worker
+                      </button>
+                    </div>
+                  </div>
+                </div>
+              );
+            })()}
+          </div>
+        )}
+      </div>

       {/* Workers Table */}
       <div className="bg-white rounded-lg border border-gray-200 overflow-hidden">
         <div className="px-4 py-3 border-b border-gray-200 bg-gray-50 flex items-center justify-between">
@@ -532,10 +881,10 @@ export function WorkersDashboard() {
               <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Worker</th>
               <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Role</th>
               <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Status</th>
-              <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Exit Location</th>
-              <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Current Task</th>
+              <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Resources</th>
+              <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Tasks</th>
               <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Duration</th>
               <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Utilization</th>
               <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Throughput</th>
               <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Heartbeat</th>
               <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase"></th>
             </tr>
@@ -550,16 +899,29 @@ export function WorkersDashboard() {
                 <tr key={worker.id} className="hover:bg-gray-50">
                   <td className="px-4 py-3">
                     <div className="flex items-center gap-3">
-                      <div className={`w-10 h-10 rounded-full flex items-center justify-center text-white font-bold text-sm ${
+                      <div className={`w-10 h-10 rounded-full flex items-center justify-center text-white font-bold text-sm relative ${
+                        worker.decommission_requested ? 'bg-orange-500' :
                         worker.health_status === 'offline' ? 'bg-gray-400' :
                         worker.health_status === 'stale' ? 'bg-yellow-500' :
                         worker.health_status === 'busy' ? 'bg-blue-500' :
                         'bg-emerald-500'
                       }`}>
                         {worker.friendly_name?.charAt(0) || '?'}
+                        {worker.decommission_requested && (
+                          <div className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 rounded-full flex items-center justify-center">
+                            <PowerOff className="w-2.5 h-2.5 text-white" />
+                          </div>
+                        )}
                       </div>
                       <div>
-                        <p className="font-medium text-gray-900">{worker.friendly_name}</p>
+                        <p className="font-medium text-gray-900 flex items-center gap-1.5">
+                          {worker.friendly_name}
+                          {worker.decommission_requested && (
+                            <span className="text-xs text-orange-600 bg-orange-100 px-1.5 py-0.5 rounded" title={worker.decommission_reason || 'Pending decommission'}>
+                              stopping
+                            </span>
+                          )}
+                        </p>
                         <p className="text-xs text-gray-400 font-mono">{worker.worker_id.slice(0, 20)}...</p>
                       </div>
                     </div>
@@ -571,45 +933,10 @@ export function WorkersDashboard() {
                     <HealthBadge status={worker.status} healthStatus={worker.health_status} />
                   </td>
                   <td className="px-4 py-3">
-                    {(() => {
-                      const loc = worker.metadata?.proxy_location;
-                      if (!loc) {
-                        return <span className="text-gray-400 text-sm">-</span>;
-                      }
-                      const parts = [loc.city, loc.state, loc.country].filter(Boolean);
-                      if (parts.length === 0) {
-                        return loc.isRotating ? (
-                          <span className="text-xs text-purple-600 font-medium" title="Rotating proxy - exit location varies per request">
-                            Rotating
-                          </span>
-                        ) : (
-                          <span className="text-gray-400 text-sm">Unknown</span>
-                        );
-                      }
-                      return (
-                        <div className="flex items-center gap-1.5" title={loc.timezone || ''}>
-                          <MapPin className="w-3 h-3 text-gray-400" />
-                          <span className="text-sm text-gray-700">
-                            {parts.join(', ')}
-                          </span>
-                          {loc.isRotating && (
-                            <span className="text-xs text-purple-500" title="Rotating proxy">*</span>
-                          )}
-                        </div>
-                      );
-                    })()}
+                    <ResourceBadge worker={worker} />
                   </td>
                   <td className="px-4 py-3">
-                    {worker.current_task_id ? (
-                      <div>
-                        <span className="text-sm text-gray-900">Task #{worker.current_task_id}</span>
-                        {currentTask?.dispensary_name && (
-                          <p className="text-xs text-gray-500">{currentTask.dispensary_name}</p>
-                        )}
-                      </div>
-                    ) : (
-                      <span className="text-gray-400 text-sm">Idle</span>
-                    )}
+                    <TaskCountBadge worker={worker} tasks={tasks} />
                   </td>
                   <td className="px-4 py-3">
                     {currentTask?.started_at ? (
36
k8s/scraper-rbac.yaml
Normal file
@@ -0,0 +1,36 @@
# RBAC configuration for scraper pod to control worker scaling
# Allows the scraper to read and scale the scraper-worker statefulset
apiVersion: v1
kind: ServiceAccount
metadata:
  name: scraper-sa
  namespace: dispensary-scraper
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: worker-scaler
  namespace: dispensary-scraper
rules:
  # Allow reading deployment and statefulset status
  - apiGroups: ["apps"]
    resources: ["deployments", "statefulsets"]
    verbs: ["get", "list"]
  # Allow scaling deployments and statefulsets
  - apiGroups: ["apps"]
    resources: ["deployments/scale", "statefulsets/scale"]
    verbs: ["get", "patch", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: scraper-worker-scaler
  namespace: dispensary-scraper
subjects:
  - kind: ServiceAccount
    name: scraper-sa
    namespace: dispensary-scraper
roleRef:
  kind: Role
  name: worker-scaler
  apiGroup: rbac.authorization.k8s.io
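With this Role bound to `scraper-sa`, the scraper pod can read and patch the scale subresource from inside the cluster. A minimal sketch of the server-side call that could back `/api/k8s/workers` and `/api/k8s/workers/scale`, assuming `@kubernetes/client-node` and a StatefulSet named `scraper-worker`; the client library, method names, and workload name are assumptions, as only the RBAC rules and HTTP routes appear in this changeset:

import * as k8s from '@kubernetes/client-node';

const NAMESPACE = 'dispensary-scraper';
const WORKLOAD = 'scraper-worker'; // assumed StatefulSet name

const kc = new k8s.KubeConfig();
kc.loadFromCluster(); // authenticates as the pod's service account (scraper-sa)
const apps = kc.makeApiClient(k8s.AppsV1Api);

// Read replica counts (covered by the get/list rule above).
export async function getWorkerReplicas() {
  const { body } = await apps.readNamespacedStatefulSet(WORKLOAD, NAMESPACE);
  return {
    replicas: body.spec?.replicas ?? 0,
    readyReplicas: body.status?.readyReplicas ?? 0,
  };
}

// Scale through the statefulsets/scale subresource (get/patch/update rule above).
export async function scaleWorkers(replicas: number) {
  const { body: scale } = await apps.readNamespacedStatefulSetScale(WORKLOAD, NAMESPACE);
  scale.spec = { ...scale.spec, replicas };
  await apps.replaceNamespacedStatefulSetScale(WORKLOAD, NAMESPACE, scale);
  return { success: true, replicas };
}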
@@ -25,6 +25,7 @@ spec:
       labels:
         app: scraper
     spec:
+      serviceAccountName: scraper-sa
       imagePullSecrets:
        - name: regcred
      containers:
