Compare commits
1 Commit
fix/api-se
...
fix/ci-fil
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cdab44c757 |
194
.woodpecker.yml
194
.woodpecker.yml
@@ -1,194 +0,0 @@
|
|||||||
steps:
|
|
||||||
# ===========================================
|
|
||||||
# PR VALIDATION: Parallel type checks (PRs only)
|
|
||||||
# ===========================================
|
|
||||||
typecheck-backend:
|
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
|
||||||
commands:
|
|
||||||
- cd backend
|
|
||||||
- npm ci --prefer-offline
|
|
||||||
- npx tsc --noEmit
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
|
|
||||||
typecheck-cannaiq:
|
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
|
||||||
commands:
|
|
||||||
- cd cannaiq
|
|
||||||
- npm ci --prefer-offline
|
|
||||||
- npx tsc --noEmit
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
|
|
||||||
typecheck-findadispo:
|
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
|
||||||
commands:
|
|
||||||
- cd findadispo/frontend
|
|
||||||
- npm ci --prefer-offline
|
|
||||||
- npx tsc --noEmit 2>/dev/null || true
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
|
|
||||||
typecheck-findagram:
|
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
|
||||||
commands:
|
|
||||||
- cd findagram/frontend
|
|
||||||
- npm ci --prefer-offline
|
|
||||||
- npx tsc --noEmit 2>/dev/null || true
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
|
|
||||||
# ===========================================
|
|
||||||
# AUTO-MERGE: Merge PR after all checks pass
|
|
||||||
# ===========================================
|
|
||||||
auto-merge:
|
|
||||||
image: alpine:latest
|
|
||||||
environment:
|
|
||||||
GITEA_TOKEN:
|
|
||||||
from_secret: gitea_token
|
|
||||||
commands:
|
|
||||||
- apk add --no-cache curl
|
|
||||||
- |
|
|
||||||
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
|
|
||||||
curl -s -X POST \
|
|
||||||
-H "Authorization: token $GITEA_TOKEN" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{"Do":"merge"}' \
|
|
||||||
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
|
||||||
depends_on:
|
|
||||||
- typecheck-backend
|
|
||||||
- typecheck-cannaiq
|
|
||||||
- typecheck-findadispo
|
|
||||||
- typecheck-findagram
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
|
|
||||||
# ===========================================
|
|
||||||
# MASTER DEPLOY: Parallel Docker builds
|
|
||||||
# NOTE: cache_from/cache_to removed due to plugin bug splitting on commas
|
|
||||||
# ===========================================
|
|
||||||
docker-backend:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/dispensary-scraper
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: backend/Dockerfile
|
|
||||||
context: backend
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
build_args:
|
|
||||||
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
|
|
||||||
APP_GIT_SHA: ${CI_COMMIT_SHA}
|
|
||||||
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
|
|
||||||
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
docker-cannaiq:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/cannaiq-frontend
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: cannaiq/Dockerfile
|
|
||||||
context: cannaiq
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
docker-findadispo:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/findadispo-frontend
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: findadispo/frontend/Dockerfile
|
|
||||||
context: findadispo/frontend
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
docker-findagram:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/findagram-frontend
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: findagram/frontend/Dockerfile
|
|
||||||
context: findagram/frontend
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
# ===========================================
|
|
||||||
# STAGE 3: Deploy and Run Migrations
|
|
||||||
# ===========================================
|
|
||||||
deploy:
|
|
||||||
image: bitnami/kubectl:latest
|
|
||||||
environment:
|
|
||||||
KUBECONFIG_CONTENT:
|
|
||||||
from_secret: kubeconfig_data
|
|
||||||
commands:
|
|
||||||
- mkdir -p ~/.kube
|
|
||||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
|
||||||
- chmod 600 ~/.kube/config
|
|
||||||
# Deploy backend first
|
|
||||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
|
||||||
# Note: Migrations run automatically at startup via auto-migrate
|
|
||||||
# Deploy remaining services
|
|
||||||
# Resilience: ensure workers are scaled up if at 0
|
|
||||||
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
|
|
||||||
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
|
||||||
depends_on:
|
|
||||||
- docker-backend
|
|
||||||
- docker-cannaiq
|
|
||||||
- docker-findadispo
|
|
||||||
- docker-findagram
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
@@ -1,38 +1,46 @@
|
|||||||
steps:
|
steps:
|
||||||
# ===========================================
|
# ===========================================
|
||||||
# PR VALIDATION: Only typecheck changed projects
|
# PR VALIDATION: Parallel type checks (PRs only)
|
||||||
# ===========================================
|
# ===========================================
|
||||||
typecheck-backend:
|
typecheck-backend:
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
image: code.cannabrands.app/creationshop/node:20
|
||||||
commands:
|
commands:
|
||||||
- npm config set cache /npm-cache/backend --global
|
|
||||||
- cd backend
|
- cd backend
|
||||||
- npm ci --prefer-offline
|
- npm ci --prefer-offline
|
||||||
- npx tsc --noEmit
|
- npx tsc --noEmit
|
||||||
volumes:
|
|
||||||
- npm-cache:/npm-cache
|
|
||||||
depends_on: []
|
depends_on: []
|
||||||
when:
|
when:
|
||||||
event: pull_request
|
event: pull_request
|
||||||
path:
|
|
||||||
include: ['backend/**']
|
|
||||||
|
|
||||||
typecheck-cannaiq:
|
typecheck-cannaiq:
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
image: code.cannabrands.app/creationshop/node:20
|
||||||
commands:
|
commands:
|
||||||
- npm config set cache /npm-cache/cannaiq --global
|
|
||||||
- cd cannaiq
|
- cd cannaiq
|
||||||
- npm ci --prefer-offline
|
- npm ci --prefer-offline
|
||||||
- npx tsc --noEmit
|
- npx tsc --noEmit
|
||||||
volumes:
|
|
||||||
- npm-cache:/npm-cache
|
|
||||||
depends_on: []
|
depends_on: []
|
||||||
when:
|
when:
|
||||||
event: pull_request
|
event: pull_request
|
||||||
path:
|
|
||||||
include: ['cannaiq/**']
|
|
||||||
|
|
||||||
# findadispo/findagram typechecks skipped - they have || true anyway
|
typecheck-findadispo:
|
||||||
|
image: code.cannabrands.app/creationshop/node:20
|
||||||
|
commands:
|
||||||
|
- cd findadispo/frontend
|
||||||
|
- npm ci --prefer-offline
|
||||||
|
- npx tsc --noEmit 2>/dev/null || true
|
||||||
|
depends_on: []
|
||||||
|
when:
|
||||||
|
event: pull_request
|
||||||
|
|
||||||
|
typecheck-findagram:
|
||||||
|
image: code.cannabrands.app/creationshop/node:20
|
||||||
|
commands:
|
||||||
|
- cd findagram/frontend
|
||||||
|
- npm ci --prefer-offline
|
||||||
|
- npx tsc --noEmit 2>/dev/null || true
|
||||||
|
depends_on: []
|
||||||
|
when:
|
||||||
|
event: pull_request
|
||||||
|
|
||||||
# ===========================================
|
# ===========================================
|
||||||
# AUTO-MERGE: Merge PR after all checks pass
|
# AUTO-MERGE: Merge PR after all checks pass
|
||||||
@@ -54,6 +62,8 @@ steps:
|
|||||||
depends_on:
|
depends_on:
|
||||||
- typecheck-backend
|
- typecheck-backend
|
||||||
- typecheck-cannaiq
|
- typecheck-cannaiq
|
||||||
|
- typecheck-findadispo
|
||||||
|
- typecheck-findagram
|
||||||
when:
|
when:
|
||||||
event: pull_request
|
event: pull_request
|
||||||
|
|
||||||
@@ -76,8 +86,6 @@ steps:
|
|||||||
from_secret: registry_password
|
from_secret: registry_password
|
||||||
platforms: linux/amd64
|
platforms: linux/amd64
|
||||||
provenance: false
|
provenance: false
|
||||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache
|
|
||||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max
|
|
||||||
build_args:
|
build_args:
|
||||||
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
|
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
|
||||||
APP_GIT_SHA: ${CI_COMMIT_SHA}
|
APP_GIT_SHA: ${CI_COMMIT_SHA}
|
||||||
@@ -104,8 +112,6 @@ steps:
|
|||||||
from_secret: registry_password
|
from_secret: registry_password
|
||||||
platforms: linux/amd64
|
platforms: linux/amd64
|
||||||
provenance: false
|
provenance: false
|
||||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache
|
|
||||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max
|
|
||||||
depends_on: []
|
depends_on: []
|
||||||
when:
|
when:
|
||||||
branch: master
|
branch: master
|
||||||
@@ -127,8 +133,6 @@ steps:
|
|||||||
from_secret: registry_password
|
from_secret: registry_password
|
||||||
platforms: linux/amd64
|
platforms: linux/amd64
|
||||||
provenance: false
|
provenance: false
|
||||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache
|
|
||||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max
|
|
||||||
depends_on: []
|
depends_on: []
|
||||||
when:
|
when:
|
||||||
branch: master
|
branch: master
|
||||||
@@ -150,15 +154,38 @@ steps:
|
|||||||
from_secret: registry_password
|
from_secret: registry_password
|
||||||
platforms: linux/amd64
|
platforms: linux/amd64
|
||||||
provenance: false
|
provenance: false
|
||||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache
|
|
||||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max
|
|
||||||
depends_on: []
|
depends_on: []
|
||||||
when:
|
when:
|
||||||
branch: master
|
branch: master
|
||||||
event: push
|
event: push
|
||||||
|
|
||||||
# ===========================================
|
# ===========================================
|
||||||
# STAGE 3: Deploy and Run Migrations
|
# STAGE 3: Run Database Migrations (before deploy)
|
||||||
|
# ===========================================
|
||||||
|
migrate:
|
||||||
|
image: code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8}
|
||||||
|
environment:
|
||||||
|
CANNAIQ_DB_HOST:
|
||||||
|
from_secret: db_host
|
||||||
|
CANNAIQ_DB_PORT:
|
||||||
|
from_secret: db_port
|
||||||
|
CANNAIQ_DB_NAME:
|
||||||
|
from_secret: db_name
|
||||||
|
CANNAIQ_DB_USER:
|
||||||
|
from_secret: db_user
|
||||||
|
CANNAIQ_DB_PASS:
|
||||||
|
from_secret: db_pass
|
||||||
|
commands:
|
||||||
|
- cd /app
|
||||||
|
- node dist/db/migrate.js
|
||||||
|
depends_on:
|
||||||
|
- docker-backend
|
||||||
|
when:
|
||||||
|
branch: master
|
||||||
|
event: push
|
||||||
|
|
||||||
|
# ===========================================
|
||||||
|
# STAGE 4: Deploy (after migrations)
|
||||||
# ===========================================
|
# ===========================================
|
||||||
deploy:
|
deploy:
|
||||||
image: bitnami/kubectl:latest
|
image: bitnami/kubectl:latest
|
||||||
@@ -169,20 +196,15 @@ steps:
|
|||||||
- mkdir -p ~/.kube
|
- mkdir -p ~/.kube
|
||||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
||||||
- chmod 600 ~/.kube/config
|
- chmod 600 ~/.kube/config
|
||||||
# Deploy backend first
|
|
||||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
|
||||||
# Note: Migrations run automatically at startup via auto-migrate
|
|
||||||
# Deploy remaining services
|
|
||||||
# Resilience: ensure workers are scaled up if at 0
|
|
||||||
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
|
|
||||||
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||||
|
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
||||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
||||||
depends_on:
|
depends_on:
|
||||||
- docker-backend
|
- migrate
|
||||||
- docker-cannaiq
|
- docker-cannaiq
|
||||||
- docker-findadispo
|
- docker-findadispo
|
||||||
- docker-findagram
|
- docker-findagram
|
||||||
|
|||||||
@@ -1,175 +0,0 @@
|
|||||||
# API Security Documentation
|
|
||||||
|
|
||||||
This document describes the authentication and authorization configuration for all CannaiQ API endpoints.
|
|
||||||
|
|
||||||
## Authentication Methods
|
|
||||||
|
|
||||||
### 1. Trusted Origins (No Token Required)
|
|
||||||
|
|
||||||
Requests from trusted sources are automatically authenticated with `internal` role:
|
|
||||||
|
|
||||||
**Trusted IPs:**
|
|
||||||
- `127.0.0.1` (localhost IPv4)
|
|
||||||
- `::1` (localhost IPv6)
|
|
||||||
- `::ffff:127.0.0.1` (IPv4-mapped IPv6)
|
|
||||||
|
|
||||||
**Trusted Domains:**
|
|
||||||
- `https://cannaiq.co`
|
|
||||||
- `https://www.cannaiq.co`
|
|
||||||
- `https://findadispo.com`
|
|
||||||
- `https://www.findadispo.com`
|
|
||||||
- `https://findagram.co`
|
|
||||||
- `https://www.findagram.co`
|
|
||||||
- `http://localhost:3010`
|
|
||||||
- `http://localhost:8080`
|
|
||||||
- `http://localhost:5173`
|
|
||||||
|
|
||||||
**Trusted Patterns:**
|
|
||||||
- `*.cannabrands.app`
|
|
||||||
- `*.cannaiq.co`
|
|
||||||
|
|
||||||
**Internal Header:**
|
|
||||||
- `X-Internal-Request` header matching `INTERNAL_REQUEST_SECRET` env var
|
|
||||||
|
|
||||||
### 2. Bearer Token Authentication
|
|
||||||
|
|
||||||
External requests must include a valid token:
|
|
||||||
|
|
||||||
```
|
|
||||||
Authorization: Bearer <token>
|
|
||||||
```
|
|
||||||
|
|
||||||
**Token Types:**
|
|
||||||
- **JWT Token**: User session tokens (7-day expiry)
|
|
||||||
- **API Token**: Long-lived tokens for integrations (stored in `api_tokens` table)
|
|
||||||
|
|
||||||
## Authorization Levels
|
|
||||||
|
|
||||||
### Public (No Auth)
|
|
||||||
Routes accessible without authentication:
|
|
||||||
- `GET /health` - Health check
|
|
||||||
- `GET /api/health/*` - Comprehensive health endpoints
|
|
||||||
- `GET /outbound-ip` - Server's outbound IP
|
|
||||||
- `GET /api/v1/deals` - Public deals endpoint
|
|
||||||
|
|
||||||
### Authenticated (Trusted Origin or Token)
|
|
||||||
Routes requiring authentication but no specific role:
|
|
||||||
|
|
||||||
| Route | Description |
|
|
||||||
|-------|-------------|
|
|
||||||
| `/api/payloads/*` | Raw crawl payload access |
|
|
||||||
| `/api/workers/*` | Worker monitoring |
|
|
||||||
| `/api/worker-registry/*` | Worker registration and heartbeats |
|
|
||||||
| `/api/stores/*` | Store CRUD |
|
|
||||||
| `/api/products/*` | Product listing |
|
|
||||||
| `/api/dispensaries/*` | Dispensary data |
|
|
||||||
|
|
||||||
### Admin Only (Requires `admin` or `superadmin` role)
|
|
||||||
Routes restricted to administrators:
|
|
||||||
|
|
||||||
| Route | Description |
|
|
||||||
|-------|-------------|
|
|
||||||
| `/api/job-queue/*` | Job queue management |
|
|
||||||
| `/api/k8s/*` | Kubernetes control (scaling) |
|
|
||||||
| `/api/pipeline/*` | Pipeline stage transitions |
|
|
||||||
| `/api/tasks/*` | Task queue management |
|
|
||||||
| `/api/admin/orchestrator/*` | Orchestrator dashboard |
|
|
||||||
| `/api/admin/trusted-origins/*` | Manage trusted origins |
|
|
||||||
| `/api/admin/debug/*` | Debug endpoints |
|
|
||||||
|
|
||||||
**Note:** The `internal` role (localhost/trusted origins) bypasses role checks, granting automatic admin access for local development and internal services.
|
|
||||||
|
|
||||||
## Endpoint Security Matrix
|
|
||||||
|
|
||||||
| Endpoint Group | Auth Required | Role Required | Notes |
|
|
||||||
|----------------|---------------|---------------|-------|
|
|
||||||
| `/api/payloads/*` | Yes | None | Query API for raw crawl data |
|
|
||||||
| `/api/job-queue/*` | Yes | admin | Legacy job queue (deprecated) |
|
|
||||||
| `/api/workers/*` | Yes | None | Worker status monitoring |
|
|
||||||
| `/api/worker-registry/*` | Yes | None | Workers register via trusted IPs |
|
|
||||||
| `/api/k8s/*` | Yes | admin | K8s scaling controls |
|
|
||||||
| `/api/pipeline/*` | Yes | admin | Store pipeline transitions |
|
|
||||||
| `/api/tasks/*` | Yes | admin | Task queue CRUD |
|
|
||||||
| `/api/admin/orchestrator/*` | Yes | admin | Orchestrator metrics/alerts |
|
|
||||||
| `/api/admin/trusted-origins/*` | Yes | admin | Auth bypass management |
|
|
||||||
| `/api/v1/*` | Varies | Varies | Public API (per-endpoint) |
|
|
||||||
| `/api/consumer/*` | Varies | Varies | Consumer features |
|
|
||||||
|
|
||||||
## Implementation Details
|
|
||||||
|
|
||||||
### Middleware Stack
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
// Authentication middleware - validates token or trusted origin
|
|
||||||
import { authMiddleware } from '../auth/middleware';
|
|
||||||
|
|
||||||
// Role requirement middleware - checks user role
|
|
||||||
import { requireRole } from '../auth/middleware';
|
|
||||||
|
|
||||||
// Usage in route files:
|
|
||||||
router.use(authMiddleware); // All routes need auth
|
|
||||||
router.use(requireRole('admin', 'superadmin')); // Admin-only routes
|
|
||||||
```
|
|
||||||
|
|
||||||
### Auth Middleware Flow
|
|
||||||
|
|
||||||
```
|
|
||||||
Request → Check Bearer Token
|
|
||||||
├─ Valid JWT → Set user from token → Continue
|
|
||||||
├─ Valid API Token → Set user as api_token role → Continue
|
|
||||||
└─ No Token → Check Trusted Origin
|
|
||||||
├─ Trusted → Set user as internal role → Continue
|
|
||||||
└─ Not Trusted → 401 Unauthorized
|
|
||||||
```
|
|
||||||
|
|
||||||
### Role Check Flow
|
|
||||||
|
|
||||||
```
|
|
||||||
Request → authMiddleware → requireRole('admin')
|
|
||||||
├─ role === 'internal' → Continue (bypass)
|
|
||||||
├─ role in ['admin', 'superadmin'] → Continue
|
|
||||||
└─ else → 403 Forbidden
|
|
||||||
```
|
|
||||||
|
|
||||||
## Worker Pod Authentication
|
|
||||||
|
|
||||||
Worker pods (in Kubernetes) authenticate via:
|
|
||||||
|
|
||||||
1. **Internal IP**: Pods communicate via cluster IPs, which are trusted
|
|
||||||
2. **Internal Header**: Optional `X-Internal-Request` header for explicit trust
|
|
||||||
|
|
||||||
Endpoints used by workers:
|
|
||||||
- `POST /api/worker-registry/register` - Report for duty
|
|
||||||
- `POST /api/worker-registry/heartbeat` - Stay alive
|
|
||||||
- `POST /api/worker-registry/deregister` - Graceful shutdown
|
|
||||||
- `POST /api/worker-registry/task-completed` - Report task completion
|
|
||||||
|
|
||||||
## API Token Management
|
|
||||||
|
|
||||||
API tokens are managed via:
|
|
||||||
- `GET /api/api-tokens` - List tokens
|
|
||||||
- `POST /api/api-tokens` - Create token
|
|
||||||
- `DELETE /api/api-tokens/:id` - Revoke token
|
|
||||||
|
|
||||||
Token properties:
|
|
||||||
- `token`: The bearer token value
|
|
||||||
- `name`: Human-readable identifier
|
|
||||||
- `rate_limit`: Requests per minute
|
|
||||||
- `expires_at`: Optional expiration
|
|
||||||
- `active`: Enable/disable toggle
|
|
||||||
- `allowed_endpoints`: Optional endpoint restrictions
|
|
||||||
|
|
||||||
## Security Best Practices
|
|
||||||
|
|
||||||
1. **Never expose tokens in URLs** - Use the `Authorization` header
|
|
||||||
2. **Use HTTPS in production** - All traffic encrypted
|
|
||||||
3. **Rotate API tokens periodically** - Set expiration dates
|
|
||||||
4. **Monitor rate limits** - Prevent abuse
|
|
||||||
5. **Audit access logs** - Track API usage via `api_usage_logs` table
|
|
||||||
|
|
||||||
## Related Files
|
|
||||||
|
|
||||||
- `src/auth/middleware.ts` - Auth middleware implementation
|
|
||||||
- `src/routes/api-tokens.ts` - Token management endpoints
|
|
||||||
- `src/middleware/apiTokenTracker.ts` - Usage tracking
|
|
||||||
- `src/middleware/trustedDomains.ts` - Domain trust markers
|
|
||||||
@@ -1,218 +0,0 @@
|
|||||||
# CannaiQ Backend Codebase Map
|
|
||||||
|
|
||||||
**Last Updated:** 2025-12-12
|
|
||||||
**Purpose:** Help Claude and developers understand which code is current vs deprecated
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Quick Reference: What to Use
|
|
||||||
|
|
||||||
### For Crawling/Scraping
|
|
||||||
| Task | Use This | NOT This |
|
|
||||||
|------|----------|----------|
|
|
||||||
| Fetch products | `src/tasks/handlers/payload-fetch.ts` | `src/hydration/*` |
|
|
||||||
| Process products | `src/tasks/handlers/product-refresh.ts` | `src/scraper-v2/*` |
|
|
||||||
| GraphQL client | `src/platforms/dutchie/client.ts` | `src/dutchie-az/services/graphql-client.ts` |
|
|
||||||
| Worker system | `src/tasks/task-worker.ts` | `src/dutchie-az/services/worker.ts` |
|
|
||||||
|
|
||||||
### For Database
|
|
||||||
| Task | Use This | NOT This |
|
|
||||||
|------|----------|----------|
|
|
||||||
| Get DB pool | `src/db/pool.ts` | `src/dutchie-az/db/connection.ts` |
|
|
||||||
| Run migrations | `src/db/migrate.ts` (CLI only) | Never import at runtime |
|
|
||||||
| Query products | `store_products` table | `products`, `dutchie_products` |
|
|
||||||
| Query stores | `dispensaries` table | `stores` table |
|
|
||||||
|
|
||||||
### For Discovery
|
|
||||||
| Task | Use This |
|
|
||||||
|------|----------|
|
|
||||||
| Discover stores | `src/discovery/*.ts` |
|
|
||||||
| Run discovery | `npx tsx src/scripts/run-discovery.ts` |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Directory Status
|
|
||||||
|
|
||||||
### ACTIVE DIRECTORIES (Use These)
|
|
||||||
|
|
||||||
```
|
|
||||||
src/
|
|
||||||
├── auth/ # JWT/session auth, middleware
|
|
||||||
├── db/ # Database pool, migrations
|
|
||||||
├── discovery/ # Dutchie store discovery pipeline
|
|
||||||
├── middleware/ # Express middleware
|
|
||||||
├── multi-state/ # Multi-state query support
|
|
||||||
├── platforms/ # Platform-specific clients (Dutchie, Jane, etc)
|
|
||||||
│ └── dutchie/ # THE Dutchie client - use this one
|
|
||||||
├── routes/ # Express API routes
|
|
||||||
├── services/ # Core services (logger, scheduler, etc)
|
|
||||||
├── tasks/ # Task system (workers, handlers, scheduler)
|
|
||||||
│ └── handlers/ # Task handlers (payload_fetch, product_refresh, etc)
|
|
||||||
├── types/ # TypeScript types
|
|
||||||
└── utils/ # Utilities (storage, image processing)
|
|
||||||
```
|
|
||||||
|
|
||||||
### DEPRECATED DIRECTORIES (DO NOT USE)
|
|
||||||
|
|
||||||
```
|
|
||||||
src/
|
|
||||||
├── hydration/ # DEPRECATED - Old pipeline approach
|
|
||||||
├── scraper-v2/ # DEPRECATED - Old scraper engine
|
|
||||||
├── canonical-hydration/# DEPRECATED - Merged into tasks/handlers
|
|
||||||
├── dutchie-az/ # PARTIAL - Some parts deprecated, some active
|
|
||||||
│ ├── db/ # DEPRECATED - Use src/db/pool.ts
|
|
||||||
│ └── services/ # PARTIAL - worker.ts still runs, graphql-client.ts deprecated
|
|
||||||
├── portals/ # FUTURE - Not yet implemented
|
|
||||||
├── seo/ # PARTIAL - Settings work, templates WIP
|
|
||||||
└── system/ # DEPRECATED - Old orchestration system
|
|
||||||
```
|
|
||||||
|
|
||||||
### DEPRECATED FILES (DO NOT USE)
|
|
||||||
|
|
||||||
```
|
|
||||||
src/dutchie-az/db/connection.ts # Use src/db/pool.ts instead
|
|
||||||
src/dutchie-az/services/graphql-client.ts # Use src/platforms/dutchie/client.ts
|
|
||||||
src/hydration/*.ts # Entire directory deprecated
|
|
||||||
src/scraper-v2/*.ts # Entire directory deprecated
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Key Files Reference
|
|
||||||
|
|
||||||
### Entry Points
|
|
||||||
| File | Purpose | Status |
|
|
||||||
|------|---------|--------|
|
|
||||||
| `src/index.ts` | Main Express server | ACTIVE |
|
|
||||||
| `src/dutchie-az/services/worker.ts` | Worker process entry | ACTIVE |
|
|
||||||
| `src/tasks/task-worker.ts` | Task worker (new system) | ACTIVE |
|
|
||||||
|
|
||||||
### Dutchie Integration
|
|
||||||
| File | Purpose | Status |
|
|
||||||
|------|---------|--------|
|
|
||||||
| `src/platforms/dutchie/client.ts` | GraphQL client, hashes, curl | **PRIMARY** |
|
|
||||||
| `src/platforms/dutchie/queries.ts` | High-level query functions | ACTIVE |
|
|
||||||
| `src/platforms/dutchie/index.ts` | Re-exports | ACTIVE |
|
|
||||||
|
|
||||||
### Task Handlers
|
|
||||||
| File | Purpose | Status |
|
|
||||||
|------|---------|--------|
|
|
||||||
| `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** |
|
|
||||||
| `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** |
|
|
||||||
| `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE |
|
|
||||||
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs | ACTIVE |
|
|
||||||
| `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE |
|
|
||||||
|
|
||||||
### Database
|
|
||||||
| File | Purpose | Status |
|
|
||||||
|------|---------|--------|
|
|
||||||
| `src/db/pool.ts` | Canonical DB pool | **PRIMARY** |
|
|
||||||
| `src/db/migrate.ts` | Migration runner (CLI only) | CLI ONLY |
|
|
||||||
| `src/db/auto-migrate.ts` | Auto-run migrations on startup | ACTIVE |
|
|
||||||
|
|
||||||
### Configuration
|
|
||||||
| File | Purpose | Status |
|
|
||||||
|------|---------|--------|
|
|
||||||
| `.env` | Environment variables | ACTIVE |
|
|
||||||
| `package.json` | Dependencies | ACTIVE |
|
|
||||||
| `tsconfig.json` | TypeScript config | ACTIVE |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## GraphQL Hashes (CRITICAL)
|
|
||||||
|
|
||||||
The correct hashes are in `src/platforms/dutchie/client.ts`:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
export const GRAPHQL_HASHES = {
|
|
||||||
FilteredProducts: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
|
|
||||||
GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
|
|
||||||
ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
|
|
||||||
GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
**ALWAYS** use `Status: 'Active'` for FilteredProducts (not `null` or `'All'`).
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Scripts Reference
|
|
||||||
|
|
||||||
### Useful Scripts (in `src/scripts/`)
|
|
||||||
| Script | Purpose |
|
|
||||||
|--------|---------|
|
|
||||||
| `run-discovery.ts` | Run Dutchie discovery |
|
|
||||||
| `crawl-single-store.ts` | Test crawl a single store |
|
|
||||||
| `test-dutchie-graphql.ts` | Test GraphQL queries |
|
|
||||||
|
|
||||||
### One-Off Scripts (probably not needed)
|
|
||||||
| Script | Purpose |
|
|
||||||
|--------|---------|
|
|
||||||
| `harmonize-az-dispensaries.ts` | One-time data cleanup |
|
|
||||||
| `bootstrap-stores-for-dispensaries.ts` | One-time migration |
|
|
||||||
| `backfill-*.ts` | Historical backfill scripts |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## API Routes
|
|
||||||
|
|
||||||
### Active Routes (in `src/routes/`)
|
|
||||||
| Route File | Mount Point | Purpose |
|
|
||||||
|------------|-------------|---------|
|
|
||||||
| `auth.ts` | `/api/auth` | Login/logout/session |
|
|
||||||
| `stores.ts` | `/api/stores` | Store CRUD |
|
|
||||||
| `dashboard.ts` | `/api/dashboard` | Dashboard stats |
|
|
||||||
| `workers.ts` | `/api/workers` | Worker monitoring |
|
|
||||||
| `pipeline.ts` | `/api/pipeline` | Crawl triggers |
|
|
||||||
| `discovery.ts` | `/api/discovery` | Discovery management |
|
|
||||||
| `analytics.ts` | `/api/analytics` | Analytics queries |
|
|
||||||
| `wordpress.ts` | `/api/v1/wordpress` | WordPress plugin API |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Documentation Files
|
|
||||||
|
|
||||||
### Current Docs (in `backend/docs/`)
|
|
||||||
| Doc | Purpose | Currency |
|
|
||||||
|-----|---------|----------|
|
|
||||||
| `TASK_WORKFLOW_2024-12-10.md` | Task system architecture | CURRENT |
|
|
||||||
| `WORKER_TASK_ARCHITECTURE.md` | Worker/task design | CURRENT |
|
|
||||||
| `CRAWL_PIPELINE.md` | Crawl pipeline overview | CURRENT |
|
|
||||||
| `ORGANIC_SCRAPING_GUIDE.md` | Browser-based scraping | CURRENT |
|
|
||||||
| `CODEBASE_MAP.md` | This file | CURRENT |
|
|
||||||
| `ANALYTICS_V2_EXAMPLES.md` | Analytics API examples | CURRENT |
|
|
||||||
| `BRAND_INTELLIGENCE_API.md` | Brand API docs | CURRENT |
|
|
||||||
|
|
||||||
### Root Docs
|
|
||||||
| Doc | Purpose | Currency |
|
|
||||||
|-----|---------|----------|
|
|
||||||
| `CLAUDE.md` | Claude instructions | **PRIMARY** |
|
|
||||||
| `README.md` | Project overview | NEEDS UPDATE |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Common Mistakes to Avoid
|
|
||||||
|
|
||||||
1. **Don't use `src/hydration/`** - It's an old approach that was superseded by the task system
|
|
||||||
|
|
||||||
2. **Don't use `src/dutchie-az/db/connection.ts`** - Use `src/db/pool.ts` instead
|
|
||||||
|
|
||||||
3. **Don't import `src/db/migrate.ts` at runtime** - It will crash. Only use for CLI migrations.
|
|
||||||
|
|
||||||
4. **Don't query `stores` table** - It's empty. Use `dispensaries`.
|
|
||||||
|
|
||||||
5. **Don't query `products` table** - It's empty. Use `store_products`.
|
|
||||||
|
|
||||||
6. **Don't use wrong GraphQL hash** - Always get hash from `GRAPHQL_HASHES` in client.ts
|
|
||||||
|
|
||||||
7. **Don't use `Status: null`** - It returns 0 products. Use `Status: 'Active'`.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## When in Doubt
|
|
||||||
|
|
||||||
1. Check if the file is imported from an active entry point (`src/index.ts` or one of the worker entry files) - if not, it may be deprecated
|
|
||||||
2. Check the last modified date - older files may be stale
|
|
||||||
3. Look for `DEPRECATED` comments in the code
|
|
||||||
4. Ask: "Is there a newer version of this in `src/tasks/` or `src/platforms/`?"
|
|
||||||
5. Read the relevant doc in `docs/` before modifying code
|
|
||||||
@@ -362,148 +362,6 @@ SET status = 'pending', retry_count = retry_count + 1
|
|||||||
WHERE status = 'failed' AND retry_count < max_retries;
|
WHERE status = 'failed' AND retry_count < max_retries;
|
||||||
```
|
```
|
||||||
|
|
||||||
## Concurrent Task Processing (Added 2024-12)
|
|
||||||
|
|
||||||
Workers can now process multiple tasks concurrently within a single worker instance. This improves throughput by utilizing async I/O efficiently.
|
|
||||||
|
|
||||||
### Architecture
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────┐
|
|
||||||
│ Pod (K8s) │
|
|
||||||
│ │
|
|
||||||
│ ┌─────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ TaskWorker │ │
|
|
||||||
│ │ │ │
|
|
||||||
│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │
|
|
||||||
│ │ │ Task 1 │ │ Task 2 │ │ Task 3 │ (concurrent)│ │
|
|
||||||
│ │ └─────────┘ └─────────┘ └─────────┘ │ │
|
|
||||||
│ │ │ │
|
|
||||||
│ │ Resource Monitor │ │
|
|
||||||
│ │ ├── Memory: 65% (threshold: 85%) │ │
|
|
||||||
│ │ ├── CPU: 45% (threshold: 90%) │ │
|
|
||||||
│ │ └── Status: Normal │ │
|
|
||||||
│ └─────────────────────────────────────────────────────┘ │
|
|
||||||
└─────────────────────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
### Environment Variables
|
|
||||||
|
|
||||||
| Variable | Default | Description |
|
|
||||||
|----------|---------|-------------|
|
|
||||||
| `MAX_CONCURRENT_TASKS` | 3 | Maximum tasks a worker will run concurrently |
|
|
||||||
| `MEMORY_BACKOFF_THRESHOLD` | 0.85 | Back off when heap memory exceeds 85% |
|
|
||||||
| `CPU_BACKOFF_THRESHOLD` | 0.90 | Back off when CPU exceeds 90% |
|
|
||||||
| `BACKOFF_DURATION_MS` | 10000 | How long to wait when backing off (10s) |
|
|
||||||
|
|
||||||
### How It Works
|
|
||||||
|
|
||||||
1. **Main Loop**: Worker continuously tries to fill up to `MAX_CONCURRENT_TASKS`
|
|
||||||
2. **Resource Monitoring**: Before claiming a new task, worker checks memory and CPU
|
|
||||||
3. **Backoff**: If resources exceed thresholds, worker pauses and stops claiming new tasks
|
|
||||||
4. **Concurrent Execution**: Tasks run concurrently as independent Promises - a task waiting on I/O doesn't block the others
|
|
||||||
5. **Graceful Shutdown**: On SIGTERM/decommission, worker stops claiming but waits for active tasks
|
|
||||||
|
|
||||||
### Resource Monitoring
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
// ResourceStats interface
|
|
||||||
interface ResourceStats {
|
|
||||||
memoryPercent: number; // Current heap usage as decimal (0.0-1.0)
|
|
||||||
memoryMb: number; // Current heap used in MB
|
|
||||||
memoryTotalMb: number; // Total heap available in MB
|
|
||||||
cpuPercent: number; // CPU usage as percentage (0-100)
|
|
||||||
isBackingOff: boolean; // True if worker is in backoff state
|
|
||||||
backoffReason: string; // Why the worker is backing off
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Heartbeat Data
|
|
||||||
|
|
||||||
Workers report the following in their heartbeat:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"worker_id": "worker-abc123",
|
|
||||||
"current_task_id": 456,
|
|
||||||
"current_task_ids": [456, 457, 458],
|
|
||||||
"active_task_count": 3,
|
|
||||||
"max_concurrent_tasks": 3,
|
|
||||||
"status": "active",
|
|
||||||
"resources": {
|
|
||||||
"memory_mb": 256,
|
|
||||||
"memory_total_mb": 512,
|
|
||||||
"memory_rss_mb": 320,
|
|
||||||
"memory_percent": 50,
|
|
||||||
"cpu_user_ms": 12500,
|
|
||||||
"cpu_system_ms": 3200,
|
|
||||||
"cpu_percent": 45,
|
|
||||||
"is_backing_off": false,
|
|
||||||
"backoff_reason": null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Backoff Behavior
|
|
||||||
|
|
||||||
When resources exceed thresholds:
|
|
||||||
|
|
||||||
1. Worker logs the backoff reason:
|
|
||||||
```
|
|
||||||
[TaskWorker] MyWorker backing off: Memory at 87.3% (threshold: 85%)
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Worker stops claiming new tasks but continues existing tasks
|
|
||||||
|
|
||||||
3. After `BACKOFF_DURATION_MS`, worker rechecks resources
|
|
||||||
|
|
||||||
4. When resources return to normal:
|
|
||||||
```
|
|
||||||
[TaskWorker] MyWorker resuming normal operation
|
|
||||||
```
|
|
||||||
|
|
||||||
### UI Display
|
|
||||||
|
|
||||||
The Workers Dashboard shows:
|
|
||||||
|
|
||||||
- **Tasks Column**: `2/3 tasks` (active/max concurrent)
|
|
||||||
- **Resources Column**: Memory % and CPU % with color coding
|
|
||||||
- Green: < 50%
|
|
||||||
- Yellow: 50-74%
|
|
||||||
- Amber: 75-89%
|
|
||||||
- Red: 90%+
|
|
||||||
- **Backing Off**: Orange warning badge when worker is in backoff state
|
|
||||||
|
|
||||||
### Task Count Badge Details
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────┐
|
|
||||||
│ Worker: "MyWorker" │
|
|
||||||
│ Tasks: 2/3 tasks #456, #457 │
|
|
||||||
│ Resources: 🧠 65% 💻 45% │
|
|
||||||
│ Status: ● Active │
|
|
||||||
└─────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
### Best Practices
|
|
||||||
|
|
||||||
1. **Start Conservative**: Use `MAX_CONCURRENT_TASKS=3` initially
|
|
||||||
2. **Monitor Resources**: Watch for frequent backoffs in logs
|
|
||||||
3. **Tune Per Workload**: I/O-bound tasks benefit from higher concurrency
|
|
||||||
4. **Scale Horizontally**: Add more pods rather than cranking concurrency too high
|
|
||||||
|
|
||||||
### Code References
|
|
||||||
|
|
||||||
| File | Purpose |
|
|
||||||
|------|---------|
|
|
||||||
| `src/tasks/task-worker.ts:68-71` | Concurrency environment variables |
|
|
||||||
| `src/tasks/task-worker.ts:104-111` | ResourceStats interface |
|
|
||||||
| `src/tasks/task-worker.ts:149-179` | getResourceStats() method |
|
|
||||||
| `src/tasks/task-worker.ts:184-196` | shouldBackOff() method |
|
|
||||||
| `src/tasks/task-worker.ts:462-516` | mainLoop() with concurrent claiming |
|
|
||||||
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
|
|
||||||
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
|
|
||||||
|
|
||||||
## Monitoring
|
## Monitoring
|
||||||
|
|
||||||
### Logs
|
### Logs
|
||||||
@@ -1,297 +0,0 @@
|
|||||||
# Organic Browser-Based Scraping Guide
|
|
||||||
|
|
||||||
**Last Updated:** 2025-12-12
|
|
||||||
**Status:** Production-ready proof of concept
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
This document describes the "organic" browser-based approach to scraping Dutchie dispensary menus. Unlike direct curl/axios requests, this method uses a real browser session to make API calls, making requests appear natural and reducing detection risk.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Why Organic Scraping?
|
|
||||||
|
|
||||||
| Approach | Detection Risk | Speed | Complexity |
|
|
||||||
|----------|---------------|-------|------------|
|
|
||||||
| Direct curl | Higher | Fast | Low |
|
|
||||||
| curl-impersonate | Medium | Fast | Medium |
|
|
||||||
| **Browser-based (organic)** | **Lowest** | Slower | Higher |
|
|
||||||
|
|
||||||
Direct curl requests can be fingerprinted via:
|
|
||||||
- TLS fingerprint (cipher suites, extensions)
|
|
||||||
- Header order and values
|
|
||||||
- Missing cookies/session data
|
|
||||||
- Request patterns
|
|
||||||
|
|
||||||
Browser-based requests inherit:
|
|
||||||
- Real Chrome TLS fingerprint
|
|
||||||
- Session cookies from page visit
|
|
||||||
- Natural header order
|
|
||||||
- JavaScript execution environment
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Implementation
|
|
||||||
|
|
||||||
### Dependencies
|
|
||||||
|
|
||||||
```bash
|
|
||||||
npm install puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
|
|
||||||
```
|
|
||||||
|
|
||||||
### Core Script: `test-intercept.js`
|
|
||||||
|
|
||||||
Located at: `backend/test-intercept.js`
|
|
||||||
|
|
||||||
```javascript
|
|
||||||
const puppeteer = require('puppeteer-extra');
|
|
||||||
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
|
||||||
const fs = require('fs');
|
|
||||||
|
|
||||||
puppeteer.use(StealthPlugin());
|
|
||||||
|
|
||||||
async function capturePayload(config) {
|
|
||||||
const { dispensaryId, platformId, cName, outputPath } = config;
|
|
||||||
|
|
||||||
const browser = await puppeteer.launch({
|
|
||||||
headless: 'new',
|
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
|
||||||
});
|
|
||||||
|
|
||||||
const page = await browser.newPage();
|
|
||||||
|
|
||||||
// STEP 1: Establish session by visiting the menu
|
|
||||||
const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
|
|
||||||
await page.goto(embedUrl, { waitUntil: 'networkidle2', timeout: 60000 });
|
|
||||||
|
|
||||||
// STEP 2: Fetch ALL products using GraphQL from browser context
|
|
||||||
const result = await page.evaluate(async (platformId) => {
|
|
||||||
const allProducts = [];
|
|
||||||
let pageNum = 0;
|
|
||||||
const perPage = 100;
|
|
||||||
let totalCount = 0;
|
|
||||||
const sessionId = 'browser-session-' + Date.now();
|
|
||||||
|
|
||||||
while (pageNum < 30) {
|
|
||||||
const variables = {
|
|
||||||
includeEnterpriseSpecials: false,
|
|
||||||
productsFilter: {
|
|
||||||
dispensaryId: platformId,
|
|
||||||
pricingType: 'rec',
|
|
||||||
Status: 'Active', // CRITICAL: Must be 'Active', not null
|
|
||||||
types: [],
|
|
||||||
useCache: true,
|
|
||||||
isDefaultSort: true,
|
|
||||||
sortBy: 'popularSortIdx',
|
|
||||||
sortDirection: 1,
|
|
||||||
bypassOnlineThresholds: true,
|
|
||||||
isKioskMenu: false,
|
|
||||||
removeProductsBelowOptionThresholds: false,
|
|
||||||
},
|
|
||||||
page: pageNum,
|
|
||||||
perPage: perPage,
|
|
||||||
};
|
|
||||||
|
|
||||||
const extensions = {
|
|
||||||
persistedQuery: {
|
|
||||||
version: 1,
|
|
||||||
sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const qs = new URLSearchParams({
|
|
||||||
operationName: 'FilteredProducts',
|
|
||||||
variables: JSON.stringify(variables),
|
|
||||||
extensions: JSON.stringify(extensions)
|
|
||||||
});
|
|
||||||
|
|
||||||
const response = await fetch(`https://dutchie.com/api-3/graphql?${qs}`, {
|
|
||||||
method: 'GET',
|
|
||||||
headers: {
|
|
||||||
'Accept': 'application/json',
|
|
||||||
'content-type': 'application/json',
|
|
||||||
'x-dutchie-session': sessionId,
|
|
||||||
'apollographql-client-name': 'Marketplace (production)',
|
|
||||||
},
|
|
||||||
credentials: 'include'
|
|
||||||
});
|
|
||||||
|
|
||||||
const json = await response.json();
|
|
||||||
const data = json?.data?.filteredProducts;
|
|
||||||
if (!data?.products) break;
|
|
||||||
|
|
||||||
allProducts.push(...data.products);
|
|
||||||
if (pageNum === 0) totalCount = data.queryInfo?.totalCount || 0;
|
|
||||||
if (allProducts.length >= totalCount) break;
|
|
||||||
|
|
||||||
pageNum++;
|
|
||||||
await new Promise(r => setTimeout(r, 200)); // Polite delay
|
|
||||||
}
|
|
||||||
|
|
||||||
return { products: allProducts, totalCount };
|
|
||||||
}, platformId);
|
|
||||||
|
|
||||||
await browser.close();
|
|
||||||
|
|
||||||
// STEP 3: Save payload
|
|
||||||
const payload = {
|
|
||||||
dispensaryId,
|
|
||||||
platformId,
|
|
||||||
cName,
|
|
||||||
fetchedAt: new Date().toISOString(),
|
|
||||||
productCount: result.products.length,
|
|
||||||
products: result.products,
|
|
||||||
};
|
|
||||||
|
|
||||||
fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
|
|
||||||
return payload;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Critical Parameters
|
|
||||||
|
|
||||||
### GraphQL Hash (FilteredProducts)
|
|
||||||
|
|
||||||
```
|
|
||||||
ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0
|
|
||||||
```
|
|
||||||
|
|
||||||
**WARNING:** Using the wrong hash returns HTTP 400.
|
|
||||||
|
|
||||||
### Status Parameter
|
|
||||||
|
|
||||||
| Value | Result |
|
|
||||||
|-------|--------|
|
|
||||||
| `'Active'` | Returns in-stock products (1019 in test) |
|
|
||||||
| `null` | Returns 0 products |
|
|
||||||
| `'All'` | Returns HTTP 400 |
|
|
||||||
|
|
||||||
**ALWAYS use `Status: 'Active'`**
|
|
||||||
|
|
||||||
### Required Headers
|
|
||||||
|
|
||||||
```javascript
|
|
||||||
{
|
|
||||||
'Accept': 'application/json',
|
|
||||||
'content-type': 'application/json',
|
|
||||||
'x-dutchie-session': 'unique-session-id',
|
|
||||||
'apollographql-client-name': 'Marketplace (production)',
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Endpoint
|
|
||||||
|
|
||||||
```
|
|
||||||
https://dutchie.com/api-3/graphql
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Performance Benchmarks
|
|
||||||
|
|
||||||
Test store: AZ-Deeply-Rooted (1019 products)
|
|
||||||
|
|
||||||
| Metric | Value |
|
|
||||||
|--------|-------|
|
|
||||||
| Total products | 1019 |
|
|
||||||
| Time | 18.5 seconds |
|
|
||||||
| Payload size | 11.8 MB |
|
|
||||||
| Pages fetched | 11 (100 per page) |
|
|
||||||
| Success rate | 100% |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Payload Format
|
|
||||||
|
|
||||||
The output matches the existing `payload-fetch.ts` handler format:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"dispensaryId": 123,
|
|
||||||
"platformId": "6405ef617056e8014d79101b",
|
|
||||||
"cName": "AZ-Deeply-Rooted",
|
|
||||||
"fetchedAt": "2025-12-12T05:05:19.837Z",
|
|
||||||
"productCount": 1019,
|
|
||||||
"products": [
|
|
||||||
{
|
|
||||||
"id": "6927508db4851262f629a869",
|
|
||||||
"Name": "Product Name",
|
|
||||||
"brand": { "name": "Brand Name", ... },
|
|
||||||
"type": "Flower",
|
|
||||||
"THC": "25%",
|
|
||||||
"Prices": [...],
|
|
||||||
"Options": [...],
|
|
||||||
...
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Integration Points
|
|
||||||
|
|
||||||
### As a Task Handler
|
|
||||||
|
|
||||||
The organic approach can be integrated as an alternative to curl-based fetching:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
// In src/tasks/handlers/organic-payload-fetch.ts
|
|
||||||
export async function handleOrganicPayloadFetch(ctx: TaskContext): Promise<TaskResult> {
|
|
||||||
// Use puppeteer-based capture
|
|
||||||
// Save to same payload storage
|
|
||||||
// Queue product_refresh task
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Worker Configuration
|
|
||||||
|
|
||||||
Add to job_schedules:
|
|
||||||
```sql
|
|
||||||
INSERT INTO job_schedules (name, role, cron_expression)
|
|
||||||
VALUES ('organic_product_crawl', 'organic_payload_fetch', '0 */6 * * *');
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
### HTTP 400 Bad Request
|
|
||||||
- Check hash is correct: `ee29c060...`
|
|
||||||
- Verify Status is `'Active'` (`'All'` returns HTTP 400; `null` returns 0 products instead)
|
|
||||||
|
|
||||||
### 0 Products Returned
|
|
||||||
- Status was likely `null` - use `'Active'` (`'All'` fails with HTTP 400 rather than returning 0 products)
|
|
||||||
- Check platformId is valid MongoDB ObjectId
|
|
||||||
|
|
||||||
### Session Not Established
|
|
||||||
- Increase timeout on initial page.goto()
|
|
||||||
- Check cName is valid (matches embedded-menu URL)
|
|
||||||
|
|
||||||
### Detection/Blocking
|
|
||||||
- StealthPlugin should handle most cases
|
|
||||||
- Add random delays between pages
|
|
||||||
- Use `headless: 'new'` (not `true`/`false`)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Files Reference
|
|
||||||
|
|
||||||
| File | Purpose |
|
|
||||||
|------|---------|
|
|
||||||
| `backend/test-intercept.js` | Proof of concept script |
|
|
||||||
| `backend/src/platforms/dutchie/client.ts` | GraphQL hashes, curl implementation |
|
|
||||||
| `backend/src/tasks/handlers/payload-fetch.ts` | Current curl-based handler |
|
|
||||||
| `backend/src/utils/payload-storage.ts` | Payload save/load utilities |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## See Also
|
|
||||||
|
|
||||||
- `DUTCHIE_CRAWL_WORKFLOW.md` - Full crawl pipeline documentation
|
|
||||||
- `TASK_WORKFLOW_2024-12-10.md` - Task system architecture
|
|
||||||
- `CLAUDE.md` - Project rules and constraints
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
# ARCHIVED DOCUMENTATION
|
|
||||||
|
|
||||||
**WARNING: These docs may be outdated or inaccurate.**
|
|
||||||
|
|
||||||
The code has evolved significantly. These docs are kept for historical reference only.
|
|
||||||
|
|
||||||
## What to Use Instead
|
|
||||||
|
|
||||||
**The current sources of truth are:**
|
|
||||||
- `CLAUDE.md` (root) - Essential rules and quick reference
|
|
||||||
- `docs/CODEBASE_MAP.md` - Current file/directory reference
|
|
||||||
|
|
||||||
## Why Archive?
|
|
||||||
|
|
||||||
These docs were written during development iterations and may reference:
|
|
||||||
- Old file paths that no longer exist
|
|
||||||
- Deprecated approaches (hydration, scraper-v2)
|
|
||||||
- APIs that have changed
|
|
||||||
- Database schemas that evolved
|
|
||||||
|
|
||||||
## If You Need Details
|
|
||||||
|
|
||||||
1. First check CODEBASE_MAP.md for current file locations
|
|
||||||
2. Then read the actual source code
|
|
||||||
3. Only use archive docs as a last resort for historical context
|
|
||||||
@@ -1,77 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: scraper-worker
|
|
||||||
namespace: dispensary-scraper
|
|
||||||
labels:
|
|
||||||
app: scraper-worker
|
|
||||||
spec:
|
|
||||||
clusterIP: None # Headless service required for StatefulSet
|
|
||||||
selector:
|
|
||||||
app: scraper-worker
|
|
||||||
ports:
|
|
||||||
- port: 3010
|
|
||||||
name: http
|
|
||||||
---
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: StatefulSet
|
|
||||||
metadata:
|
|
||||||
name: scraper-worker
|
|
||||||
namespace: dispensary-scraper
|
|
||||||
spec:
|
|
||||||
serviceName: scraper-worker
|
|
||||||
replicas: 8
|
|
||||||
podManagementPolicy: Parallel # Start all pods at once
|
|
||||||
updateStrategy:
|
|
||||||
type: OnDelete # Pods only update when manually deleted - no automatic restarts
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: scraper-worker
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: scraper-worker
|
|
||||||
spec:
|
|
||||||
terminationGracePeriodSeconds: 60
|
|
||||||
imagePullSecrets:
|
|
||||||
- name: regcred
|
|
||||||
containers:
|
|
||||||
- name: worker
|
|
||||||
image: code.cannabrands.app/creationshop/dispensary-scraper:latest
|
|
||||||
imagePullPolicy: Always
|
|
||||||
command: ["node"]
|
|
||||||
args: ["dist/tasks/task-worker.js"]
|
|
||||||
env:
|
|
||||||
- name: WORKER_MODE
|
|
||||||
value: "true"
|
|
||||||
- name: POD_NAME
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: metadata.name
|
|
||||||
- name: MAX_CONCURRENT_TASKS
|
|
||||||
value: "50"
|
|
||||||
- name: API_BASE_URL
|
|
||||||
value: http://scraper
|
|
||||||
- name: NODE_OPTIONS
|
|
||||||
value: --max-old-space-size=1500
|
|
||||||
envFrom:
|
|
||||||
- configMapRef:
|
|
||||||
name: scraper-config
|
|
||||||
- secretRef:
|
|
||||||
name: scraper-secrets
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
cpu: 100m
|
|
||||||
memory: 1Gi
|
|
||||||
limits:
|
|
||||||
cpu: 500m
|
|
||||||
memory: 2Gi
|
|
||||||
livenessProbe:
|
|
||||||
exec:
|
|
||||||
command:
|
|
||||||
- /bin/sh
|
|
||||||
- -c
|
|
||||||
- pgrep -f 'task-worker' > /dev/null
|
|
||||||
initialDelaySeconds: 10
|
|
||||||
periodSeconds: 30
|
|
||||||
failureThreshold: 3
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
-- Migration: Worker Commands Table
|
|
||||||
-- Purpose: Store commands for workers (decommission, etc.)
|
|
||||||
-- Workers poll this table after each task to check for commands
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS worker_commands (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
worker_id TEXT NOT NULL,
|
|
||||||
command TEXT NOT NULL, -- 'decommission', 'pause', 'resume'
|
|
||||||
reason TEXT,
|
|
||||||
issued_by TEXT,
|
|
||||||
issued_at TIMESTAMPTZ DEFAULT NOW(),
|
|
||||||
acknowledged_at TIMESTAMPTZ,
|
|
||||||
executed_at TIMESTAMPTZ,
|
|
||||||
status TEXT DEFAULT 'pending' -- 'pending', 'acknowledged', 'executed', 'cancelled'
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Index for worker lookups
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_worker_commands_worker_id ON worker_commands(worker_id);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_worker_commands_pending ON worker_commands(worker_id, status) WHERE status = 'pending';
|
|
||||||
|
|
||||||
-- Add decommission_requested column to worker_registry for quick checks
|
|
||||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested BOOLEAN DEFAULT FALSE;
|
|
||||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_reason TEXT;
|
|
||||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested_at TIMESTAMPTZ;
|
|
||||||
|
|
||||||
-- Comment
|
|
||||||
COMMENT ON TABLE worker_commands IS 'Commands issued to workers (decommission after task, pause, etc.)';
|
|
||||||
@@ -1,88 +0,0 @@
|
|||||||
-- Migration 083: Discovery Run Tracking
|
|
||||||
-- Tracks progress of store discovery runs step-by-step
|
|
||||||
|
|
||||||
-- Main discovery runs table
|
|
||||||
CREATE TABLE IF NOT EXISTS discovery_runs (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
platform VARCHAR(50) NOT NULL DEFAULT 'dutchie',
|
|
||||||
status VARCHAR(20) NOT NULL DEFAULT 'running', -- running, completed, failed
|
|
||||||
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
||||||
finished_at TIMESTAMPTZ,
|
|
||||||
task_id INTEGER REFERENCES worker_task_queue(id),
|
|
||||||
|
|
||||||
-- Totals
|
|
||||||
states_total INTEGER DEFAULT 0,
|
|
||||||
states_completed INTEGER DEFAULT 0,
|
|
||||||
locations_discovered INTEGER DEFAULT 0,
|
|
||||||
locations_promoted INTEGER DEFAULT 0,
|
|
||||||
new_store_ids INTEGER[] DEFAULT '{}',
|
|
||||||
|
|
||||||
-- Error info
|
|
||||||
error_message TEXT,
|
|
||||||
|
|
||||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Per-state progress within a run
|
|
||||||
CREATE TABLE IF NOT EXISTS discovery_run_states (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
|
|
||||||
state_code VARCHAR(2) NOT NULL,
|
|
||||||
status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending, running, completed, failed
|
|
||||||
started_at TIMESTAMPTZ,
|
|
||||||
finished_at TIMESTAMPTZ,
|
|
||||||
|
|
||||||
-- Results
|
|
||||||
cities_found INTEGER DEFAULT 0,
|
|
||||||
locations_found INTEGER DEFAULT 0,
|
|
||||||
locations_upserted INTEGER DEFAULT 0,
|
|
||||||
new_dispensary_ids INTEGER[] DEFAULT '{}',
|
|
||||||
|
|
||||||
-- Error info
|
|
||||||
error_message TEXT,
|
|
||||||
|
|
||||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
||||||
|
|
||||||
UNIQUE(run_id, state_code)
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Step-by-step log for detailed progress tracking
|
|
||||||
CREATE TABLE IF NOT EXISTS discovery_run_steps (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
|
|
||||||
state_code VARCHAR(2),
|
|
||||||
step_name VARCHAR(100) NOT NULL,
|
|
||||||
status VARCHAR(20) NOT NULL DEFAULT 'started', -- started, completed, failed
|
|
||||||
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
||||||
finished_at TIMESTAMPTZ,
|
|
||||||
|
|
||||||
-- Details (JSON for flexibility)
|
|
||||||
details JSONB DEFAULT '{}',
|
|
||||||
|
|
||||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Indexes for querying
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_status ON discovery_runs(status);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_platform ON discovery_runs(platform);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_started_at ON discovery_runs(started_at DESC);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_discovery_run_states_run_id ON discovery_run_states(run_id);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_discovery_run_steps_run_id ON discovery_run_steps(run_id);
|
|
||||||
|
|
||||||
-- View for latest run status per platform
|
|
||||||
CREATE OR REPLACE VIEW v_latest_discovery_runs AS
|
|
||||||
SELECT DISTINCT ON (platform)
|
|
||||||
id,
|
|
||||||
platform,
|
|
||||||
status,
|
|
||||||
started_at,
|
|
||||||
finished_at,
|
|
||||||
states_total,
|
|
||||||
states_completed,
|
|
||||||
locations_discovered,
|
|
||||||
locations_promoted,
|
|
||||||
array_length(new_store_ids, 1) as new_stores_count,
|
|
||||||
error_message,
|
|
||||||
EXTRACT(EPOCH FROM (COALESCE(finished_at, NOW()) - started_at)) as duration_seconds
|
|
||||||
FROM discovery_runs
|
|
||||||
ORDER BY platform, started_at DESC;
|
|
||||||
@@ -1,253 +0,0 @@
|
|||||||
-- Migration 084: Dual Transport Preflight System
|
|
||||||
-- Workers run both curl and http (Puppeteer) preflights on startup
|
|
||||||
-- Tasks can require a specific transport method
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 1: Add preflight columns to worker_registry
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- Preflight bookkeeping on worker_registry, one column set per transport:
--   curl = curl/axios transport (proxy-based)
--   http = http/Puppeteer transport (browser-based)
ALTER TABLE worker_registry
    -- Outcome of each preflight
    ADD COLUMN IF NOT EXISTS preflight_curl_status VARCHAR(20) DEFAULT 'pending',
    ADD COLUMN IF NOT EXISTS preflight_http_status VARCHAR(20) DEFAULT 'pending',
    -- When each preflight completed
    ADD COLUMN IF NOT EXISTS preflight_curl_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS preflight_http_at TIMESTAMPTZ,
    -- Error message for a failed preflight
    ADD COLUMN IF NOT EXISTS preflight_curl_error TEXT,
    ADD COLUMN IF NOT EXISTS preflight_http_error TEXT,
    -- Response time of a successful preflight (ms)
    ADD COLUMN IF NOT EXISTS preflight_curl_ms INTEGER,
    ADD COLUMN IF NOT EXISTS preflight_http_ms INTEGER;
|
|
||||||
|
|
||||||
-- Restrict both preflight status columns to the known values.
-- The constraints are dropped first so this section can be re-run.
ALTER TABLE worker_registry
    DROP CONSTRAINT IF EXISTS valid_preflight_curl_status,
    DROP CONSTRAINT IF EXISTS valid_preflight_http_status;

ALTER TABLE worker_registry
    ADD CONSTRAINT valid_preflight_curl_status
        CHECK (preflight_curl_status IN ('pending', 'passed', 'failed', 'skipped')),
    ADD CONSTRAINT valid_preflight_http_status
        CHECK (preflight_http_status IN ('pending', 'passed', 'failed', 'skipped'));
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 2: Add method column to worker_tasks
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- Transport a task requires:
--   NULL   = no preference (any worker can claim)
--   'curl' = requires curl/axios transport (proxy-based, fast)
--   'http' = requires http/Puppeteer transport (browser-based, anti-detect)
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS method VARCHAR(10);

-- Re-create the value check so this section can be re-run.
ALTER TABLE worker_tasks
    DROP CONSTRAINT IF EXISTS valid_task_method;

ALTER TABLE worker_tasks
    ADD CONSTRAINT valid_task_method
        CHECK (method IS NULL OR method IN ('curl', 'http'));

-- Partial index over pending rows only, for method-based task claiming.
CREATE INDEX IF NOT EXISTS idx_worker_tasks_method
    ON worker_tasks (method)
    WHERE status = 'pending';
|
|
||||||
|
|
||||||
-- Set method to 'http' for every existing task that has no method yet (any status, not only pending)
|
|
||||||
-- ALL current tasks require Puppeteer/browser-based transport
|
|
||||||
UPDATE worker_tasks
|
|
||||||
SET method = 'http'
|
|
||||||
WHERE method IS NULL;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 3: Update claim_task function for method compatibility
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- NOTE(review): this version adds two parameters. CREATE OR REPLACE only
-- replaces a function whose argument types are identical, so if an earlier
-- migration created claim_task(VARCHAR, VARCHAR) it would remain as an
-- overload and two-argument calls would become ambiguous. The drop below is
-- a no-op when no such function exists; confirm the earlier signature.
DROP FUNCTION IF EXISTS claim_task(VARCHAR, VARCHAR);

-- Claims one pending task for a worker and returns the claimed row.
--
-- Parameters:
--   p_role        - only tasks with this role are considered
--   p_worker_id   - recorded as the task's worker_id
--   p_curl_passed - worker passed the curl preflight (default TRUE)
--   p_http_passed - worker passed the http preflight (default FALSE)
--
-- Returns the updated worker_tasks row; all fields are NULL when no task
-- matched.
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
BEGIN
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT id FROM worker_tasks
        WHERE role = p_role
          AND status = 'pending'
          -- Unscheduled tasks, or tasks whose scheduled time has arrived
          AND (scheduled_for IS NULL OR scheduled_for <= NOW())
          -- Method compatibility: worker must have passed the required preflight
          AND (
              method IS NULL  -- No preference, any worker can claim
              OR (method = 'curl' AND p_curl_passed = TRUE)
              OR (method = 'http' AND p_http_passed = TRUE)
          )
          -- Exclude stores that already have an active task
          AND (dispensary_id IS NULL OR dispensary_id NOT IN (
              SELECT dispensary_id FROM worker_tasks
              WHERE status IN ('claimed', 'running')
                AND dispensary_id IS NOT NULL
          ))
        -- Highest priority first, oldest first within a priority
        ORDER BY priority DESC, created_at ASC
        LIMIT 1
        -- Skip rows another transaction has locked instead of waiting on them
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 4: Update v_active_workers view
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- Rebuild v_active_workers with the per-transport preflight columns.
-- The view is dropped and created again rather than replaced in place.
DROP VIEW IF EXISTS v_active_workers;

CREATE VIEW v_active_workers AS
SELECT
    wr.id,
    wr.worker_id,
    wr.friendly_name,
    wr.role,
    wr.status,
    wr.pod_name,
    wr.hostname,
    wr.started_at,
    wr.last_heartbeat_at,
    wr.last_task_at,
    wr.tasks_completed,
    wr.tasks_failed,
    wr.current_task_id,
    -- Per-transport preflight results
    wr.preflight_curl_status,
    wr.preflight_http_status,
    wr.preflight_curl_at,
    wr.preflight_http_at,
    wr.preflight_curl_error,
    wr.preflight_http_error,
    wr.preflight_curl_ms,
    wr.preflight_http_ms,
    -- Seconds elapsed since the last heartbeat
    EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) AS seconds_since_heartbeat,
    -- First matching rule wins: offline, stale (heartbeat older than
    -- 2 minutes), busy (has a current task), otherwise ready
    CASE
        WHEN wr.status = 'offline' THEN 'offline'
        WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
        WHEN wr.current_task_id IS NOT NULL THEN 'busy'
        ELSE 'ready'
    END AS health_status,
    -- Capability flags: true only when the matching preflight passed
    (wr.preflight_curl_status = 'passed') AS can_curl,
    (wr.preflight_http_status = 'passed') AS can_http
FROM worker_registry AS wr
WHERE wr.status <> 'terminated'
ORDER BY (wr.status = 'active') DESC, wr.last_heartbeat_at DESC;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 5: View for task queue with method info
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- Rebuild v_task_history so it includes the task's transport method.
DROP VIEW IF EXISTS v_task_history;

CREATE VIEW v_task_history AS
SELECT
    t.id,
    t.role,
    t.dispensary_id,
    -- NULL when the task has no matching dispensary (LEFT JOIN)
    d.name AS dispensary_name,
    t.platform,
    t.status,
    t.priority,
    t.method,
    t.worker_id,
    t.scheduled_for,
    t.claimed_at,
    t.started_at,
    t.completed_at,
    t.error_message,
    t.retry_count,
    t.created_at,
    -- Run time in seconds; NULL until both timestamps are set
    EXTRACT(EPOCH FROM (t.completed_at - t.started_at)) AS duration_sec
FROM worker_tasks AS t
LEFT JOIN dispensaries AS d
       ON d.id = t.dispensary_id
ORDER BY t.created_at DESC;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 6: Helper function to update worker preflight status
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- Records the result of one preflight for a worker.
--
-- Parameters:
--   p_worker_id   - worker_registry.worker_id of the row to update
--   p_transport   - 'curl' or 'http'; any other value updates nothing
--   p_status      - 'passed', 'failed' or 'skipped'
--   p_response_ms - response time in ms (optional)
--   p_error       - error message (optional)
CREATE OR REPLACE FUNCTION update_worker_preflight(
    p_worker_id VARCHAR(100),
    p_transport VARCHAR(10),
    p_status VARCHAR(20),
    p_response_ms INTEGER DEFAULT NULL,
    p_error TEXT DEFAULT NULL
) RETURNS VOID AS $$
BEGIN
    CASE p_transport
        WHEN 'curl' THEN
            UPDATE worker_registry
               SET preflight_curl_status = p_status,
                   preflight_curl_at     = NOW(),
                   preflight_curl_ms     = p_response_ms,
                   preflight_curl_error  = p_error,
                   updated_at            = NOW()
             WHERE worker_id = p_worker_id;
        WHEN 'http' THEN
            UPDATE worker_registry
               SET preflight_http_status = p_status,
                   preflight_http_at     = NOW(),
                   preflight_http_ms     = p_response_ms,
                   preflight_http_error  = p_error,
                   updated_at            = NOW()
             WHERE worker_id = p_worker_id;
        ELSE
            -- Unknown or NULL transport: leave the registry untouched.
            NULL;
    END CASE;
END;
$$ LANGUAGE plpgsql;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- Comments
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- Catalog comments for the objects added by this migration.
COMMENT ON COLUMN worker_registry.preflight_curl_status IS 'Status of curl/axios preflight: pending, passed, failed, skipped';
COMMENT ON COLUMN worker_registry.preflight_http_status IS 'Status of http/Puppeteer preflight: pending, passed, failed, skipped';
COMMENT ON COLUMN worker_registry.preflight_curl_at IS 'When curl preflight completed';
COMMENT ON COLUMN worker_registry.preflight_http_at IS 'When http preflight completed';
COMMENT ON COLUMN worker_registry.preflight_curl_error IS 'Error message if curl preflight failed';
COMMENT ON COLUMN worker_registry.preflight_http_error IS 'Error message if http preflight failed';
COMMENT ON COLUMN worker_registry.preflight_curl_ms IS 'Response time of successful curl preflight (ms)';
COMMENT ON COLUMN worker_registry.preflight_http_ms IS 'Response time of successful http preflight (ms)';

COMMENT ON COLUMN worker_tasks.method IS 'Transport method required: NULL=any, curl=proxy-based, http=browser-based';

-- Functions are identified by their full argument list: the bare-name form
-- fails with "function name is not unique" as soon as another overload of
-- the same name exists.
COMMENT ON FUNCTION claim_task(VARCHAR, VARCHAR, BOOLEAN, BOOLEAN) IS 'Atomically claim a task, respecting method requirements and per-store locking';
COMMENT ON FUNCTION update_worker_preflight(VARCHAR, VARCHAR, VARCHAR, INTEGER, TEXT) IS 'Update a workers preflight status for a given transport';
|
|
||||||
@@ -1,168 +0,0 @@
|
|||||||
-- Migration 085: Add IP and fingerprint columns for preflight reporting
|
|
||||||
-- These columns were missing from migration 084
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 1: Add IP address columns to worker_registry
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- IP address detected during each preflight:
--   curl_ip = curl/axios preflight, http_ip = http/Puppeteer preflight
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS curl_ip VARCHAR(45),
    ADD COLUMN IF NOT EXISTS http_ip VARCHAR(45);
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 2: Add fingerprint data column
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- Browser fingerprint data captured during Puppeteer preflight
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS fingerprint_data JSONB;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 3: Add combined preflight status/timestamp for convenience
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- Overall preflight status, derived from both transports by
-- update_worker_preflight():
--   'passed'  - curl and http both passed
--   'partial' - exactly one of the two passed
--   'failed'  - neither passed and at least one failed
--   'pending' - anything else (e.g. both still pending or skipped)
-- NOTE(review): an earlier description also counted "http passed for
-- browser-only" as 'passed'; the function reports that case as 'partial'.
-- preflight_at holds the most recent preflight completion timestamp.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_status VARCHAR(20) DEFAULT 'pending',
    ADD COLUMN IF NOT EXISTS preflight_at TIMESTAMPTZ;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 4: Update function to set preflight status
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- CREATE OR REPLACE only replaces a function with identical argument types.
-- Migration 084 defined update_worker_preflight(VARCHAR, VARCHAR, VARCHAR,
-- INTEGER, TEXT); left in place it would coexist with the version below,
-- making three-argument calls ambiguous and letting old-style calls run the
-- stale function that never maintains preflight_status. Drop it first.
DROP FUNCTION IF EXISTS update_worker_preflight(VARCHAR, VARCHAR, VARCHAR, INTEGER, TEXT);

-- Records the result of one preflight for a worker, then recomputes the
-- worker's overall preflight status.
--
-- Parameters:
--   p_worker_id   - worker_registry.worker_id of the row to update
--   p_transport   - 'curl' or 'http'; any other value skips the per-transport
--                   update but the overall status is still recomputed
--   p_status      - 'passed', 'failed' or 'skipped'
--   p_ip          - IP address detected during the preflight (optional)
--   p_response_ms - response time in ms (optional)
--   p_error       - error message (optional)
--   p_fingerprint - browser fingerprint, stored for 'http' only; NULL keeps
--                   the previously stored fingerprint
CREATE OR REPLACE FUNCTION update_worker_preflight(
    p_worker_id VARCHAR(100),
    p_transport VARCHAR(10),
    p_status VARCHAR(20),
    p_ip VARCHAR(45) DEFAULT NULL,
    p_response_ms INTEGER DEFAULT NULL,
    p_error TEXT DEFAULT NULL,
    p_fingerprint JSONB DEFAULT NULL
) RETURNS VOID AS $$
DECLARE
    v_curl_status VARCHAR(20);
    v_http_status VARCHAR(20);
    v_overall_status VARCHAR(20);
BEGIN
    IF p_transport = 'curl' THEN
        UPDATE worker_registry
        SET
            preflight_curl_status = p_status,
            preflight_curl_at = NOW(),
            preflight_curl_ms = p_response_ms,
            preflight_curl_error = p_error,
            curl_ip = p_ip,
            updated_at = NOW()
        WHERE worker_id = p_worker_id;
    ELSIF p_transport = 'http' THEN
        UPDATE worker_registry
        SET
            preflight_http_status = p_status,
            preflight_http_at = NOW(),
            preflight_http_ms = p_response_ms,
            preflight_http_error = p_error,
            http_ip = p_ip,
            fingerprint_data = COALESCE(p_fingerprint, fingerprint_data),
            updated_at = NOW()
        WHERE worker_id = p_worker_id;
    END IF;

    -- Read back both per-transport statuses after the update above
    SELECT preflight_curl_status, preflight_http_status
    INTO v_curl_status, v_http_status
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    -- Overall status: both passed -> passed; one passed -> partial;
    -- none passed but one failed -> failed; otherwise pending
    IF v_curl_status = 'passed' AND v_http_status = 'passed' THEN
        v_overall_status := 'passed';
    ELSIF v_curl_status = 'passed' OR v_http_status = 'passed' THEN
        v_overall_status := 'partial';
    ELSIF v_curl_status = 'failed' OR v_http_status = 'failed' THEN
        v_overall_status := 'failed';
    ELSE
        v_overall_status := 'pending';
    END IF;

    UPDATE worker_registry
    SET
        preflight_status = v_overall_status,
        preflight_at = NOW()
    WHERE worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 5: Update v_active_workers view
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- Rebuild v_active_workers with the preflight IPs, the combined preflight
-- status and the fingerprint data.
-- The view is dropped and created again rather than replaced in place.
DROP VIEW IF EXISTS v_active_workers;

CREATE VIEW v_active_workers AS
SELECT
    wr.id,
    wr.worker_id,
    wr.friendly_name,
    wr.role,
    wr.status,
    wr.pod_name,
    wr.hostname,
    wr.started_at,
    wr.last_heartbeat_at,
    wr.last_task_at,
    wr.tasks_completed,
    wr.tasks_failed,
    wr.current_task_id,
    -- IP addresses detected by the preflights
    wr.curl_ip,
    wr.http_ip,
    -- Combined preflight result
    wr.preflight_status,
    wr.preflight_at,
    -- Per-transport preflight results
    wr.preflight_curl_status,
    wr.preflight_http_status,
    wr.preflight_curl_at,
    wr.preflight_http_at,
    wr.preflight_curl_error,
    wr.preflight_http_error,
    wr.preflight_curl_ms,
    wr.preflight_http_ms,
    -- Browser fingerprint
    wr.fingerprint_data,
    -- Seconds elapsed since the last heartbeat
    EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) AS seconds_since_heartbeat,
    -- First matching rule wins: offline, stale (heartbeat older than
    -- 2 minutes), busy (has a current task), otherwise ready
    CASE
        WHEN wr.status = 'offline' THEN 'offline'
        WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
        WHEN wr.current_task_id IS NOT NULL THEN 'busy'
        ELSE 'ready'
    END AS health_status,
    -- Capability flags: true only when the matching preflight passed
    (wr.preflight_curl_status = 'passed') AS can_curl,
    (wr.preflight_http_status = 'passed') AS can_http
FROM worker_registry AS wr
WHERE wr.status <> 'terminated'
ORDER BY (wr.status = 'active') DESC, wr.last_heartbeat_at DESC;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- Comments
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
COMMENT ON COLUMN worker_registry.curl_ip IS 'IP address detected during curl/axios preflight';
|
|
||||||
COMMENT ON COLUMN worker_registry.http_ip IS 'IP address detected during Puppeteer preflight';
|
|
||||||
COMMENT ON COLUMN worker_registry.fingerprint_data IS 'Browser fingerprint captured during Puppeteer preflight';
|
|
||||||
COMMENT ON COLUMN worker_registry.preflight_status IS 'Overall preflight status: pending, passed, partial, failed';
|
|
||||||
COMMENT ON COLUMN worker_registry.preflight_at IS 'Most recent preflight completion timestamp';
|
|
||||||
@@ -1,46 +0,0 @@
|
|||||||
# DEPRECATED CODE - DO NOT USE
|
|
||||||
|
|
||||||
**These directories contain OLD, ABANDONED code.**
|
|
||||||
|
|
||||||
## What's Here
|
|
||||||
|
|
||||||
| Directory | What It Was | Why Deprecated |
|
|
||||||
|-----------|-------------|----------------|
|
|
||||||
| `hydration/` | Old pipeline for processing crawl data | Replaced by `src/tasks/handlers/` |
|
|
||||||
| `scraper-v2/` | Old Puppeteer-based scraper engine | Replaced by curl-based `src/platforms/dutchie/client.ts` |
|
|
||||||
| `canonical-hydration/` | Intermediate step toward canonical schema | Merged into task handlers |
|
|
||||||
|
|
||||||
## What to Use Instead
|
|
||||||
|
|
||||||
| Old (DON'T USE) | New (USE THIS) |
|
|
||||||
|----------------|----------------|
|
|
||||||
| `hydration/normalizers/dutchie.ts` | `src/tasks/handlers/product-refresh.ts` |
|
|
||||||
| `hydration/producer.ts` | `src/tasks/handlers/payload-fetch.ts` |
|
|
||||||
| `scraper-v2/engine.ts` | `src/platforms/dutchie/client.ts` |
|
|
||||||
| `scraper-v2/scheduler.ts` | `src/services/task-scheduler.ts` |
|
|
||||||
|
|
||||||
## Why Keep This Code?
|
|
||||||
|
|
||||||
- Historical reference only
|
|
||||||
- Some patterns may be useful for debugging
|
|
||||||
- Will be deleted once confirmed not needed
|
|
||||||
|
|
||||||
## Claude Instructions
|
|
||||||
|
|
||||||
**IF YOU ARE CLAUDE:**
|
|
||||||
|
|
||||||
1. NEVER import from `src/_deprecated/`
|
|
||||||
2. NEVER reference these files as examples
|
|
||||||
3. NEVER try to "fix" or "update" code in here
|
|
||||||
4. If you see imports from these directories, suggest replacing them
|
|
||||||
|
|
||||||
**Correct imports:**
|
|
||||||
```typescript
|
|
||||||
// GOOD
|
|
||||||
import { executeGraphQL } from '../platforms/dutchie/client';
|
|
||||||
import { pool } from '../db/pool';
|
|
||||||
|
|
||||||
// BAD - DO NOT USE
|
|
||||||
import { something } from '../_deprecated/hydration/...';
|
|
||||||
import { something } from '../_deprecated/scraper-v2/...';
|
|
||||||
```
|
|
||||||
@@ -1,584 +0,0 @@
|
|||||||
/**
|
|
||||||
* System API Routes
|
|
||||||
*
|
|
||||||
* Provides REST API endpoints for system monitoring and control:
|
|
||||||
* - /api/system/sync/* - Sync orchestrator
|
|
||||||
* - /api/system/dlq/* - Dead-letter queue
|
|
||||||
* - /api/system/integrity/* - Integrity checks
|
|
||||||
* - /api/system/fix/* - Auto-fix routines
|
|
||||||
* - /api/system/alerts/* - System alerts
|
|
||||||
* - /metrics - Prometheus metrics
|
|
||||||
*
|
|
||||||
* Phase 5: Full Production Sync + Monitoring
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { Router, Request, Response } from 'express';
|
|
||||||
import { Pool } from 'pg';
|
|
||||||
import {
|
|
||||||
SyncOrchestrator,
|
|
||||||
MetricsService,
|
|
||||||
DLQService,
|
|
||||||
AlertService,
|
|
||||||
IntegrityService,
|
|
||||||
AutoFixService,
|
|
||||||
} from '../services';
|
|
||||||
|
|
||||||
export function createSystemRouter(pool: Pool): Router {
|
|
||||||
const router = Router();
|
|
||||||
|
|
||||||
// Initialize services
|
|
||||||
const metrics = new MetricsService(pool);
|
|
||||||
const dlq = new DLQService(pool);
|
|
||||||
const alerts = new AlertService(pool);
|
|
||||||
const integrity = new IntegrityService(pool, alerts);
|
|
||||||
const autoFix = new AutoFixService(pool, alerts);
|
|
||||||
const orchestrator = new SyncOrchestrator(pool, metrics, dlq, alerts);
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// SYNC ORCHESTRATOR ENDPOINTS
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
 * GET /api/system/sync/status
 * Responds with the orchestrator's current status, or 500 if it cannot be read.
 */
router.get('/sync/status', async (_req: Request, res: Response) => {
  try {
    res.json(await orchestrator.getStatus());
  } catch (err) {
    console.error('[System] Sync status error:', err);
    res.status(500).json({ error: 'Failed to get sync status' });
  }
});
|
|
||||||
|
|
||||||
/**
 * POST /api/system/sync/run
 * Triggers a sync run and responds with its metrics.
 *
 * Body (optional): { triggeredBy?: string } - echoed back, defaults to 'api'.
 * Responds 500 with the error message when the run throws.
 */
router.post('/sync/run', async (req: Request, res: Response) => {
  try {
    // req.body is undefined when no body parser handled the request; treat
    // that the same as an empty body instead of failing with a TypeError.
    const triggeredBy = (req.body && req.body.triggeredBy) || 'api';
    // NOTE(review): triggeredBy is only echoed in the response; it is not
    // passed to runSync(). Confirm that is intended.
    const result = await orchestrator.runSync();
    res.json({
      success: true,
      triggeredBy,
      metrics: result,
    });
  } catch (error) {
    console.error('[System] Sync run error:', error);
    res.status(500).json({
      success: false,
      error: error instanceof Error ? error.message : 'Sync run failed',
    });
  }
});
|
|
||||||
|
|
||||||
/**
 * GET /api/system/sync/queue-depth
 * Responds with the orchestrator's queue depth, or 500 if it cannot be read.
 */
router.get('/sync/queue-depth', async (_req: Request, res: Response) => {
  try {
    res.json(await orchestrator.getQueueDepth());
  } catch (err) {
    console.error('[System] Queue depth error:', err);
    res.status(500).json({ error: 'Failed to get queue depth' });
  }
});
|
|
||||||
|
|
||||||
/**
 * GET /api/system/sync/health
 * Responds with the health report: 200 when healthy, 503 when not,
 * 500 when the check itself throws.
 */
router.get('/sync/health', async (_req: Request, res: Response) => {
  try {
    const report = await orchestrator.getHealth();
    const statusCode = report.healthy ? 200 : 503;
    res.status(statusCode).json(report);
  } catch (err) {
    console.error('[System] Health check error:', err);
    res.status(500).json({ healthy: false, error: 'Health check failed' });
  }
});
|
|
||||||
|
|
||||||
/**
 * POST /api/system/sync/pause
 * Pauses the orchestrator.
 *
 * Body (optional): { reason?: string } - defaults to 'Manual pause'.
 */
router.post('/sync/pause', async (req: Request, res: Response) => {
  try {
    // req.body is undefined when no body parser handled the request; fall
    // back to the default reason instead of failing with a TypeError.
    const reason = (req.body && req.body.reason) || 'Manual pause';
    await orchestrator.pause(reason);
    res.json({ success: true, message: 'Orchestrator paused' });
  } catch (error) {
    console.error('[System] Pause error:', error);
    res.status(500).json({ error: 'Failed to pause orchestrator' });
  }
});
|
|
||||||
|
|
||||||
/**
 * POST /api/system/sync/resume
 * Resumes the orchestrator and confirms with a success message.
 */
router.post('/sync/resume', async (_req: Request, res: Response) => {
  try {
    await orchestrator.resume();
  } catch (err) {
    console.error('[System] Resume error:', err);
    res.status(500).json({ error: 'Failed to resume orchestrator' });
    return;
  }
  res.json({ success: true, message: 'Orchestrator resumed' });
});
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// DLQ ENDPOINTS
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
 * GET /api/system/dlq
 * Lists DLQ payloads.
 *
 * Query parameters (all optional):
 *   status, errorType - passed through as filters
 *   dispensaryId      - integer filter
 *   limit             - non-negative integer, default 50
 *   offset            - non-negative integer, default 0
 *
 * Responds 400 when a numeric parameter is not a valid number, 500 when the
 * listing fails.
 */
router.get('/dlq', async (req: Request, res: Response) => {
  try {
    // Always parse base 10 so the result never depends on the input's prefix.
    const dispensaryId = req.query.dispensaryId
      ? parseInt(req.query.dispensaryId as string, 10)
      : undefined;
    const limit = req.query.limit ? parseInt(req.query.limit as string, 10) : 50;
    const offset = req.query.offset ? parseInt(req.query.offset as string, 10) : 0;

    // Reject unusable numbers here rather than forwarding NaN or a negative
    // limit/offset to the service.
    if (dispensaryId !== undefined && Number.isNaN(dispensaryId)) {
      return res.status(400).json({ error: 'dispensaryId must be an integer' });
    }
    if (Number.isNaN(limit) || limit < 0 || Number.isNaN(offset) || offset < 0) {
      return res.status(400).json({ error: 'limit and offset must be non-negative integers' });
    }

    const options = {
      status: req.query.status as string,
      errorType: req.query.errorType as string,
      dispensaryId,
      limit,
      offset,
    };

    const result = await dlq.listPayloads(options);
    res.json(result);
  } catch (error) {
    console.error('[System] DLQ list error:', error);
    res.status(500).json({ error: 'Failed to list DLQ payloads' });
  }
});
|
|
||||||
|
|
||||||
/**
 * GET /api/system/dlq/stats
 * Responds with DLQ statistics, or 500 if they cannot be read.
 */
router.get('/dlq/stats', async (_req: Request, res: Response) => {
  try {
    res.json(await dlq.getStats());
  } catch (err) {
    console.error('[System] DLQ stats error:', err);
    res.status(500).json({ error: 'Failed to get DLQ stats' });
  }
});
|
|
||||||
|
|
||||||
/**
 * GET /api/system/dlq/summary
 * Responds with the DLQ summary by error type, or 500 if it cannot be read.
 */
router.get('/dlq/summary', async (_req: Request, res: Response) => {
  try {
    res.json(await dlq.getSummary());
  } catch (err) {
    console.error('[System] DLQ summary error:', err);
    res.status(500).json({ error: 'Failed to get DLQ summary' });
  }
});
|
|
||||||
|
|
||||||
/**
 * GET /api/system/dlq/:id
 * Responds with one DLQ payload, 404 when it does not exist,
 * 500 when the lookup fails.
 */
router.get('/dlq/:id', async (req: Request, res: Response) => {
  try {
    const found = await dlq.getPayload(req.params.id);
    if (found) {
      res.json(found);
    } else {
      res.status(404).json({ error: 'Payload not found' });
    }
  } catch (err) {
    console.error('[System] DLQ get error:', err);
    res.status(500).json({ error: 'Failed to get DLQ payload' });
  }
});
|
|
||||||
|
|
||||||
/**
 * POST /api/system/dlq/:id/retry
 * Retries one DLQ payload. The retry result is the response body:
 * 200 when it reports success, 400 otherwise, 500 when the retry throws.
 */
router.post('/dlq/:id/retry', async (req: Request, res: Response) => {
  try {
    const outcome = await dlq.retryPayload(req.params.id);
    if (!outcome.success) {
      res.status(400).json(outcome);
      return;
    }
    res.json(outcome);
  } catch (err) {
    console.error('[System] DLQ retry error:', err);
    res.status(500).json({ error: 'Failed to retry payload' });
  }
});
|
|
||||||
|
|
||||||
/**
 * POST /api/system/dlq/:id/abandon
 * Abandons one DLQ payload and responds with { success }.
 *
 * Body (optional):
 *   reason      - defaults to 'Manually abandoned'
 *   abandonedBy - defaults to 'api'
 */
router.post('/dlq/:id/abandon', async (req: Request, res: Response) => {
  try {
    // req.body is undefined when no body parser handled the request; fall
    // back to the defaults instead of failing with a TypeError.
    const body = req.body || {};
    const reason = body.reason || 'Manually abandoned';
    const abandonedBy = body.abandonedBy || 'api';
    const success = await dlq.abandonPayload(req.params.id, reason, abandonedBy);
    res.json({ success });
  } catch (error) {
    console.error('[System] DLQ abandon error:', error);
    res.status(500).json({ error: 'Failed to abandon payload' });
  }
});
|
|
||||||
|
|
||||||
/**
 * POST /api/system/dlq/bulk-retry
 * Retries every DLQ payload with the given error type.
 *
 * Body: { errorType: string } (required).
 * Responds 400 when errorType is missing, 500 when the retry fails.
 */
router.post('/dlq/bulk-retry', async (req: Request, res: Response) => {
  try {
    // Destructuring an undefined req.body would throw and surface as a 500;
    // a missing body is a client error and must reach the 400 branch below.
    const { errorType } = req.body || {};
    if (!errorType) {
      return res.status(400).json({ error: 'errorType is required' });
    }
    const result = await dlq.bulkRetryByErrorType(errorType);
    res.json(result);
  } catch (error) {
    console.error('[System] DLQ bulk retry error:', error);
    res.status(500).json({ error: 'Failed to bulk retry' });
  }
});
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// INTEGRITY CHECK ENDPOINTS
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
 * POST /api/system/integrity/run
 * Runs all integrity checks and responds with the result.
 *
 * Body (optional): { triggeredBy?: string } - defaults to 'api'.
 */
router.post('/integrity/run', async (req: Request, res: Response) => {
  try {
    // req.body is undefined when no body parser handled the request; fall
    // back to the default instead of failing with a TypeError.
    const triggeredBy = (req.body && req.body.triggeredBy) || 'api';
    const result = await integrity.runAllChecks(triggeredBy);
    res.json(result);
  } catch (error) {
    console.error('[System] Integrity run error:', error);
    res.status(500).json({ error: 'Failed to run integrity checks' });
  }
});
|
|
||||||
|
|
||||||
/**
 * GET /api/system/integrity/runs
 * Responds with recent integrity check runs.
 *
 * Query: limit - non-negative integer, default 10.
 * Responds 400 when limit is not a valid number, 500 when the lookup fails.
 */
router.get('/integrity/runs', async (req: Request, res: Response) => {
  try {
    // Always parse base 10 so the result never depends on the input's prefix.
    const limit = req.query.limit ? parseInt(req.query.limit as string, 10) : 10;
    // Reject unusable values here rather than forwarding NaN or a negative
    // limit to the service.
    if (Number.isNaN(limit) || limit < 0) {
      return res.status(400).json({ error: 'limit must be a non-negative integer' });
    }
    const runs = await integrity.getRecentRuns(limit);
    res.json(runs);
  } catch (error) {
    console.error('[System] Integrity runs error:', error);
    res.status(500).json({ error: 'Failed to get integrity runs' });
  }
});
|
|
||||||
|
|
||||||
/**
 * GET /api/system/integrity/runs/:runId
 * Responds with the results of one integrity run, or 500 if the lookup fails.
 */
router.get('/integrity/runs/:runId', async (req: Request, res: Response) => {
  const { runId } = req.params;
  try {
    res.json(await integrity.getRunResults(runId));
  } catch (err) {
    console.error('[System] Integrity run results error:', err);
    res.status(500).json({ error: 'Failed to get run results' });
  }
});
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// AUTO-FIX ENDPOINTS
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/fix/routines
|
|
||||||
* Get available fix routines
|
|
||||||
*/
|
|
||||||
router.get('/fix/routines', (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const routines = autoFix.getAvailableRoutines();
|
|
||||||
res.json(routines);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Get routines error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get routines' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/fix/:routine
|
|
||||||
* Run a fix routine
|
|
||||||
*/
|
|
||||||
router.post('/fix/:routine', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const routineName = req.params.routine;
|
|
||||||
const dryRun = req.body.dryRun === true;
|
|
||||||
const triggeredBy = req.body.triggeredBy || 'api';
|
|
||||||
|
|
||||||
const result = await autoFix.runRoutine(routineName as any, triggeredBy, { dryRun });
|
|
||||||
res.json(result);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Fix routine error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to run fix routine' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/fix/runs
|
|
||||||
* Get recent fix runs
|
|
||||||
*/
|
|
||||||
router.get('/fix/runs', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const limit = req.query.limit ? parseInt(req.query.limit as string) : 20;
|
|
||||||
const runs = await autoFix.getRecentRuns(limit);
|
|
||||||
res.json(runs);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Fix runs error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get fix runs' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// ALERTS ENDPOINTS
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/alerts
|
|
||||||
* List alerts
|
|
||||||
*/
|
|
||||||
router.get('/alerts', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const options = {
|
|
||||||
status: req.query.status as any,
|
|
||||||
severity: req.query.severity as any,
|
|
||||||
type: req.query.type as string,
|
|
||||||
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
|
|
||||||
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
const result = await alerts.listAlerts(options);
|
|
||||||
res.json(result);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Alerts list error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to list alerts' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/alerts/active
|
|
||||||
* Get active alerts
|
|
||||||
*/
|
|
||||||
router.get('/alerts/active', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const activeAlerts = await alerts.getActiveAlerts();
|
|
||||||
res.json(activeAlerts);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Active alerts error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get active alerts' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/alerts/summary
|
|
||||||
* Get alert summary
|
|
||||||
*/
|
|
||||||
router.get('/alerts/summary', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const summary = await alerts.getSummary();
|
|
||||||
res.json(summary);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Alerts summary error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get alerts summary' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/alerts/:id/acknowledge
|
|
||||||
* Acknowledge an alert
|
|
||||||
*/
|
|
||||||
router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const alertId = parseInt(req.params.id);
|
|
||||||
const acknowledgedBy = req.body.acknowledgedBy || 'api';
|
|
||||||
const success = await alerts.acknowledgeAlert(alertId, acknowledgedBy);
|
|
||||||
res.json({ success });
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Acknowledge alert error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to acknowledge alert' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/alerts/:id/resolve
|
|
||||||
* Resolve an alert
|
|
||||||
*/
|
|
||||||
router.post('/alerts/:id/resolve', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const alertId = parseInt(req.params.id);
|
|
||||||
const resolvedBy = req.body.resolvedBy || 'api';
|
|
||||||
const success = await alerts.resolveAlert(alertId, resolvedBy);
|
|
||||||
res.json({ success });
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Resolve alert error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to resolve alert' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/alerts/bulk-acknowledge
|
|
||||||
* Bulk acknowledge alerts
|
|
||||||
*/
|
|
||||||
router.post('/alerts/bulk-acknowledge', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { ids, acknowledgedBy } = req.body;
|
|
||||||
if (!ids || !Array.isArray(ids)) {
|
|
||||||
return res.status(400).json({ error: 'ids array is required' });
|
|
||||||
}
|
|
||||||
const count = await alerts.bulkAcknowledge(ids, acknowledgedBy || 'api');
|
|
||||||
res.json({ acknowledged: count });
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Bulk acknowledge error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to bulk acknowledge' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// METRICS ENDPOINTS
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/metrics
|
|
||||||
* Get all current metrics
|
|
||||||
*/
|
|
||||||
router.get('/metrics', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const allMetrics = await metrics.getAllMetrics();
|
|
||||||
res.json(allMetrics);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Metrics error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get metrics' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/metrics/:name
|
|
||||||
* Get a specific metric
|
|
||||||
*/
|
|
||||||
router.get('/metrics/:name', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const metric = await metrics.getMetric(req.params.name);
|
|
||||||
if (!metric) {
|
|
||||||
return res.status(404).json({ error: 'Metric not found' });
|
|
||||||
}
|
|
||||||
res.json(metric);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Metric error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get metric' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/metrics/:name/history
|
|
||||||
* Get metric time series
|
|
||||||
*/
|
|
||||||
router.get('/metrics/:name/history', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const hours = req.query.hours ? parseInt(req.query.hours as string) : 24;
|
|
||||||
const history = await metrics.getMetricHistory(req.params.name, hours);
|
|
||||||
res.json(history);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Metric history error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get metric history' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/errors
|
|
||||||
* Get error summary
|
|
||||||
*/
|
|
||||||
router.get('/errors', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const summary = await metrics.getErrorSummary();
|
|
||||||
res.json(summary);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Error summary error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get error summary' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/errors/recent
|
|
||||||
* Get recent errors
|
|
||||||
*/
|
|
||||||
router.get('/errors/recent', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const limit = req.query.limit ? parseInt(req.query.limit as string) : 50;
|
|
||||||
const errorType = req.query.type as string;
|
|
||||||
const errors = await metrics.getRecentErrors(limit, errorType);
|
|
||||||
res.json(errors);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Recent errors error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get recent errors' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/errors/acknowledge
|
|
||||||
* Acknowledge errors
|
|
||||||
*/
|
|
||||||
router.post('/errors/acknowledge', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { ids, acknowledgedBy } = req.body;
|
|
||||||
if (!ids || !Array.isArray(ids)) {
|
|
||||||
return res.status(400).json({ error: 'ids array is required' });
|
|
||||||
}
|
|
||||||
const count = await metrics.acknowledgeErrors(ids, acknowledgedBy || 'api');
|
|
||||||
res.json({ acknowledged: count });
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Acknowledge errors error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to acknowledge errors' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
return router;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create Prometheus metrics endpoint (standalone)
|
|
||||||
*/
|
|
||||||
export function createPrometheusRouter(pool: Pool): Router {
|
|
||||||
const router = Router();
|
|
||||||
const metrics = new MetricsService(pool);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /metrics
|
|
||||||
* Prometheus-compatible metrics endpoint
|
|
||||||
*/
|
|
||||||
router.get('/', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const prometheusOutput = await metrics.getPrometheusMetrics();
|
|
||||||
res.set('Content-Type', 'text/plain; version=0.0.4');
|
|
||||||
res.send(prometheusOutput);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[Prometheus] Metrics error:', error);
|
|
||||||
res.status(500).send('# Error generating metrics');
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
return router;
|
|
||||||
}
|
|
||||||
@@ -109,7 +109,7 @@ import scraperMonitorRoutes from './routes/scraper-monitor';
|
|||||||
import apiTokensRoutes from './routes/api-tokens';
|
import apiTokensRoutes from './routes/api-tokens';
|
||||||
import apiPermissionsRoutes from './routes/api-permissions';
|
import apiPermissionsRoutes from './routes/api-permissions';
|
||||||
import parallelScrapeRoutes from './routes/parallel-scrape';
|
import parallelScrapeRoutes from './routes/parallel-scrape';
|
||||||
// crawler-sandbox moved to _deprecated
|
import crawlerSandboxRoutes from './routes/crawler-sandbox';
|
||||||
import versionRoutes from './routes/version';
|
import versionRoutes from './routes/version';
|
||||||
import deployStatusRoutes from './routes/deploy-status';
|
import deployStatusRoutes from './routes/deploy-status';
|
||||||
import publicApiRoutes from './routes/public-api';
|
import publicApiRoutes from './routes/public-api';
|
||||||
@@ -146,7 +146,6 @@ import tasksRoutes from './routes/tasks';
|
|||||||
import workerRegistryRoutes from './routes/worker-registry';
|
import workerRegistryRoutes from './routes/worker-registry';
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Raw payload access API
|
// Per TASK_WORKFLOW_2024-12-10.md: Raw payload access API
|
||||||
import payloadsRoutes from './routes/payloads';
|
import payloadsRoutes from './routes/payloads';
|
||||||
import k8sRoutes from './routes/k8s';
|
|
||||||
|
|
||||||
// Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com)
|
// Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com)
|
||||||
// These domains can access the API without authentication
|
// These domains can access the API without authentication
|
||||||
@@ -187,7 +186,7 @@ app.use('/api/scraper-monitor', scraperMonitorRoutes);
|
|||||||
app.use('/api/api-tokens', apiTokensRoutes);
|
app.use('/api/api-tokens', apiTokensRoutes);
|
||||||
app.use('/api/api-permissions', apiPermissionsRoutes);
|
app.use('/api/api-permissions', apiPermissionsRoutes);
|
||||||
app.use('/api/parallel-scrape', parallelScrapeRoutes);
|
app.use('/api/parallel-scrape', parallelScrapeRoutes);
|
||||||
// crawler-sandbox moved to _deprecated
|
app.use('/api/crawler-sandbox', crawlerSandboxRoutes);
|
||||||
app.use('/api/version', versionRoutes);
|
app.use('/api/version', versionRoutes);
|
||||||
app.use('/api/admin/deploy-status', deployStatusRoutes);
|
app.use('/api/admin/deploy-status', deployStatusRoutes);
|
||||||
console.log('[DeployStatus] Routes registered at /api/admin/deploy-status');
|
console.log('[DeployStatus] Routes registered at /api/admin/deploy-status');
|
||||||
@@ -231,10 +230,6 @@ console.log('[WorkerRegistry] Routes registered at /api/worker-registry');
|
|||||||
app.use('/api/payloads', payloadsRoutes);
|
app.use('/api/payloads', payloadsRoutes);
|
||||||
console.log('[Payloads] Routes registered at /api/payloads');
|
console.log('[Payloads] Routes registered at /api/payloads');
|
||||||
|
|
||||||
// K8s control routes - worker scaling from admin UI
|
|
||||||
app.use('/api/k8s', k8sRoutes);
|
|
||||||
console.log('[K8s] Routes registered at /api/k8s');
|
|
||||||
|
|
||||||
// Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation
|
// Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation
|
||||||
try {
|
try {
|
||||||
const analyticsV2Router = createAnalyticsV2Router(getPool());
|
const analyticsV2Router = createAnalyticsV2Router(getPool());
|
||||||
|
|||||||
@@ -47,27 +47,4 @@ router.post('/refresh', authMiddleware, async (req: AuthRequest, res) => {
|
|||||||
res.json({ token });
|
res.json({ token });
|
||||||
});
|
});
|
||||||
|
|
||||||
// Verify password for sensitive actions (requires current user to be authenticated)
|
|
||||||
router.post('/verify-password', authMiddleware, async (req: AuthRequest, res) => {
|
|
||||||
try {
|
|
||||||
const { password } = req.body;
|
|
||||||
|
|
||||||
if (!password) {
|
|
||||||
return res.status(400).json({ error: 'Password required' });
|
|
||||||
}
|
|
||||||
|
|
||||||
// Re-authenticate the current user with the provided password
|
|
||||||
const user = await authenticateUser(req.user!.email, password);
|
|
||||||
|
|
||||||
if (!user) {
|
|
||||||
return res.status(401).json({ error: 'Invalid password', verified: false });
|
|
||||||
}
|
|
||||||
|
|
||||||
res.json({ verified: true });
|
|
||||||
} catch (error) {
|
|
||||||
console.error('Password verification error:', error);
|
|
||||||
res.status(500).json({ error: 'Internal server error' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
export default router;
|
export default router;
|
||||||
|
|||||||
@@ -15,14 +15,9 @@
|
|||||||
|
|
||||||
import { Router, Request, Response } from 'express';
|
import { Router, Request, Response } from 'express';
|
||||||
import { pool } from '../db/pool';
|
import { pool } from '../db/pool';
|
||||||
import { authMiddleware, requireRole } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// All job-queue routes require authentication and admin role
|
|
||||||
router.use(authMiddleware);
|
|
||||||
router.use(requireRole('admin', 'superadmin'));
|
|
||||||
|
|
||||||
// In-memory queue state (would be in Redis in production)
|
// In-memory queue state (would be in Redis in production)
|
||||||
let queuePaused = false;
|
let queuePaused = false;
|
||||||
|
|
||||||
|
|||||||
@@ -1,145 +0,0 @@
|
|||||||
/**
|
|
||||||
* Kubernetes Control Routes
|
|
||||||
*
|
|
||||||
* Provides admin UI control over k8s resources like worker scaling.
|
|
||||||
* Uses in-cluster config when running in k8s, or kubeconfig locally.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { Router, Request, Response } from 'express';
|
|
||||||
import * as k8s from '@kubernetes/client-node';
|
|
||||||
import { authMiddleware, requireRole } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
|
||||||
|
|
||||||
// K8s control routes require authentication and admin role
|
|
||||||
router.use(authMiddleware);
|
|
||||||
router.use(requireRole('admin', 'superadmin'));
|
|
||||||
|
|
||||||
// K8s client setup - lazy initialization
|
|
||||||
let appsApi: k8s.AppsV1Api | null = null;
|
|
||||||
let k8sError: string | null = null;
|
|
||||||
|
|
||||||
function getK8sClient(): k8s.AppsV1Api | null {
|
|
||||||
if (appsApi) return appsApi;
|
|
||||||
if (k8sError) return null;
|
|
||||||
|
|
||||||
try {
|
|
||||||
const kc = new k8s.KubeConfig();
|
|
||||||
|
|
||||||
// Try in-cluster config first (when running in k8s)
|
|
||||||
try {
|
|
||||||
kc.loadFromCluster();
|
|
||||||
console.log('[K8s] Loaded in-cluster config');
|
|
||||||
} catch {
|
|
||||||
// Fall back to default kubeconfig (local dev)
|
|
||||||
try {
|
|
||||||
kc.loadFromDefault();
|
|
||||||
console.log('[K8s] Loaded default kubeconfig');
|
|
||||||
} catch (e) {
|
|
||||||
k8sError = 'No k8s config available';
|
|
||||||
console.log('[K8s] No config available - k8s routes disabled');
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
appsApi = kc.makeApiClient(k8s.AppsV1Api);
|
|
||||||
return appsApi;
|
|
||||||
} catch (e: any) {
|
|
||||||
k8sError = e.message;
|
|
||||||
console.error('[K8s] Failed to initialize client:', e.message);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
|
|
||||||
const WORKER_DEPLOYMENT = 'scraper-worker';
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/k8s/workers
|
|
||||||
* Get current worker deployment status
|
|
||||||
*/
|
|
||||||
router.get('/workers', async (_req: Request, res: Response) => {
|
|
||||||
const client = getK8sClient();
|
|
||||||
|
|
||||||
if (!client) {
|
|
||||||
return res.json({
|
|
||||||
success: true,
|
|
||||||
available: false,
|
|
||||||
error: k8sError || 'K8s not available',
|
|
||||||
replicas: 0,
|
|
||||||
readyReplicas: 0,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const deployment = await client.readNamespacedDeployment({
|
|
||||||
name: WORKER_DEPLOYMENT,
|
|
||||||
namespace: NAMESPACE,
|
|
||||||
});
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
available: true,
|
|
||||||
replicas: deployment.spec?.replicas || 0,
|
|
||||||
readyReplicas: deployment.status?.readyReplicas || 0,
|
|
||||||
availableReplicas: deployment.status?.availableReplicas || 0,
|
|
||||||
updatedReplicas: deployment.status?.updatedReplicas || 0,
|
|
||||||
});
|
|
||||||
} catch (e: any) {
|
|
||||||
console.error('[K8s] Error getting deployment:', e.message);
|
|
||||||
res.status(500).json({
|
|
||||||
success: false,
|
|
||||||
error: e.message,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/k8s/workers/scale
|
|
||||||
* Scale worker deployment
|
|
||||||
* Body: { replicas: number }
|
|
||||||
*/
|
|
||||||
router.post('/workers/scale', async (req: Request, res: Response) => {
|
|
||||||
const client = getK8sClient();
|
|
||||||
|
|
||||||
if (!client) {
|
|
||||||
return res.status(503).json({
|
|
||||||
success: false,
|
|
||||||
error: k8sError || 'K8s not available',
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const { replicas } = req.body;
|
|
||||||
|
|
||||||
if (typeof replicas !== 'number' || replicas < 0 || replicas > 50) {
|
|
||||||
return res.status(400).json({
|
|
||||||
success: false,
|
|
||||||
error: 'replicas must be a number between 0 and 50',
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Patch the deployment to set replicas
|
|
||||||
await client.patchNamespacedDeploymentScale({
|
|
||||||
name: WORKER_DEPLOYMENT,
|
|
||||||
namespace: NAMESPACE,
|
|
||||||
body: { spec: { replicas } },
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`[K8s] Scaled ${WORKER_DEPLOYMENT} to ${replicas} replicas`);
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
replicas,
|
|
||||||
message: `Scaled to ${replicas} workers`,
|
|
||||||
});
|
|
||||||
} catch (e: any) {
|
|
||||||
console.error('[K8s] Error scaling deployment:', e.message);
|
|
||||||
res.status(500).json({
|
|
||||||
success: false,
|
|
||||||
error: e.message,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
export default router;
|
|
||||||
@@ -11,14 +11,9 @@ import { getLatestTrace, getTracesForDispensary, getTraceById } from '../service
|
|||||||
import { getProviderDisplayName } from '../utils/provider-display';
|
import { getProviderDisplayName } from '../utils/provider-display';
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import { authMiddleware, requireRole } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// Orchestrator admin routes require authentication and admin role
|
|
||||||
router.use(authMiddleware);
|
|
||||||
router.use(requireRole('admin', 'superadmin'));
|
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// ORCHESTRATOR METRICS
|
// ORCHESTRATOR METRICS
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|||||||
@@ -21,13 +21,9 @@ import {
|
|||||||
listPayloadMetadata,
|
listPayloadMetadata,
|
||||||
} from '../utils/payload-storage';
|
} from '../utils/payload-storage';
|
||||||
import { Pool } from 'pg';
|
import { Pool } from 'pg';
|
||||||
import { authMiddleware } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// All payload routes require authentication (trusted origins or API token)
|
|
||||||
router.use(authMiddleware);
|
|
||||||
|
|
||||||
// Get pool instance for queries
|
// Get pool instance for queries
|
||||||
const getDbPool = (): Pool => getPool() as unknown as Pool;
|
const getDbPool = (): Pool => getPool() as unknown as Pool;
|
||||||
|
|
||||||
|
|||||||
@@ -18,14 +18,9 @@
|
|||||||
|
|
||||||
import { Router, Request, Response } from 'express';
|
import { Router, Request, Response } from 'express';
|
||||||
import { pool } from '../db/pool';
|
import { pool } from '../db/pool';
|
||||||
import { authMiddleware, requireRole } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// Pipeline routes require authentication and admin role
|
|
||||||
router.use(authMiddleware);
|
|
||||||
router.use(requireRole('admin', 'superadmin'));
|
|
||||||
|
|
||||||
// Valid stages
|
// Valid stages
|
||||||
const STAGES = ['discovered', 'validated', 'promoted', 'sandbox', 'production', 'failing'] as const;
|
const STAGES = ['discovered', 'validated', 'promoted', 'sandbox', 'production', 'failing'] as const;
|
||||||
type Stage = typeof STAGES[number];
|
type Stage = typeof STAGES[number];
|
||||||
|
|||||||
@@ -278,7 +278,7 @@ router.post('/update-locations', requireRole('superadmin', 'admin'), async (req,
|
|||||||
|
|
||||||
// Run in background
|
// Run in background
|
||||||
updateAllProxyLocations().catch(err => {
|
updateAllProxyLocations().catch(err => {
|
||||||
console.error('Location update failed:', err);
|
console.error('❌ Location update failed:', err);
|
||||||
});
|
});
|
||||||
|
|
||||||
res.json({ message: 'Location update job started' });
|
res.json({ message: 'Location update job started' });
|
||||||
|
|||||||
@@ -19,14 +19,9 @@ import {
|
|||||||
resumeTaskPool,
|
resumeTaskPool,
|
||||||
getTaskPoolStatus,
|
getTaskPoolStatus,
|
||||||
} from '../tasks/task-pool-state';
|
} from '../tasks/task-pool-state';
|
||||||
import { authMiddleware, requireRole } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// Task routes require authentication and admin role
|
|
||||||
router.use(authMiddleware);
|
|
||||||
router.use(requireRole('admin', 'superadmin'));
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/tasks
|
* GET /api/tasks
|
||||||
* List tasks with optional filters
|
* List tasks with optional filters
|
||||||
|
|||||||
@@ -23,14 +23,9 @@
|
|||||||
import { Router, Request, Response } from 'express';
|
import { Router, Request, Response } from 'express';
|
||||||
import { pool } from '../db/pool';
|
import { pool } from '../db/pool';
|
||||||
import os from 'os';
|
import os from 'os';
|
||||||
import { authMiddleware } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// Worker registry routes require authentication
|
|
||||||
// Note: Internal workers (pods) can access via trusted IP (localhost, in-cluster)
|
|
||||||
router.use(authMiddleware);
|
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// WORKER REGISTRATION
|
// WORKER REGISTRATION
|
||||||
// ============================================================
|
// ============================================================
|
||||||
@@ -75,20 +70,21 @@ router.post('/register', async (req: Request, res: Response) => {
|
|||||||
);
|
);
|
||||||
|
|
||||||
if (existing.rows.length > 0) {
|
if (existing.rows.length > 0) {
|
||||||
// Re-activate existing worker - keep existing pod_name (fantasy name), don't overwrite with K8s name
|
// Re-activate existing worker
|
||||||
const { rows } = await pool.query(`
|
const { rows } = await pool.query(`
|
||||||
UPDATE worker_registry
|
UPDATE worker_registry
|
||||||
SET status = 'active',
|
SET status = 'active',
|
||||||
role = $1,
|
role = $1,
|
||||||
hostname = $2,
|
pod_name = $2,
|
||||||
ip_address = $3,
|
hostname = $3,
|
||||||
|
ip_address = $4,
|
||||||
last_heartbeat_at = NOW(),
|
last_heartbeat_at = NOW(),
|
||||||
started_at = NOW(),
|
started_at = NOW(),
|
||||||
metadata = $4,
|
metadata = $5,
|
||||||
updated_at = NOW()
|
updated_at = NOW()
|
||||||
WHERE worker_id = $5
|
WHERE worker_id = $6
|
||||||
RETURNING id, worker_id, friendly_name, pod_name, role
|
RETURNING id, worker_id, friendly_name, role
|
||||||
`, [role, finalHostname, clientIp, metadata, finalWorkerId]);
|
`, [role, pod_name, finalHostname, clientIp, metadata, finalWorkerId]);
|
||||||
|
|
||||||
const worker = rows[0];
|
const worker = rows[0];
|
||||||
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
|
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
|
||||||
@@ -109,13 +105,13 @@ router.post('/register', async (req: Request, res: Response) => {
|
|||||||
const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]);
|
const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]);
|
||||||
const friendlyName = nameResult.rows[0].name;
|
const friendlyName = nameResult.rows[0].name;
|
||||||
|
|
||||||
// Register the worker - use friendlyName as pod_name (not K8s name)
|
// Register the worker
|
||||||
const { rows } = await pool.query(`
|
const { rows } = await pool.query(`
|
||||||
INSERT INTO worker_registry (
|
INSERT INTO worker_registry (
|
||||||
worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata
|
worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata
|
||||||
) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7)
|
) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7)
|
||||||
RETURNING id, worker_id, friendly_name, pod_name, role
|
RETURNING id, worker_id, friendly_name, role
|
||||||
`, [finalWorkerId, friendlyName, role, friendlyName, finalHostname, clientIp, metadata]);
|
`, [finalWorkerId, friendlyName, role, pod_name, finalHostname, clientIp, metadata]);
|
||||||
|
|
||||||
const worker = rows[0];
|
const worker = rows[0];
|
||||||
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
|
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
|
||||||
@@ -142,36 +138,17 @@ router.post('/register', async (req: Request, res: Response) => {
|
|||||||
*
|
*
|
||||||
* Body:
|
* Body:
|
||||||
* - worker_id: string (required)
|
* - worker_id: string (required)
|
||||||
* - current_task_id: number (optional) - task currently being processed (primary task)
|
* - current_task_id: number (optional) - task currently being processed
|
||||||
* - current_task_ids: number[] (optional) - all tasks currently being processed (concurrent)
|
|
||||||
* - active_task_count: number (optional) - number of tasks currently running
|
|
||||||
* - max_concurrent_tasks: number (optional) - max concurrent tasks this worker can handle
|
|
||||||
* - status: string (optional) - 'active', 'idle'
|
* - status: string (optional) - 'active', 'idle'
|
||||||
* - resources: object (optional) - memory_mb, cpu_user_ms, cpu_system_ms, etc.
|
|
||||||
*/
|
*/
|
||||||
router.post('/heartbeat', async (req: Request, res: Response) => {
|
router.post('/heartbeat', async (req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
const {
|
const { worker_id, current_task_id, status = 'active', resources } = req.body;
|
||||||
worker_id,
|
|
||||||
current_task_id,
|
|
||||||
current_task_ids,
|
|
||||||
active_task_count,
|
|
||||||
max_concurrent_tasks,
|
|
||||||
status = 'active',
|
|
||||||
resources
|
|
||||||
} = req.body;
|
|
||||||
|
|
||||||
if (!worker_id) {
|
if (!worker_id) {
|
||||||
return res.status(400).json({ success: false, error: 'worker_id is required' });
|
return res.status(400).json({ success: false, error: 'worker_id is required' });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build metadata object with all the new fields
|
|
||||||
const metadata: Record<string, unknown> = {};
|
|
||||||
if (resources) Object.assign(metadata, resources);
|
|
||||||
if (current_task_ids) metadata.current_task_ids = current_task_ids;
|
|
||||||
if (active_task_count !== undefined) metadata.active_task_count = active_task_count;
|
|
||||||
if (max_concurrent_tasks !== undefined) metadata.max_concurrent_tasks = max_concurrent_tasks;
|
|
||||||
|
|
||||||
// Store resources in metadata jsonb column
|
// Store resources in metadata jsonb column
|
||||||
const { rows } = await pool.query(`
|
const { rows } = await pool.query(`
|
||||||
UPDATE worker_registry
|
UPDATE worker_registry
|
||||||
@@ -182,7 +159,7 @@ router.post('/heartbeat', async (req: Request, res: Response) => {
|
|||||||
updated_at = NOW()
|
updated_at = NOW()
|
||||||
WHERE worker_id = $3
|
WHERE worker_id = $3
|
||||||
RETURNING id, friendly_name, status
|
RETURNING id, friendly_name, status
|
||||||
`, [current_task_id || null, status, worker_id, Object.keys(metadata).length > 0 ? JSON.stringify(metadata) : null]);
|
`, [current_task_id || null, status, worker_id, resources ? JSON.stringify(resources) : null]);
|
||||||
|
|
||||||
if (rows.length === 0) {
|
if (rows.length === 0) {
|
||||||
return res.status(404).json({ success: false, error: 'Worker not found - please register first' });
|
return res.status(404).json({ success: false, error: 'Worker not found - please register first' });
|
||||||
@@ -353,27 +330,12 @@ router.get('/workers', async (req: Request, res: Response) => {
|
|||||||
tasks_completed,
|
tasks_completed,
|
||||||
tasks_failed,
|
tasks_failed,
|
||||||
current_task_id,
|
current_task_id,
|
||||||
-- Concurrent task fields from metadata
|
|
||||||
(metadata->>'current_task_ids')::jsonb as current_task_ids,
|
|
||||||
(metadata->>'active_task_count')::int as active_task_count,
|
|
||||||
(metadata->>'max_concurrent_tasks')::int as max_concurrent_tasks,
|
|
||||||
-- Decommission fields
|
|
||||||
COALESCE(decommission_requested, false) as decommission_requested,
|
|
||||||
decommission_reason,
|
|
||||||
-- Preflight fields (dual-transport verification)
|
|
||||||
curl_ip,
|
|
||||||
http_ip,
|
|
||||||
preflight_status,
|
|
||||||
preflight_at,
|
|
||||||
fingerprint_data,
|
|
||||||
-- Full metadata for resources
|
|
||||||
metadata,
|
metadata,
|
||||||
EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat,
|
EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat,
|
||||||
CASE
|
CASE
|
||||||
WHEN status = 'offline' OR status = 'terminated' THEN status
|
WHEN status = 'offline' OR status = 'terminated' THEN status
|
||||||
WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
|
WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
|
||||||
WHEN current_task_id IS NOT NULL THEN 'busy'
|
WHEN current_task_id IS NOT NULL THEN 'busy'
|
||||||
WHEN (metadata->>'active_task_count')::int > 0 THEN 'busy'
|
|
||||||
ELSE 'ready'
|
ELSE 'ready'
|
||||||
END as health_status,
|
END as health_status,
|
||||||
created_at
|
created_at
|
||||||
@@ -710,163 +672,4 @@ router.get('/capacity', async (_req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// WORKER LIFECYCLE MANAGEMENT
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/worker-registry/workers/:workerId/decommission
|
|
||||||
* Request graceful decommission of a worker (will stop after current task)
|
|
||||||
*/
|
|
||||||
router.post('/workers/:workerId/decommission', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { workerId } = req.params;
|
|
||||||
const { reason, issued_by } = req.body;
|
|
||||||
|
|
||||||
// Update worker_registry to flag for decommission
|
|
||||||
const result = await pool.query(
|
|
||||||
`UPDATE worker_registry
|
|
||||||
SET decommission_requested = true,
|
|
||||||
decommission_reason = $2,
|
|
||||||
decommission_requested_at = NOW()
|
|
||||||
WHERE worker_id = $1
|
|
||||||
RETURNING friendly_name, status, current_task_id`,
|
|
||||||
[workerId, reason || 'Manual decommission from admin']
|
|
||||||
);
|
|
||||||
|
|
||||||
if (result.rows.length === 0) {
|
|
||||||
return res.status(404).json({ success: false, error: 'Worker not found' });
|
|
||||||
}
|
|
||||||
|
|
||||||
const worker = result.rows[0];
|
|
||||||
|
|
||||||
// Also log to worker_commands for audit trail
|
|
||||||
await pool.query(
|
|
||||||
`INSERT INTO worker_commands (worker_id, command, reason, issued_by)
|
|
||||||
VALUES ($1, 'decommission', $2, $3)
|
|
||||||
ON CONFLICT DO NOTHING`,
|
|
||||||
[workerId, reason || 'Manual decommission', issued_by || 'admin']
|
|
||||||
).catch(() => {
|
|
||||||
// Table might not exist yet - ignore
|
|
||||||
});
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
message: worker.current_task_id
|
|
||||||
? `Worker ${worker.friendly_name} will stop after completing task #${worker.current_task_id}`
|
|
||||||
: `Worker ${worker.friendly_name} will stop on next poll`,
|
|
||||||
worker: {
|
|
||||||
friendly_name: worker.friendly_name,
|
|
||||||
status: worker.status,
|
|
||||||
current_task_id: worker.current_task_id,
|
|
||||||
decommission_requested: true
|
|
||||||
}
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/worker-registry/workers/:workerId/cancel-decommission
|
|
||||||
* Cancel a pending decommission request
|
|
||||||
*/
|
|
||||||
router.post('/workers/:workerId/cancel-decommission', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { workerId } = req.params;
|
|
||||||
|
|
||||||
const result = await pool.query(
|
|
||||||
`UPDATE worker_registry
|
|
||||||
SET decommission_requested = false,
|
|
||||||
decommission_reason = NULL,
|
|
||||||
decommission_requested_at = NULL
|
|
||||||
WHERE worker_id = $1
|
|
||||||
RETURNING friendly_name`,
|
|
||||||
[workerId]
|
|
||||||
);
|
|
||||||
|
|
||||||
if (result.rows.length === 0) {
|
|
||||||
return res.status(404).json({ success: false, error: 'Worker not found' });
|
|
||||||
}
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
message: `Decommission cancelled for ${result.rows[0].friendly_name}`
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/worker-registry/spawn
|
|
||||||
* Spawn a new worker in the current pod (only works in multi-worker-per-pod mode)
|
|
||||||
* For now, this is a placeholder - actual spawning requires the pod supervisor
|
|
||||||
*/
|
|
||||||
router.post('/spawn', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { pod_name, role } = req.body;
|
|
||||||
|
|
||||||
// For now, we can't actually spawn workers from the API
|
|
||||||
// This would require a supervisor process in each pod that listens for spawn commands
|
|
||||||
// Instead, return instructions for how to scale
|
|
||||||
res.json({
|
|
||||||
success: false,
|
|
||||||
error: 'Direct worker spawning not yet implemented',
|
|
||||||
instructions: 'To add workers, scale the K8s deployment: kubectl scale deployment/scraper-worker --replicas=N'
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/worker-registry/pods
|
|
||||||
* Get workers grouped by pod
|
|
||||||
*/
|
|
||||||
router.get('/pods', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { rows } = await pool.query(`
|
|
||||||
SELECT
|
|
||||||
COALESCE(pod_name, 'Unknown') as pod_name,
|
|
||||||
COUNT(*) as worker_count,
|
|
||||||
COUNT(*) FILTER (WHERE current_task_id IS NOT NULL) as busy_count,
|
|
||||||
COUNT(*) FILTER (WHERE current_task_id IS NULL) as idle_count,
|
|
||||||
SUM(tasks_completed) as total_completed,
|
|
||||||
SUM(tasks_failed) as total_failed,
|
|
||||||
SUM((metadata->>'memory_rss_mb')::int) as total_memory_mb,
|
|
||||||
array_agg(json_build_object(
|
|
||||||
'worker_id', worker_id,
|
|
||||||
'friendly_name', friendly_name,
|
|
||||||
'status', status,
|
|
||||||
'current_task_id', current_task_id,
|
|
||||||
'tasks_completed', tasks_completed,
|
|
||||||
'tasks_failed', tasks_failed,
|
|
||||||
'decommission_requested', COALESCE(decommission_requested, false),
|
|
||||||
'last_heartbeat_at', last_heartbeat_at
|
|
||||||
)) as workers
|
|
||||||
FROM worker_registry
|
|
||||||
WHERE status NOT IN ('offline', 'terminated')
|
|
||||||
GROUP BY pod_name
|
|
||||||
ORDER BY pod_name
|
|
||||||
`);
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
pods: rows.map(row => ({
|
|
||||||
pod_name: row.pod_name,
|
|
||||||
worker_count: parseInt(row.worker_count),
|
|
||||||
busy_count: parseInt(row.busy_count),
|
|
||||||
idle_count: parseInt(row.idle_count),
|
|
||||||
total_completed: parseInt(row.total_completed) || 0,
|
|
||||||
total_failed: parseInt(row.total_failed) || 0,
|
|
||||||
total_memory_mb: parseInt(row.total_memory_mb) || 0,
|
|
||||||
workers: row.workers
|
|
||||||
}))
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
export default router;
|
export default router;
|
||||||
|
|||||||
@@ -26,20 +26,16 @@
|
|||||||
import { Router, Request, Response } from 'express';
|
import { Router, Request, Response } from 'express';
|
||||||
import { pool } from '../db/pool';
|
import { pool } from '../db/pool';
|
||||||
import * as k8s from '@kubernetes/client-node';
|
import * as k8s from '@kubernetes/client-node';
|
||||||
import { authMiddleware } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// All worker routes require authentication (trusted origins or API token)
|
|
||||||
router.use(authMiddleware);
|
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// K8S SCALING CONFIGURATION (added 2024-12-10)
|
// K8S SCALING CONFIGURATION (added 2024-12-10)
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Admin can scale workers from UI
|
// Per TASK_WORKFLOW_2024-12-10.md: Admin can scale workers from UI
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
const K8S_NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
|
const K8S_NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
|
||||||
const K8S_DEPLOYMENT_NAME = process.env.K8S_WORKER_DEPLOYMENT || 'scraper-worker';
|
const K8S_STATEFULSET_NAME = process.env.K8S_WORKER_STATEFULSET || 'scraper-worker';
|
||||||
|
|
||||||
// Initialize K8s client - uses in-cluster config when running in K8s,
|
// Initialize K8s client - uses in-cluster config when running in K8s,
|
||||||
// or kubeconfig when running locally
|
// or kubeconfig when running locally
|
||||||
@@ -74,7 +70,7 @@ function getK8sClient(): k8s.AppsV1Api | null {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/workers/k8s/replicas - Get current worker replica count
|
* GET /api/workers/k8s/replicas - Get current worker replica count
|
||||||
* Returns current and desired replica counts from the Deployment
|
* Returns current and desired replica counts from the StatefulSet
|
||||||
*/
|
*/
|
||||||
router.get('/k8s/replicas', async (_req: Request, res: Response) => {
|
router.get('/k8s/replicas', async (_req: Request, res: Response) => {
|
||||||
const client = getK8sClient();
|
const client = getK8sClient();
|
||||||
@@ -88,21 +84,21 @@ router.get('/k8s/replicas', async (_req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await client.readNamespacedDeployment({
|
const response = await client.readNamespacedStatefulSet({
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
name: K8S_STATEFULSET_NAME,
|
||||||
namespace: K8S_NAMESPACE,
|
namespace: K8S_NAMESPACE,
|
||||||
});
|
});
|
||||||
|
|
||||||
const deployment = response;
|
const statefulSet = response;
|
||||||
res.json({
|
res.json({
|
||||||
success: true,
|
success: true,
|
||||||
replicas: {
|
replicas: {
|
||||||
current: deployment.status?.readyReplicas || 0,
|
current: statefulSet.status?.readyReplicas || 0,
|
||||||
desired: deployment.spec?.replicas || 0,
|
desired: statefulSet.spec?.replicas || 0,
|
||||||
available: deployment.status?.availableReplicas || 0,
|
available: statefulSet.status?.availableReplicas || 0,
|
||||||
updated: deployment.status?.updatedReplicas || 0,
|
updated: statefulSet.status?.updatedReplicas || 0,
|
||||||
},
|
},
|
||||||
deployment: K8S_DEPLOYMENT_NAME,
|
statefulset: K8S_STATEFULSET_NAME,
|
||||||
namespace: K8S_NAMESPACE,
|
namespace: K8S_NAMESPACE,
|
||||||
});
|
});
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
@@ -116,7 +112,7 @@ router.get('/k8s/replicas', async (_req: Request, res: Response) => {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* POST /api/workers/k8s/scale - Scale worker replicas
|
* POST /api/workers/k8s/scale - Scale worker replicas
|
||||||
* Body: { replicas: number } - desired replica count (0-20)
|
* Body: { replicas: number } - desired replica count (1-20)
|
||||||
*/
|
*/
|
||||||
router.post('/k8s/scale', async (req: Request, res: Response) => {
|
router.post('/k8s/scale', async (req: Request, res: Response) => {
|
||||||
const client = getK8sClient();
|
const client = getK8sClient();
|
||||||
@@ -140,21 +136,21 @@ router.post('/k8s/scale', async (req: Request, res: Response) => {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
// Get current state first
|
// Get current state first
|
||||||
const currentResponse = await client.readNamespacedDeploymentScale({
|
const currentResponse = await client.readNamespacedStatefulSetScale({
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
name: K8S_STATEFULSET_NAME,
|
||||||
namespace: K8S_NAMESPACE,
|
namespace: K8S_NAMESPACE,
|
||||||
});
|
});
|
||||||
const currentReplicas = currentResponse.spec?.replicas || 0;
|
const currentReplicas = currentResponse.spec?.replicas || 0;
|
||||||
|
|
||||||
// Update scale using replaceNamespacedDeploymentScale
|
// Update scale using replaceNamespacedStatefulSetScale
|
||||||
await client.replaceNamespacedDeploymentScale({
|
await client.replaceNamespacedStatefulSetScale({
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
name: K8S_STATEFULSET_NAME,
|
||||||
namespace: K8S_NAMESPACE,
|
namespace: K8S_NAMESPACE,
|
||||||
body: {
|
body: {
|
||||||
apiVersion: 'autoscaling/v1',
|
apiVersion: 'autoscaling/v1',
|
||||||
kind: 'Scale',
|
kind: 'Scale',
|
||||||
metadata: {
|
metadata: {
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
name: K8S_STATEFULSET_NAME,
|
||||||
namespace: K8S_NAMESPACE,
|
namespace: K8S_NAMESPACE,
|
||||||
},
|
},
|
||||||
spec: {
|
spec: {
|
||||||
@@ -163,14 +159,14 @@ router.post('/k8s/scale', async (req: Request, res: Response) => {
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log(`[Workers] Scaled ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${replicas} replicas`);
|
console.log(`[Workers] Scaled ${K8S_STATEFULSET_NAME} from ${currentReplicas} to ${replicas} replicas`);
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
success: true,
|
success: true,
|
||||||
message: `Scaled from ${currentReplicas} to ${replicas} replicas`,
|
message: `Scaled from ${currentReplicas} to ${replicas} replicas`,
|
||||||
previous: currentReplicas,
|
previous: currentReplicas,
|
||||||
desired: replicas,
|
desired: replicas,
|
||||||
deployment: K8S_DEPLOYMENT_NAME,
|
statefulset: K8S_STATEFULSET_NAME,
|
||||||
namespace: K8S_NAMESPACE,
|
namespace: K8S_NAMESPACE,
|
||||||
});
|
});
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
@@ -182,73 +178,6 @@ router.post('/k8s/scale', async (req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/workers/k8s/scale-up - Scale up worker replicas by 1
|
|
||||||
* Convenience endpoint for adding a single worker
|
|
||||||
*/
|
|
||||||
router.post('/k8s/scale-up', async (_req: Request, res: Response) => {
|
|
||||||
const client = getK8sClient();
|
|
||||||
|
|
||||||
if (!client) {
|
|
||||||
return res.status(503).json({
|
|
||||||
success: false,
|
|
||||||
error: 'K8s client not available (not running in cluster or no kubeconfig)',
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Get current replica count
|
|
||||||
const currentResponse = await client.readNamespacedDeploymentScale({
|
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
});
|
|
||||||
const currentReplicas = currentResponse.spec?.replicas || 0;
|
|
||||||
const newReplicas = currentReplicas + 1;
|
|
||||||
|
|
||||||
// Cap at 20 replicas
|
|
||||||
if (newReplicas > 20) {
|
|
||||||
return res.status(400).json({
|
|
||||||
success: false,
|
|
||||||
error: 'Maximum replica count (20) reached',
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Scale up by 1
|
|
||||||
await client.replaceNamespacedDeploymentScale({
|
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
body: {
|
|
||||||
apiVersion: 'autoscaling/v1',
|
|
||||||
kind: 'Scale',
|
|
||||||
metadata: {
|
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
},
|
|
||||||
spec: {
|
|
||||||
replicas: newReplicas,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`[Workers] Scaled up ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${newReplicas} replicas`);
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
message: `Added worker (${currentReplicas} → ${newReplicas} replicas)`,
|
|
||||||
previous: currentReplicas,
|
|
||||||
desired: newReplicas,
|
|
||||||
deployment: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
});
|
|
||||||
} catch (err: any) {
|
|
||||||
console.error('[Workers] K8s scale-up error:', err.body?.message || err.message);
|
|
||||||
res.status(500).json({
|
|
||||||
success: false,
|
|
||||||
error: err.body?.message || err.message,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// STATIC ROUTES (must come before parameterized routes)
|
// STATIC ROUTES (must come before parameterized routes)
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|||||||
@@ -683,118 +683,6 @@ export class CrawlRotator {
|
|||||||
const current = this.proxy.getCurrent();
|
const current = this.proxy.getCurrent();
|
||||||
return current?.timezone;
|
return current?.timezone;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Preflight check - verifies proxy and anti-detect are working
|
|
||||||
* MUST be called before any task execution to ensure anonymity.
|
|
||||||
*
|
|
||||||
* Tests:
|
|
||||||
* 1. Proxy available - a proxy must be loaded and active
|
|
||||||
* 2. Proxy connectivity - makes HTTP request through proxy to verify connection
|
|
||||||
* 3. Anti-detect headers - verifies fingerprint is set with required headers
|
|
||||||
*
|
|
||||||
* @returns Promise<PreflightResult> with pass/fail status and details
|
|
||||||
*/
|
|
||||||
async preflight(): Promise<PreflightResult> {
|
|
||||||
const result: PreflightResult = {
|
|
||||||
passed: false,
|
|
||||||
proxyAvailable: false,
|
|
||||||
proxyConnected: false,
|
|
||||||
antidetectReady: false,
|
|
||||||
proxyIp: null,
|
|
||||||
fingerprint: null,
|
|
||||||
error: null,
|
|
||||||
responseTimeMs: null,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Step 1: Check proxy is available
|
|
||||||
const currentProxy = this.proxy.getCurrent();
|
|
||||||
if (!currentProxy) {
|
|
||||||
result.error = 'No proxy available';
|
|
||||||
console.log('[Preflight] FAILED - No proxy available');
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
result.proxyAvailable = true;
|
|
||||||
result.proxyIp = currentProxy.host;
|
|
||||||
|
|
||||||
// Step 2: Check fingerprint/anti-detect is ready
|
|
||||||
const fingerprint = this.userAgent.getCurrent();
|
|
||||||
if (!fingerprint || !fingerprint.userAgent) {
|
|
||||||
result.error = 'Anti-detect fingerprint not initialized';
|
|
||||||
console.log('[Preflight] FAILED - No fingerprint');
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
result.antidetectReady = true;
|
|
||||||
result.fingerprint = {
|
|
||||||
userAgent: fingerprint.userAgent,
|
|
||||||
browserName: fingerprint.browserName,
|
|
||||||
deviceCategory: fingerprint.deviceCategory,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Step 3: Test proxy connectivity with an actual HTTP request
|
|
||||||
// Use httpbin.org/ip to verify request goes through proxy
|
|
||||||
const proxyUrl = this.proxy.getProxyUrl(currentProxy);
|
|
||||||
const testUrl = 'https://httpbin.org/ip';
|
|
||||||
|
|
||||||
try {
|
|
||||||
const { default: axios } = await import('axios');
|
|
||||||
const { HttpsProxyAgent } = await import('https-proxy-agent');
|
|
||||||
|
|
||||||
const agent = new HttpsProxyAgent(proxyUrl);
|
|
||||||
const startTime = Date.now();
|
|
||||||
|
|
||||||
const response = await axios.get(testUrl, {
|
|
||||||
httpsAgent: agent,
|
|
||||||
timeout: 15000, // 15 second timeout
|
|
||||||
headers: {
|
|
||||||
'User-Agent': fingerprint.userAgent,
|
|
||||||
'Accept-Language': fingerprint.acceptLanguage,
|
|
||||||
...(fingerprint.secChUa && { 'sec-ch-ua': fingerprint.secChUa }),
|
|
||||||
...(fingerprint.secChUaPlatform && { 'sec-ch-ua-platform': fingerprint.secChUaPlatform }),
|
|
||||||
...(fingerprint.secChUaMobile && { 'sec-ch-ua-mobile': fingerprint.secChUaMobile }),
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
result.responseTimeMs = Date.now() - startTime;
|
|
||||||
result.proxyConnected = true;
|
|
||||||
result.passed = true;
|
|
||||||
|
|
||||||
// Mark success on proxy stats
|
|
||||||
await this.proxy.markSuccess(currentProxy.id, result.responseTimeMs);
|
|
||||||
|
|
||||||
console.log(`[Preflight] PASSED - Proxy ${currentProxy.host} connected (${result.responseTimeMs}ms), UA: ${fingerprint.browserName}/${fingerprint.deviceCategory}`);
|
|
||||||
} catch (err: any) {
|
|
||||||
result.error = `Proxy connection failed: ${err.message || 'Unknown error'}`;
|
|
||||||
console.log(`[Preflight] FAILED - Proxy connection error: ${err.message}`);
|
|
||||||
|
|
||||||
// Mark failure on proxy stats
|
|
||||||
await this.proxy.markFailed(currentProxy.id, err.message);
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Result from preflight check
|
|
||||||
*/
|
|
||||||
export interface PreflightResult {
|
|
||||||
/** Overall pass/fail */
|
|
||||||
passed: boolean;
|
|
||||||
/** Step 1: Is a proxy loaded? */
|
|
||||||
proxyAvailable: boolean;
|
|
||||||
/** Step 2: Did HTTP request through proxy succeed? */
|
|
||||||
proxyConnected: boolean;
|
|
||||||
/** Step 3: Is fingerprint/anti-detect ready? */
|
|
||||||
antidetectReady: boolean;
|
|
||||||
/** Current proxy IP */
|
|
||||||
proxyIp: string | null;
|
|
||||||
/** Fingerprint summary */
|
|
||||||
fingerprint: { userAgent: string; browserName: string; deviceCategory: string } | null;
|
|
||||||
/** Error message if failed */
|
|
||||||
error: string | null;
|
|
||||||
/** Proxy response time in ms */
|
|
||||||
responseTimeMs: number | null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|||||||
@@ -1,100 +0,0 @@
|
|||||||
/**
|
|
||||||
* Curl Preflight - Verify curl/axios transport works through proxy
|
|
||||||
*
|
|
||||||
* Tests:
|
|
||||||
* 1. Proxy is available and active
|
|
||||||
* 2. HTTP request through proxy succeeds
|
|
||||||
* 3. Anti-detect headers are properly set
|
|
||||||
*
|
|
||||||
* Use case: Fast, simple API requests that don't need browser fingerprint
|
|
||||||
*/
|
|
||||||
|
|
||||||
import axios from 'axios';
|
|
||||||
import { HttpsProxyAgent } from 'https-proxy-agent';
|
|
||||||
import { CrawlRotator, PreflightResult } from './crawl-rotator';
|
|
||||||
|
|
||||||
export interface CurlPreflightResult extends PreflightResult {
|
|
||||||
method: 'curl';
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Run curl preflight check
|
|
||||||
* Tests proxy connectivity using axios/curl through the proxy
|
|
||||||
*/
|
|
||||||
export async function runCurlPreflight(
|
|
||||||
crawlRotator: CrawlRotator
|
|
||||||
): Promise<CurlPreflightResult> {
|
|
||||||
const result: CurlPreflightResult = {
|
|
||||||
method: 'curl',
|
|
||||||
passed: false,
|
|
||||||
proxyAvailable: false,
|
|
||||||
proxyConnected: false,
|
|
||||||
antidetectReady: false,
|
|
||||||
proxyIp: null,
|
|
||||||
fingerprint: null,
|
|
||||||
error: null,
|
|
||||||
responseTimeMs: null,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Step 1: Check proxy is available
|
|
||||||
const currentProxy = crawlRotator.proxy.getCurrent();
|
|
||||||
if (!currentProxy) {
|
|
||||||
result.error = 'No proxy available';
|
|
||||||
console.log('[CurlPreflight] FAILED - No proxy available');
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
result.proxyAvailable = true;
|
|
||||||
result.proxyIp = currentProxy.host;
|
|
||||||
|
|
||||||
// Step 2: Check fingerprint/anti-detect is ready
|
|
||||||
const fingerprint = crawlRotator.userAgent.getCurrent();
|
|
||||||
if (!fingerprint || !fingerprint.userAgent) {
|
|
||||||
result.error = 'Anti-detect fingerprint not initialized';
|
|
||||||
console.log('[CurlPreflight] FAILED - No fingerprint');
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
result.antidetectReady = true;
|
|
||||||
result.fingerprint = {
|
|
||||||
userAgent: fingerprint.userAgent,
|
|
||||||
browserName: fingerprint.browserName,
|
|
||||||
deviceCategory: fingerprint.deviceCategory,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Step 3: Test proxy connectivity with an actual HTTP request
|
|
||||||
const proxyUrl = crawlRotator.proxy.getProxyUrl(currentProxy);
|
|
||||||
const testUrl = 'https://httpbin.org/ip';
|
|
||||||
|
|
||||||
try {
|
|
||||||
const agent = new HttpsProxyAgent(proxyUrl);
|
|
||||||
const startTime = Date.now();
|
|
||||||
|
|
||||||
const response = await axios.get(testUrl, {
|
|
||||||
httpsAgent: agent,
|
|
||||||
timeout: 15000, // 15 second timeout
|
|
||||||
headers: {
|
|
||||||
'User-Agent': fingerprint.userAgent,
|
|
||||||
'Accept-Language': fingerprint.acceptLanguage,
|
|
||||||
...(fingerprint.secChUa && { 'sec-ch-ua': fingerprint.secChUa }),
|
|
||||||
...(fingerprint.secChUaPlatform && { 'sec-ch-ua-platform': fingerprint.secChUaPlatform }),
|
|
||||||
...(fingerprint.secChUaMobile && { 'sec-ch-ua-mobile': fingerprint.secChUaMobile }),
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
result.responseTimeMs = Date.now() - startTime;
|
|
||||||
result.proxyConnected = true;
|
|
||||||
result.passed = true;
|
|
||||||
|
|
||||||
// Mark success on proxy stats
|
|
||||||
await crawlRotator.proxy.markSuccess(currentProxy.id, result.responseTimeMs);
|
|
||||||
|
|
||||||
console.log(`[CurlPreflight] PASSED - Proxy ${currentProxy.host} connected (${result.responseTimeMs}ms), UA: ${fingerprint.browserName}/${fingerprint.deviceCategory}`);
|
|
||||||
} catch (err: any) {
|
|
||||||
result.error = `Proxy connection failed: ${err.message || 'Unknown error'}`;
|
|
||||||
console.log(`[CurlPreflight] FAILED - Proxy connection error: ${err.message}`);
|
|
||||||
|
|
||||||
// Mark failure on proxy stats
|
|
||||||
await crawlRotator.proxy.markFailed(currentProxy.id, err.message);
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
@@ -1,399 +0,0 @@
|
|||||||
/**
|
|
||||||
* Puppeteer Preflight - Verify browser-based transport works with anti-detect
|
|
||||||
*
|
|
||||||
* Uses Puppeteer + StealthPlugin to:
|
|
||||||
* 1. Launch headless browser with stealth mode + PROXY
|
|
||||||
* 2. Visit fingerprint.com demo to verify anti-detect and confirm proxy IP
|
|
||||||
* 3. Establish session by visiting Dutchie embedded menu
|
|
||||||
* 4. Make GraphQL request from browser context
|
|
||||||
* 5. Verify we get a valid response (not blocked)
|
|
||||||
*
|
|
||||||
* Use case: Anti-detect scraping that needs real browser fingerprint through proxy
|
|
||||||
*
|
|
||||||
* Based on test-intercept.js which successfully captures 1000+ products
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { PreflightResult, CrawlRotator } from './crawl-rotator';
|
|
||||||
|
|
||||||
// GraphQL hash for FilteredProducts query - MUST match CLAUDE.md
|
|
||||||
const FILTERED_PRODUCTS_HASH = 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0';
|
|
||||||
|
|
||||||
// Test dispensary - AZ-Deeply-Rooted (known working)
|
|
||||||
const TEST_CNAME = 'AZ-Deeply-Rooted';
|
|
||||||
const TEST_PLATFORM_ID = '6405ef617056e8014d79101b';
|
|
||||||
|
|
||||||
// Anti-detect verification sites (primary + fallback)
|
|
||||||
const FINGERPRINT_DEMO_URL = 'https://demo.fingerprint.com/';
|
|
||||||
const AMIUNIQUE_URL = 'https://amiunique.org/fingerprint';
|
|
||||||
|
|
||||||
export interface PuppeteerPreflightResult extends PreflightResult {
|
|
||||||
method: 'http';
|
|
||||||
/** Number of products returned (proves API access) */
|
|
||||||
productsReturned?: number;
|
|
||||||
/** Browser user agent used */
|
|
||||||
browserUserAgent?: string;
|
|
||||||
/** Bot detection result from fingerprint.com */
|
|
||||||
botDetection?: {
|
|
||||||
detected: boolean;
|
|
||||||
probability?: number;
|
|
||||||
type?: string;
|
|
||||||
};
|
|
||||||
/** Expected proxy IP (from pool) */
|
|
||||||
expectedProxyIp?: string;
|
|
||||||
/** Whether IP verification passed (detected IP matches proxy) */
|
|
||||||
ipVerified?: boolean;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Run Puppeteer preflight check with proxy
|
|
||||||
* Tests browser-based access with anti-detect verification via fingerprint.com
|
|
||||||
*
|
|
||||||
* @param crawlRotator - CrawlRotator instance to get proxy from pool
|
|
||||||
*/
|
|
||||||
export async function runPuppeteerPreflight(
|
|
||||||
crawlRotator?: CrawlRotator
|
|
||||||
): Promise<PuppeteerPreflightResult> {
|
|
||||||
const result: PuppeteerPreflightResult = {
|
|
||||||
method: 'http',
|
|
||||||
passed: false,
|
|
||||||
proxyAvailable: false,
|
|
||||||
proxyConnected: false,
|
|
||||||
antidetectReady: false,
|
|
||||||
proxyIp: null,
|
|
||||||
fingerprint: null,
|
|
||||||
error: null,
|
|
||||||
responseTimeMs: null,
|
|
||||||
productsReturned: 0,
|
|
||||||
ipVerified: false,
|
|
||||||
};
|
|
||||||
|
|
||||||
let browser: any = null;
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Step 0: Get a proxy from the pool
|
|
||||||
let proxyUrl: string | null = null;
|
|
||||||
let expectedProxyHost: string | null = null;
|
|
||||||
|
|
||||||
if (crawlRotator) {
|
|
||||||
const currentProxy = crawlRotator.proxy.getCurrent();
|
|
||||||
if (currentProxy) {
|
|
||||||
result.proxyAvailable = true;
|
|
||||||
proxyUrl = crawlRotator.proxy.getProxyUrl(currentProxy);
|
|
||||||
expectedProxyHost = currentProxy.host;
|
|
||||||
result.expectedProxyIp = expectedProxyHost;
|
|
||||||
console.log(`[PuppeteerPreflight] Using proxy: ${currentProxy.host}:${currentProxy.port}`);
|
|
||||||
} else {
|
|
||||||
result.error = 'No proxy available from pool';
|
|
||||||
console.log(`[PuppeteerPreflight] FAILED - No proxy available`);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
console.log(`[PuppeteerPreflight] WARNING: No CrawlRotator provided - using direct connection`);
|
|
||||||
result.proxyAvailable = true; // No proxy needed for direct
|
|
||||||
}
|
|
||||||
|
|
||||||
// Dynamic imports to avoid loading Puppeteer unless needed
|
|
||||||
const puppeteer = require('puppeteer-extra');
|
|
||||||
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
|
||||||
puppeteer.use(StealthPlugin());
|
|
||||||
|
|
||||||
const startTime = Date.now();
|
|
||||||
|
|
||||||
// Build browser args
|
|
||||||
const browserArgs = ['--no-sandbox', '--disable-setuid-sandbox'];
|
|
||||||
if (proxyUrl) {
|
|
||||||
// Extract host:port for Puppeteer (it handles auth separately)
|
|
||||||
const proxyUrlParsed = new URL(proxyUrl);
|
|
||||||
browserArgs.push(`--proxy-server=${proxyUrlParsed.host}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Launch browser with stealth + proxy
|
|
||||||
browser = await puppeteer.launch({
|
|
||||||
headless: 'new',
|
|
||||||
args: browserArgs,
|
|
||||||
});
|
|
||||||
|
|
||||||
const page = await browser.newPage();
|
|
||||||
|
|
||||||
// If proxy has auth, set it up
|
|
||||||
if (proxyUrl) {
|
|
||||||
const proxyUrlParsed = new URL(proxyUrl);
|
|
||||||
if (proxyUrlParsed.username && proxyUrlParsed.password) {
|
|
||||||
await page.authenticate({
|
|
||||||
username: decodeURIComponent(proxyUrlParsed.username),
|
|
||||||
password: decodeURIComponent(proxyUrlParsed.password),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get browser user agent
|
|
||||||
const userAgent = await page.evaluate(() => navigator.userAgent);
|
|
||||||
result.browserUserAgent = userAgent;
|
|
||||||
result.fingerprint = {
|
|
||||||
userAgent,
|
|
||||||
browserName: 'Chrome (Puppeteer)',
|
|
||||||
deviceCategory: 'desktop',
|
|
||||||
};
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
// STEP 1: Visit fingerprint.com demo to verify anti-detect and get IP
|
|
||||||
// =========================================================================
|
|
||||||
console.log(`[PuppeteerPreflight] Testing anti-detect at ${FINGERPRINT_DEMO_URL}...`);
|
|
||||||
|
|
||||||
try {
|
|
||||||
await page.goto(FINGERPRINT_DEMO_URL, {
|
|
||||||
waitUntil: 'networkidle2',
|
|
||||||
timeout: 30000,
|
|
||||||
});
|
|
||||||
|
|
||||||
result.proxyConnected = true; // If we got here, proxy is working
|
|
||||||
|
|
||||||
// Wait for fingerprint results to load
|
|
||||||
await page.waitForSelector('[data-test="visitor-id"]', { timeout: 10000 }).catch(() => {});
|
|
||||||
|
|
||||||
// Extract fingerprint data from the page
|
|
||||||
const fingerprintData = await page.evaluate(() => {
|
|
||||||
// Try to find the IP address displayed on the page
|
|
||||||
const ipElement = document.querySelector('[data-test="ip-address"]');
|
|
||||||
const ip = ipElement?.textContent?.trim() || null;
|
|
||||||
|
|
||||||
// Try to find bot detection info
|
|
||||||
const botElement = document.querySelector('[data-test="bot-detected"]');
|
|
||||||
const botDetected = botElement?.textContent?.toLowerCase().includes('true') || false;
|
|
||||||
|
|
||||||
// Try to find visitor ID (proves fingerprinting worked)
|
|
||||||
const visitorIdElement = document.querySelector('[data-test="visitor-id"]');
|
|
||||||
const visitorId = visitorIdElement?.textContent?.trim() || null;
|
|
||||||
|
|
||||||
// Alternative: look for common UI patterns if data-test attrs not present
|
|
||||||
let detectedIp = ip;
|
|
||||||
if (!detectedIp) {
|
|
||||||
// Look for IP in any element containing IP-like pattern
|
|
||||||
const allText = document.body.innerText;
|
|
||||||
const ipMatch = allText.match(/\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b/);
|
|
||||||
detectedIp = ipMatch ? ipMatch[1] : null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
ip: detectedIp,
|
|
||||||
botDetected,
|
|
||||||
visitorId,
|
|
||||||
pageLoaded: !!document.body,
|
|
||||||
};
|
|
||||||
});
|
|
||||||
|
|
||||||
if (fingerprintData.ip) {
|
|
||||||
result.proxyIp = fingerprintData.ip;
|
|
||||||
console.log(`[PuppeteerPreflight] Detected IP: ${fingerprintData.ip}`);
|
|
||||||
|
|
||||||
// Verify IP matches expected proxy
|
|
||||||
if (expectedProxyHost) {
|
|
||||||
// Check if detected IP contains the proxy host (or is close match)
|
|
||||||
if (fingerprintData.ip === expectedProxyHost ||
|
|
||||||
expectedProxyHost.includes(fingerprintData.ip) ||
|
|
||||||
fingerprintData.ip.includes(expectedProxyHost.split('.').slice(0, 3).join('.'))) {
|
|
||||||
result.ipVerified = true;
|
|
||||||
console.log(`[PuppeteerPreflight] IP VERIFIED - matches proxy`);
|
|
||||||
} else {
|
|
||||||
console.log(`[PuppeteerPreflight] IP mismatch: expected ${expectedProxyHost}, got ${fingerprintData.ip}`);
|
|
||||||
// Don't fail - residential proxies often show different egress IPs
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fingerprintData.visitorId) {
|
|
||||||
console.log(`[PuppeteerPreflight] Fingerprint visitor ID: ${fingerprintData.visitorId}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
result.botDetection = {
|
|
||||||
detected: fingerprintData.botDetected,
|
|
||||||
};
|
|
||||||
|
|
||||||
if (fingerprintData.botDetected) {
|
|
||||||
console.log(`[PuppeteerPreflight] WARNING: Bot detection triggered!`);
|
|
||||||
} else {
|
|
||||||
console.log(`[PuppeteerPreflight] Anti-detect check: NOT detected as bot`);
|
|
||||||
result.antidetectReady = true;
|
|
||||||
}
|
|
||||||
} catch (fpErr: any) {
|
|
||||||
// Could mean proxy connection failed
|
|
||||||
console.log(`[PuppeteerPreflight] Fingerprint.com check failed: ${fpErr.message}`);
|
|
||||||
if (fpErr.message.includes('net::ERR_PROXY') || fpErr.message.includes('ECONNREFUSED')) {
|
|
||||||
result.error = `Proxy connection failed: ${fpErr.message}`;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try fallback: amiunique.org
|
|
||||||
console.log(`[PuppeteerPreflight] Trying fallback: ${AMIUNIQUE_URL}...`);
|
|
||||||
try {
|
|
||||||
await page.goto(AMIUNIQUE_URL, {
|
|
||||||
waitUntil: 'networkidle2',
|
|
||||||
timeout: 30000,
|
|
||||||
});
|
|
||||||
|
|
||||||
result.proxyConnected = true;
|
|
||||||
|
|
||||||
// Extract IP from amiunique.org page
|
|
||||||
const amiData = await page.evaluate(() => {
|
|
||||||
const allText = document.body.innerText;
|
|
||||||
const ipMatch = allText.match(/\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b/);
|
|
||||||
return {
|
|
||||||
ip: ipMatch ? ipMatch[1] : null,
|
|
||||||
pageLoaded: !!document.body,
|
|
||||||
};
|
|
||||||
});
|
|
||||||
|
|
||||||
if (amiData.ip) {
|
|
||||||
result.proxyIp = amiData.ip;
|
|
||||||
console.log(`[PuppeteerPreflight] Detected IP via amiunique.org: ${amiData.ip}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
result.antidetectReady = true;
|
|
||||||
console.log(`[PuppeteerPreflight] amiunique.org fallback succeeded`);
|
|
||||||
} catch (amiErr: any) {
|
|
||||||
console.log(`[PuppeteerPreflight] amiunique.org fallback also failed: ${amiErr.message}`);
|
|
||||||
// Continue with Dutchie test anyway
|
|
||||||
result.proxyConnected = true;
|
|
||||||
result.antidetectReady = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// =========================================================================
|
|
||||||
// STEP 2: Test Dutchie API access (the real test)
|
|
||||||
// =========================================================================
|
|
||||||
const embedUrl = `https://dutchie.com/embedded-menu/${TEST_CNAME}?menuType=rec`;
|
|
||||||
console.log(`[PuppeteerPreflight] Establishing session at ${embedUrl}...`);
|
|
||||||
|
|
||||||
await page.goto(embedUrl, {
|
|
||||||
waitUntil: 'networkidle2',
|
|
||||||
timeout: 30000,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Make GraphQL request from browser context
|
|
||||||
const graphqlResult = await page.evaluate(
|
|
||||||
async (platformId: string, hash: string) => {
|
|
||||||
try {
|
|
||||||
const variables = {
|
|
||||||
includeEnterpriseSpecials: false,
|
|
||||||
productsFilter: {
|
|
||||||
dispensaryId: platformId,
|
|
||||||
pricingType: 'rec',
|
|
||||||
Status: 'Active', // CRITICAL: Must be 'Active' per CLAUDE.md
|
|
||||||
types: [],
|
|
||||||
useCache: true,
|
|
||||||
isDefaultSort: true,
|
|
||||||
sortBy: 'popularSortIdx',
|
|
||||||
sortDirection: 1,
|
|
||||||
bypassOnlineThresholds: true,
|
|
||||||
isKioskMenu: false,
|
|
||||||
removeProductsBelowOptionThresholds: false,
|
|
||||||
},
|
|
||||||
page: 0,
|
|
||||||
perPage: 10, // Just need a few to prove it works
|
|
||||||
};
|
|
||||||
|
|
||||||
const extensions = {
|
|
||||||
persistedQuery: {
|
|
||||||
version: 1,
|
|
||||||
sha256Hash: hash,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
const qs = new URLSearchParams({
|
|
||||||
operationName: 'FilteredProducts',
|
|
||||||
variables: JSON.stringify(variables),
|
|
||||||
extensions: JSON.stringify(extensions),
|
|
||||||
});
|
|
||||||
|
|
||||||
const url = `https://dutchie.com/api-3/graphql?${qs.toString()}`;
|
|
||||||
const sessionId = 'preflight-' + Date.now();
|
|
||||||
|
|
||||||
const response = await fetch(url, {
|
|
||||||
method: 'GET',
|
|
||||||
headers: {
|
|
||||||
Accept: 'application/json',
|
|
||||||
'content-type': 'application/json',
|
|
||||||
'x-dutchie-session': sessionId,
|
|
||||||
'apollographql-client-name': 'Marketplace (production)',
|
|
||||||
},
|
|
||||||
credentials: 'include',
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!response.ok) {
|
|
||||||
return { error: `HTTP ${response.status}`, products: 0 };
|
|
||||||
}
|
|
||||||
|
|
||||||
const json = await response.json();
|
|
||||||
|
|
||||||
if (json.errors) {
|
|
||||||
return { error: JSON.stringify(json.errors).slice(0, 200), products: 0 };
|
|
||||||
}
|
|
||||||
|
|
||||||
const products = json?.data?.filteredProducts?.products || [];
|
|
||||||
return { error: null, products: products.length };
|
|
||||||
} catch (err: any) {
|
|
||||||
return { error: err.message || 'Unknown error', products: 0 };
|
|
||||||
}
|
|
||||||
},
|
|
||||||
TEST_PLATFORM_ID,
|
|
||||||
FILTERED_PRODUCTS_HASH
|
|
||||||
);
|
|
||||||
|
|
||||||
result.responseTimeMs = Date.now() - startTime;
|
|
||||||
|
|
||||||
if (graphqlResult.error) {
|
|
||||||
result.error = `GraphQL error: ${graphqlResult.error}`;
|
|
||||||
console.log(`[PuppeteerPreflight] FAILED - ${result.error}`);
|
|
||||||
} else if (graphqlResult.products === 0) {
|
|
||||||
result.error = 'GraphQL returned 0 products';
|
|
||||||
console.log(`[PuppeteerPreflight] FAILED - No products returned`);
|
|
||||||
} else {
|
|
||||||
result.passed = true;
|
|
||||||
result.productsReturned = graphqlResult.products;
|
|
||||||
console.log(
|
|
||||||
`[PuppeteerPreflight] PASSED - Got ${graphqlResult.products} products in ${result.responseTimeMs}ms`
|
|
||||||
);
|
|
||||||
if (result.proxyIp) {
|
|
||||||
console.log(`[PuppeteerPreflight] Browser IP via proxy: ${result.proxyIp}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (err: any) {
|
|
||||||
result.error = `Browser error: ${err.message || 'Unknown error'}`;
|
|
||||||
console.log(`[PuppeteerPreflight] FAILED - ${result.error}`);
|
|
||||||
} finally {
|
|
||||||
if (browser) {
|
|
||||||
await browser.close().catch(() => {});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Run the Puppeteer preflight, retrying on failure.
 *
 * Calls runPuppeteerPreflight up to `maxRetries + 1` times, pausing 5 seconds
 * before each retry, and returns as soon as an attempt reports `passed`.
 * If every attempt fails, the result of the final attempt is returned.
 *
 * @param crawlRotator - CrawlRotator instance forwarded to runPuppeteerPreflight
 * @param maxRetries - Number of retry attempts after the first try (default 1).
 *   Negative or NaN values are treated as 0 so that one attempt always runs
 *   and the function never resolves to null.
 * @returns The first passing result, or the last failing one
 */
export async function runPuppeteerPreflightWithRetry(
  crawlRotator?: CrawlRotator,
  maxRetries: number = 1
): Promise<PuppeteerPreflightResult> {
  // `maxRetries > 0` is false for NaN as well as for negatives
  const retryBudget = maxRetries > 0 ? maxRetries : 0;

  // The first attempt is unconditional, so a result always exists
  let lastResult: PuppeteerPreflightResult = await runPuppeteerPreflight(crawlRotator);

  for (let attempt = 1; !lastResult.passed && attempt <= retryBudget; attempt++) {
    console.log(`[PuppeteerPreflight] Retry attempt ${attempt}/${retryBudget}...`);
    await new Promise((r) => setTimeout(r, 5000)); // Wait 5s between retries
    lastResult = await runPuppeteerPreflight(crawlRotator);
  }

  return lastResult;
}
|
|
||||||
@@ -1,30 +1,566 @@
|
|||||||
/**
|
/**
|
||||||
* System API Routes (Stub)
|
* System API Routes
|
||||||
*
|
*
|
||||||
* The full system routes depend on SyncOrchestrator which was moved to _deprecated.
|
* Provides REST API endpoints for system monitoring and control:
|
||||||
* This stub provides empty routers to maintain backward compatibility.
|
* - /api/system/sync/* - Sync orchestrator
|
||||||
|
* - /api/system/dlq/* - Dead-letter queue
|
||||||
|
* - /api/system/integrity/* - Integrity checks
|
||||||
|
* - /api/system/fix/* - Auto-fix routines
|
||||||
|
* - /api/system/alerts/* - System alerts
|
||||||
|
* - /metrics - Prometheus metrics
|
||||||
*
|
*
|
||||||
* Full implementation available at: src/_deprecated/system/routes/index.ts
|
* Phase 5: Full Production Sync + Monitoring
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { Router, Request, Response } from 'express';
|
import { Router, Request, Response } from 'express';
|
||||||
import { Pool } from 'pg';
|
import { Pool } from 'pg';
|
||||||
import { MetricsService } from '../services';
|
import {
|
||||||
|
SyncOrchestrator,
|
||||||
|
MetricsService,
|
||||||
|
DLQService,
|
||||||
|
AlertService,
|
||||||
|
IntegrityService,
|
||||||
|
AutoFixService,
|
||||||
|
} from '../services';
|
||||||
|
|
||||||
export function createSystemRouter(_pool: Pool): Router {
|
export function createSystemRouter(pool: Pool): Router {
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// Stub - full sync/dlq/integrity/fix/alerts routes moved to _deprecated
|
// Initialize services
|
||||||
router.get('/status', (_req: Request, res: Response) => {
|
const metrics = new MetricsService(pool);
|
||||||
res.json({
|
const dlq = new DLQService(pool);
|
||||||
message: 'System routes temporarily disabled - see _deprecated/system/routes',
|
const alerts = new AlertService(pool);
|
||||||
status: 'stub',
|
const integrity = new IntegrityService(pool, alerts);
|
||||||
});
|
const autoFix = new AutoFixService(pool, alerts);
|
||||||
|
const orchestrator = new SyncOrchestrator(pool, metrics, dlq, alerts);
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// SYNC ORCHESTRATOR ENDPOINTS
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/sync/status
|
||||||
|
* Get current sync status
|
||||||
|
*/
|
||||||
|
router.get('/sync/status', async (_req: Request, res: Response) => {
  // Respond with whatever the orchestrator reports as its current status
  try {
    res.json(await orchestrator.getStatus());
  } catch (err) {
    console.error('[System] Sync status error:', err);
    res.status(500).json({ error: 'Failed to get sync status' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/system/sync/run
|
||||||
|
* Trigger a sync run
|
||||||
|
*/
|
||||||
|
router.post('/sync/run', async (req: Request, res: Response) => {
  try {
    // req.body is undefined when no body parser handled the request;
    // fall back to an empty object so the default below still applies.
    const body = req.body || {};
    const triggeredBy = body.triggeredBy || 'api';

    // NOTE(review): triggeredBy is only echoed in the response; it is not
    // passed to runSync() — confirm whether the orchestrator should receive it.
    const result = await orchestrator.runSync();

    res.json({
      success: true,
      triggeredBy,
      metrics: result,
    });
  } catch (error) {
    console.error('[System] Sync run error:', error);
    res.status(500).json({
      success: false,
      error: error instanceof Error ? error.message : 'Sync run failed',
    });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/sync/queue-depth
|
||||||
|
* Get queue depth information
|
||||||
|
*/
|
||||||
|
router.get('/sync/queue-depth', async (_req: Request, res: Response) => {
  // Respond with the orchestrator's queue depth information
  try {
    res.json(await orchestrator.getQueueDepth());
  } catch (err) {
    console.error('[System] Queue depth error:', err);
    res.status(500).json({ error: 'Failed to get queue depth' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/sync/health
|
||||||
|
* Get sync health status
|
||||||
|
*/
|
||||||
|
router.get('/sync/health', async (_req: Request, res: Response) => {
  try {
    const report = await orchestrator.getHealth();
    // 200 when healthy, 503 otherwise; the report is the body either way
    const httpStatus = report.healthy ? 200 : 503;
    res.status(httpStatus).json(report);
  } catch (err) {
    console.error('[System] Health check error:', err);
    res.status(500).json({ healthy: false, error: 'Health check failed' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/system/sync/pause
|
||||||
|
* Pause the orchestrator
|
||||||
|
*/
|
||||||
|
router.post('/sync/pause', async (req: Request, res: Response) => {
  try {
    // req.body is undefined when no body parser handled the request;
    // guard it so a body-less POST pauses with the default reason.
    const body = req.body || {};
    const reason = body.reason || 'Manual pause';

    await orchestrator.pause(reason);
    res.json({ success: true, message: 'Orchestrator paused' });
  } catch (error) {
    console.error('[System] Pause error:', error);
    res.status(500).json({ error: 'Failed to pause orchestrator' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/system/sync/resume
|
||||||
|
* Resume the orchestrator
|
||||||
|
*/
|
||||||
|
router.post('/sync/resume', async (_req: Request, res: Response) => {
  // Resume the orchestrator, then confirm to the caller
  try {
    await orchestrator.resume();
    const confirmation = { success: true, message: 'Orchestrator resumed' };
    res.json(confirmation);
  } catch (err) {
    console.error('[System] Resume error:', err);
    res.status(500).json({ error: 'Failed to resume orchestrator' });
  }
});
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// DLQ ENDPOINTS
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/dlq
|
||||||
|
* List DLQ payloads
|
||||||
|
*/
|
||||||
|
router.get('/dlq', async (req: Request, res: Response) => {
  try {
    // Parse a query value as a base-10 integer; yields NaN when the value is
    // absent or not numeric so the caller can substitute a default.
    const toInt = (raw: unknown): number => Number.parseInt(String(raw || ''), 10);

    const dispensaryId = toInt(req.query.dispensaryId);
    const limit = toInt(req.query.limit);
    const offset = toInt(req.query.offset);

    // Non-numeric input falls back to the defaults instead of passing NaN
    // through to the DLQ service.
    const options = {
      status: req.query.status as string,
      errorType: req.query.errorType as string,
      dispensaryId: Number.isNaN(dispensaryId) ? undefined : dispensaryId,
      limit: Number.isNaN(limit) ? 50 : limit,
      offset: Number.isNaN(offset) ? 0 : offset,
    };

    const result = await dlq.listPayloads(options);
    res.json(result);
  } catch (error) {
    console.error('[System] DLQ list error:', error);
    res.status(500).json({ error: 'Failed to list DLQ payloads' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/dlq/stats
|
||||||
|
* Get DLQ statistics
|
||||||
|
*/
|
||||||
|
router.get('/dlq/stats', async (_req: Request, res: Response) => {
  // Respond with the DLQ service's statistics
  try {
    res.json(await dlq.getStats());
  } catch (err) {
    console.error('[System] DLQ stats error:', err);
    res.status(500).json({ error: 'Failed to get DLQ stats' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/dlq/summary
|
||||||
|
* Get DLQ summary by error type
|
||||||
|
*/
|
||||||
|
router.get('/dlq/summary', async (_req: Request, res: Response) => {
  // Respond with the DLQ service's summary
  try {
    res.json(await dlq.getSummary());
  } catch (err) {
    console.error('[System] DLQ summary error:', err);
    res.status(500).json({ error: 'Failed to get DLQ summary' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/dlq/:id
|
||||||
|
* Get a specific DLQ payload
|
||||||
|
*/
|
||||||
|
router.get('/dlq/:id', async (req: Request, res: Response) => {
  try {
    const found = await dlq.getPayload(req.params.id);
    if (found) {
      res.json(found);
      return;
    }
    // The service returned nothing for this id
    res.status(404).json({ error: 'Payload not found' });
  } catch (err) {
    console.error('[System] DLQ get error:', err);
    res.status(500).json({ error: 'Failed to get DLQ payload' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/system/dlq/:id/retry
|
||||||
|
* Retry a DLQ payload
|
||||||
|
*/
|
||||||
|
router.post('/dlq/:id/retry', async (req: Request, res: Response) => {
  try {
    const outcome = await dlq.retryPayload(req.params.id);
    // An unsuccessful retry is reported as 400 with the same result payload
    if (!outcome.success) {
      res.status(400).json(outcome);
      return;
    }
    res.json(outcome);
  } catch (err) {
    console.error('[System] DLQ retry error:', err);
    res.status(500).json({ error: 'Failed to retry payload' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/system/dlq/:id/abandon
|
||||||
|
* Abandon a DLQ payload
|
||||||
|
*/
|
||||||
|
router.post('/dlq/:id/abandon', async (req: Request, res: Response) => {
  try {
    // req.body is undefined when no body parser handled the request;
    // guard it so a body-less POST uses the defaults below.
    const body = req.body || {};
    const reason = body.reason || 'Manually abandoned';
    const abandonedBy = body.abandonedBy || 'api';

    const success = await dlq.abandonPayload(req.params.id, reason, abandonedBy);
    res.json({ success });
  } catch (error) {
    console.error('[System] DLQ abandon error:', error);
    res.status(500).json({ error: 'Failed to abandon payload' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/system/dlq/bulk-retry
|
||||||
|
* Bulk retry payloads by error type
|
||||||
|
*/
|
||||||
|
router.post('/dlq/bulk-retry', async (req: Request, res: Response) => {
  try {
    // Destructuring an undefined body would throw and surface as a 500;
    // guard it so a missing body gets the 400 below instead.
    const { errorType } = req.body || {};
    if (!errorType) {
      return res.status(400).json({ error: 'errorType is required' });
    }

    const result = await dlq.bulkRetryByErrorType(errorType);
    res.json(result);
  } catch (error) {
    console.error('[System] DLQ bulk retry error:', error);
    res.status(500).json({ error: 'Failed to bulk retry' });
  }
});
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// INTEGRITY CHECK ENDPOINTS
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/system/integrity/run
|
||||||
|
* Run all integrity checks
|
||||||
|
*/
|
||||||
|
router.post('/integrity/run', async (req: Request, res: Response) => {
  try {
    // req.body is undefined when no body parser handled the request;
    // guard it so a body-less POST is attributed to 'api'.
    const body = req.body || {};
    const triggeredBy = body.triggeredBy || 'api';

    const result = await integrity.runAllChecks(triggeredBy);
    res.json(result);
  } catch (error) {
    console.error('[System] Integrity run error:', error);
    res.status(500).json({ error: 'Failed to run integrity checks' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/integrity/runs
|
||||||
|
* Get recent integrity check runs
|
||||||
|
*/
|
||||||
|
router.get('/integrity/runs', async (req: Request, res: Response) => {
  try {
    // Base-10 parse; a missing or non-numeric ?limit falls back to 10
    // instead of passing NaN to the integrity service.
    const parsedLimit = Number.parseInt(String(req.query.limit || ''), 10);
    const limit = Number.isNaN(parsedLimit) ? 10 : parsedLimit;

    const runs = await integrity.getRecentRuns(limit);
    res.json(runs);
  } catch (error) {
    console.error('[System] Integrity runs error:', error);
    res.status(500).json({ error: 'Failed to get integrity runs' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/integrity/runs/:runId
|
||||||
|
* Get results for a specific integrity run
|
||||||
|
*/
|
||||||
|
router.get('/integrity/runs/:runId', async (req: Request, res: Response) => {
  // Respond with the stored results for the run named in the path
  try {
    const { runId } = req.params;
    res.json(await integrity.getRunResults(runId));
  } catch (err) {
    console.error('[System] Integrity run results error:', err);
    res.status(500).json({ error: 'Failed to get run results' });
  }
});
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// AUTO-FIX ENDPOINTS
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/fix/routines
|
||||||
|
* Get available fix routines
|
||||||
|
*/
|
||||||
|
router.get('/fix/routines', (_req: Request, res: Response) => {
  // Synchronous handler: respond with the auto-fix service's routine list
  try {
    res.json(autoFix.getAvailableRoutines());
  } catch (err) {
    console.error('[System] Get routines error:', err);
    res.status(500).json({ error: 'Failed to get routines' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/system/fix/:routine
|
||||||
|
* Run a fix routine
|
||||||
|
*/
|
||||||
|
router.post('/fix/:routine', async (req: Request, res: Response) => {
  try {
    // req.body is undefined when no body parser handled the request;
    // guard it so a body-less POST runs with dryRun=false, triggeredBy='api'.
    const body = req.body || {};
    const routineName = req.params.routine;
    // Only a literal boolean true enables dry-run mode
    const dryRun = body.dryRun === true;
    const triggeredBy = body.triggeredBy || 'api';

    // NOTE(review): routineName is cast to any and not checked here against
    // getAvailableRoutines() — presumably runRoutine validates it; confirm.
    const result = await autoFix.runRoutine(routineName as any, triggeredBy, { dryRun });
    res.json(result);
  } catch (error) {
    console.error('[System] Fix routine error:', error);
    res.status(500).json({ error: 'Failed to run fix routine' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/fix/runs
|
||||||
|
* Get recent fix runs
|
||||||
|
*/
|
||||||
|
router.get('/fix/runs', async (req: Request, res: Response) => {
  try {
    // Base-10 parse; a missing or non-numeric ?limit falls back to 20
    // instead of passing NaN to the auto-fix service.
    const parsedLimit = Number.parseInt(String(req.query.limit || ''), 10);
    const limit = Number.isNaN(parsedLimit) ? 20 : parsedLimit;

    const runs = await autoFix.getRecentRuns(limit);
    res.json(runs);
  } catch (error) {
    console.error('[System] Fix runs error:', error);
    res.status(500).json({ error: 'Failed to get fix runs' });
  }
});
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// ALERTS ENDPOINTS
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/alerts
|
||||||
|
* List alerts
|
||||||
|
*/
|
||||||
|
router.get('/alerts', async (req: Request, res: Response) => {
  try {
    // Parse a query value as a base-10 integer; yields NaN when the value is
    // absent or not numeric so a default can be substituted.
    const toInt = (raw: unknown): number => Number.parseInt(String(raw || ''), 10);

    const limit = toInt(req.query.limit);
    const offset = toInt(req.query.offset);

    // status/severity are forwarded unvalidated, as in the original handler
    const options = {
      status: req.query.status as any,
      severity: req.query.severity as any,
      type: req.query.type as string,
      limit: Number.isNaN(limit) ? 50 : limit,
      offset: Number.isNaN(offset) ? 0 : offset,
    };

    const result = await alerts.listAlerts(options);
    res.json(result);
  } catch (error) {
    console.error('[System] Alerts list error:', error);
    res.status(500).json({ error: 'Failed to list alerts' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/alerts/active
|
||||||
|
* Get active alerts
|
||||||
|
*/
|
||||||
|
router.get('/alerts/active', async (_req: Request, res: Response) => {
  // Respond with the alert service's active alerts
  try {
    res.json(await alerts.getActiveAlerts());
  } catch (err) {
    console.error('[System] Active alerts error:', err);
    res.status(500).json({ error: 'Failed to get active alerts' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/alerts/summary
|
||||||
|
* Get alert summary
|
||||||
|
*/
|
||||||
|
router.get('/alerts/summary', async (_req: Request, res: Response) => {
  // Respond with the alert service's summary
  try {
    res.json(await alerts.getSummary());
  } catch (err) {
    console.error('[System] Alerts summary error:', err);
    res.status(500).json({ error: 'Failed to get alerts summary' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/system/alerts/:id/acknowledge
|
||||||
|
* Acknowledge an alert
|
||||||
|
*/
|
||||||
|
router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
  try {
    // Reject a non-numeric id up front instead of forwarding NaN to the service
    const alertId = Number.parseInt(req.params.id, 10);
    if (Number.isNaN(alertId)) {
      return res.status(400).json({ error: 'Invalid alert id' });
    }

    // req.body is undefined when no body parser handled the request
    const body = req.body || {};
    const acknowledgedBy = body.acknowledgedBy || 'api';

    const success = await alerts.acknowledgeAlert(alertId, acknowledgedBy);
    res.json({ success });
  } catch (error) {
    console.error('[System] Acknowledge alert error:', error);
    res.status(500).json({ error: 'Failed to acknowledge alert' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/system/alerts/:id/resolve
|
||||||
|
* Resolve an alert
|
||||||
|
*/
|
||||||
|
router.post('/alerts/:id/resolve', async (req: Request, res: Response) => {
  try {
    // Reject a non-numeric id up front instead of forwarding NaN to the service
    const alertId = Number.parseInt(req.params.id, 10);
    if (Number.isNaN(alertId)) {
      return res.status(400).json({ error: 'Invalid alert id' });
    }

    // req.body is undefined when no body parser handled the request
    const body = req.body || {};
    const resolvedBy = body.resolvedBy || 'api';

    const success = await alerts.resolveAlert(alertId, resolvedBy);
    res.json({ success });
  } catch (error) {
    console.error('[System] Resolve alert error:', error);
    res.status(500).json({ error: 'Failed to resolve alert' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/system/alerts/bulk-acknowledge
|
||||||
|
* Bulk acknowledge alerts
|
||||||
|
*/
|
||||||
|
router.post('/alerts/bulk-acknowledge', async (req: Request, res: Response) => {
  try {
    // Destructuring an undefined body would throw and surface as a 500;
    // guard it so a missing body gets the 400 below instead.
    const { ids, acknowledgedBy } = req.body || {};
    // Array.isArray is false for undefined/null, so it covers the "missing" case
    if (!Array.isArray(ids)) {
      return res.status(400).json({ error: 'ids array is required' });
    }

    const count = await alerts.bulkAcknowledge(ids, acknowledgedBy || 'api');
    res.json({ acknowledged: count });
  } catch (error) {
    console.error('[System] Bulk acknowledge error:', error);
    res.status(500).json({ error: 'Failed to bulk acknowledge' });
  }
});
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// METRICS ENDPOINTS
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/metrics
|
||||||
|
* Get all current metrics
|
||||||
|
*/
|
||||||
|
router.get('/metrics', async (_req: Request, res: Response) => {
  // Respond with every metric the metrics service currently holds
  try {
    res.json(await metrics.getAllMetrics());
  } catch (err) {
    console.error('[System] Metrics error:', err);
    res.status(500).json({ error: 'Failed to get metrics' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/metrics/:name
|
||||||
|
* Get a specific metric
|
||||||
|
*/
|
||||||
|
router.get('/metrics/:name', async (req: Request, res: Response) => {
  try {
    const found = await metrics.getMetric(req.params.name);
    if (found) {
      res.json(found);
      return;
    }
    // The service returned nothing for this metric name
    res.status(404).json({ error: 'Metric not found' });
  } catch (err) {
    console.error('[System] Metric error:', err);
    res.status(500).json({ error: 'Failed to get metric' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/metrics/:name/history
|
||||||
|
* Get metric time series
|
||||||
|
*/
|
||||||
|
router.get('/metrics/:name/history', async (req: Request, res: Response) => {
  try {
    // Base-10 parse; a missing or non-numeric ?hours falls back to 24
    // instead of passing NaN to the metrics service.
    const parsedHours = Number.parseInt(String(req.query.hours || ''), 10);
    const hours = Number.isNaN(parsedHours) ? 24 : parsedHours;

    const history = await metrics.getMetricHistory(req.params.name, hours);
    res.json(history);
  } catch (error) {
    console.error('[System] Metric history error:', error);
    res.status(500).json({ error: 'Failed to get metric history' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/errors
|
||||||
|
* Get error summary
|
||||||
|
*/
|
||||||
|
router.get('/errors', async (_req: Request, res: Response) => {
  // Respond with the metrics service's error summary
  try {
    res.json(await metrics.getErrorSummary());
  } catch (err) {
    console.error('[System] Error summary error:', err);
    res.status(500).json({ error: 'Failed to get error summary' });
  }
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/errors/recent
|
||||||
|
* Get recent errors
|
||||||
|
*/
|
||||||
|
router.get('/errors/recent', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const limit = req.query.limit ? parseInt(req.query.limit as string) : 50;
|
||||||
|
const errorType = req.query.type as string;
|
||||||
|
const errors = await metrics.getRecentErrors(limit, errorType);
|
||||||
|
res.json(errors);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[System] Recent errors error:', error);
|
||||||
|
res.status(500).json({ error: 'Failed to get recent errors' });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/system/errors/acknowledge
|
||||||
|
* Acknowledge errors
|
||||||
|
*/
|
||||||
|
router.post('/errors/acknowledge', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const { ids, acknowledgedBy } = req.body;
|
||||||
|
if (!ids || !Array.isArray(ids)) {
|
||||||
|
return res.status(400).json({ error: 'ids array is required' });
|
||||||
|
}
|
||||||
|
const count = await metrics.acknowledgeErrors(ids, acknowledgedBy || 'api');
|
||||||
|
res.json({ acknowledged: count });
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[System] Acknowledge errors error:', error);
|
||||||
|
res.status(500).json({ error: 'Failed to acknowledge errors' });
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
return router;
|
return router;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create Prometheus metrics endpoint (standalone)
|
||||||
|
*/
|
||||||
export function createPrometheusRouter(pool: Pool): Router {
|
export function createPrometheusRouter(pool: Pool): Router {
|
||||||
const router = Router();
|
const router = Router();
|
||||||
const metrics = new MetricsService(pool);
|
const metrics = new MetricsService(pool);
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
* Phase 5: Full Production Sync + Monitoring
|
* Phase 5: Full Production Sync + Monitoring
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// SyncOrchestrator moved to _deprecated (depends on hydration module)
|
export { SyncOrchestrator, type SyncStatus, type QueueDepth, type SyncRunMetrics, type OrchestratorStatus } from './sync-orchestrator';
|
||||||
export { MetricsService, ERROR_TYPES, type Metric, type MetricTimeSeries, type ErrorBucket, type ErrorType } from './metrics';
|
export { MetricsService, ERROR_TYPES, type Metric, type MetricTimeSeries, type ErrorBucket, type ErrorType } from './metrics';
|
||||||
export { DLQService, type DLQPayload, type DLQStats } from './dlq';
|
export { DLQService, type DLQPayload, type DLQStats } from './dlq';
|
||||||
export { AlertService, type SystemAlert, type AlertSummary, type AlertSeverity, type AlertStatus } from './alerts';
|
export { AlertService, type SystemAlert, type AlertSummary, type AlertSeverity, type AlertStatus } from './alerts';
|
||||||
|
|||||||
@@ -4,9 +4,8 @@
|
|||||||
* Exports all task handlers for the task worker.
|
* Exports all task handlers for the task worker.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
export { handleProductDiscovery } from './product-discovery';
|
|
||||||
export { handleProductRefresh } from './product-refresh';
|
export { handleProductRefresh } from './product-refresh';
|
||||||
|
export { handleProductDiscovery } from './product-discovery';
|
||||||
export { handleStoreDiscovery } from './store-discovery';
|
export { handleStoreDiscovery } from './store-discovery';
|
||||||
export { handleEntryPointDiscovery } from './entry-point-discovery';
|
export { handleEntryPointDiscovery } from './entry-point-discovery';
|
||||||
export { handleAnalyticsRefresh } from './analytics-refresh';
|
export { handleAnalyticsRefresh } from './analytics-refresh';
|
||||||
export { handleWhoami } from './whoami';
|
|
||||||
|
|||||||
@@ -1,80 +0,0 @@
|
|||||||
/**
|
|
||||||
* WhoAmI Handler
|
|
||||||
* Tests proxy connectivity and anti-detect by fetching public IP
|
|
||||||
* Reports: proxy IP, fingerprint info, and connection status
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { TaskContext, TaskResult } from '../task-worker';
|
|
||||||
import { execSync } from 'child_process';
|
|
||||||
|
|
||||||
export async function handleWhoami(ctx: TaskContext): Promise<TaskResult> {
|
|
||||||
const { pool, crawlRotator } = ctx;
|
|
||||||
|
|
||||||
console.log('[WhoAmI] Testing proxy and anti-detect...');
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Use the preflight check which tests proxy + anti-detect
|
|
||||||
if (crawlRotator) {
|
|
||||||
const preflight = await crawlRotator.preflight();
|
|
||||||
|
|
||||||
if (!preflight.passed) {
|
|
||||||
return {
|
|
||||||
success: false,
|
|
||||||
error: preflight.error || 'Preflight check failed',
|
|
||||||
proxyAvailable: preflight.proxyAvailable,
|
|
||||||
proxyConnected: preflight.proxyConnected,
|
|
||||||
antidetectReady: preflight.antidetectReady,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`[WhoAmI] Proxy IP: ${preflight.proxyIp}, Response: ${preflight.responseTimeMs}ms`);
|
|
||||||
console.log(`[WhoAmI] Fingerprint: ${preflight.fingerprint?.browserName}/${preflight.fingerprint?.deviceCategory}`);
|
|
||||||
|
|
||||||
return {
|
|
||||||
success: true,
|
|
||||||
proxyIp: preflight.proxyIp,
|
|
||||||
responseTimeMs: preflight.responseTimeMs,
|
|
||||||
fingerprint: preflight.fingerprint,
|
|
||||||
proxyAvailable: preflight.proxyAvailable,
|
|
||||||
proxyConnected: preflight.proxyConnected,
|
|
||||||
antidetectReady: preflight.antidetectReady,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: Direct proxy test without CrawlRotator
|
|
||||||
const proxyResult = await pool.query(`
|
|
||||||
SELECT host, port, username, password
|
|
||||||
FROM proxies
|
|
||||||
WHERE is_active = true
|
|
||||||
LIMIT 1
|
|
||||||
`);
|
|
||||||
|
|
||||||
if (proxyResult.rows.length === 0) {
|
|
||||||
return { success: false, error: 'No active proxy configured' };
|
|
||||||
}
|
|
||||||
|
|
||||||
const p = proxyResult.rows[0];
|
|
||||||
const proxyUrl = p.username
|
|
||||||
? `http://${p.username}:${p.password}@${p.host}:${p.port}`
|
|
||||||
: `http://${p.host}:${p.port}`;
|
|
||||||
|
|
||||||
console.log(`[WhoAmI] Using proxy: ${p.host}:${p.port}`);
|
|
||||||
|
|
||||||
// Fetch IP via proxy
|
|
||||||
const cmd = `curl -s --proxy '${proxyUrl}' 'https://api.ipify.org?format=json'`;
|
|
||||||
const output = execSync(cmd, { timeout: 30000 }).toString().trim();
|
|
||||||
const data = JSON.parse(output);
|
|
||||||
|
|
||||||
console.log(`[WhoAmI] Proxy IP: ${data.ip}`);
|
|
||||||
|
|
||||||
return {
|
|
||||||
success: true,
|
|
||||||
proxyIp: data.ip,
|
|
||||||
proxyHost: p.host,
|
|
||||||
proxyPort: p.port,
|
|
||||||
};
|
|
||||||
} catch (error: any) {
|
|
||||||
console.error('[WhoAmI] Error:', error.message);
|
|
||||||
return { success: false, error: error.message };
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -17,8 +17,8 @@ export {
|
|||||||
export { TaskWorker, TaskContext, TaskResult } from './task-worker';
|
export { TaskWorker, TaskContext, TaskResult } from './task-worker';
|
||||||
|
|
||||||
export {
|
export {
|
||||||
handleProductDiscovery,
|
|
||||||
handleProductRefresh,
|
handleProductRefresh,
|
||||||
|
handleProductDiscovery,
|
||||||
handleStoreDiscovery,
|
handleStoreDiscovery,
|
||||||
handleEntryPointDiscovery,
|
handleEntryPointDiscovery,
|
||||||
handleAnalyticsRefresh,
|
handleAnalyticsRefresh,
|
||||||
|
|||||||
@@ -24,16 +24,14 @@ async function tableExists(tableName: string): Promise<boolean> {
|
|||||||
|
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Task roles
|
// Per TASK_WORKFLOW_2024-12-10.md: Task roles
|
||||||
// payload_fetch: Hits Dutchie API, saves raw payload to filesystem
|
// payload_fetch: Hits Dutchie API, saves raw payload to filesystem
|
||||||
// product_discovery: Main product crawl handler
|
// product_refresh: Reads local payload, normalizes, upserts to DB
|
||||||
// product_refresh: Legacy role (deprecated but kept for compatibility)
|
|
||||||
export type TaskRole =
|
export type TaskRole =
|
||||||
| 'store_discovery'
|
| 'store_discovery'
|
||||||
| 'entry_point_discovery'
|
| 'entry_point_discovery'
|
||||||
| 'product_discovery'
|
| 'product_discovery'
|
||||||
| 'payload_fetch' // Fetches from API, saves to disk
|
| 'payload_fetch' // NEW: Fetches from API, saves to disk
|
||||||
| 'product_refresh' // DEPRECATED: Use product_discovery instead
|
| 'product_refresh' // CHANGED: Now reads from local payload
|
||||||
| 'analytics_refresh'
|
| 'analytics_refresh';
|
||||||
| 'whoami'; // Tests proxy + anti-detect connectivity
|
|
||||||
|
|
||||||
export type TaskStatus =
|
export type TaskStatus =
|
||||||
| 'pending'
|
| 'pending'
|
||||||
@@ -52,7 +50,6 @@ export interface WorkerTask {
|
|||||||
platform: string | null;
|
platform: string | null;
|
||||||
status: TaskStatus;
|
status: TaskStatus;
|
||||||
priority: number;
|
priority: number;
|
||||||
method: 'curl' | 'http' | null; // Transport method: curl=axios/proxy, http=Puppeteer/browser
|
|
||||||
scheduled_for: Date | null;
|
scheduled_for: Date | null;
|
||||||
worker_id: string | null;
|
worker_id: string | null;
|
||||||
claimed_at: Date | null;
|
claimed_at: Date | null;
|
||||||
@@ -154,33 +151,23 @@ class TaskService {
|
|||||||
* Claim a task atomically for a worker
|
* Claim a task atomically for a worker
|
||||||
* If role is null, claims ANY available task (role-agnostic worker)
|
* If role is null, claims ANY available task (role-agnostic worker)
|
||||||
* Returns null if task pool is paused.
|
* Returns null if task pool is paused.
|
||||||
*
|
|
||||||
* @param role - Task role to claim, or null for any task
|
|
||||||
* @param workerId - Worker ID claiming the task
|
|
||||||
* @param curlPassed - Whether worker passed curl preflight (default true for backward compat)
|
|
||||||
* @param httpPassed - Whether worker passed http/Puppeteer preflight (default false)
|
|
||||||
*/
|
*/
|
||||||
async claimTask(
|
async claimTask(role: TaskRole | null, workerId: string): Promise<WorkerTask | null> {
|
||||||
role: TaskRole | null,
|
|
||||||
workerId: string,
|
|
||||||
curlPassed: boolean = true,
|
|
||||||
httpPassed: boolean = false
|
|
||||||
): Promise<WorkerTask | null> {
|
|
||||||
// Check if task pool is paused - don't claim any tasks
|
// Check if task pool is paused - don't claim any tasks
|
||||||
if (isTaskPoolPaused()) {
|
if (isTaskPoolPaused()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (role) {
|
if (role) {
|
||||||
// Role-specific claiming - use the SQL function with preflight capabilities
|
// Role-specific claiming - use the SQL function
|
||||||
const result = await pool.query(
|
const result = await pool.query(
|
||||||
`SELECT * FROM claim_task($1, $2, $3, $4)`,
|
`SELECT * FROM claim_task($1, $2)`,
|
||||||
[role, workerId, curlPassed, httpPassed]
|
[role, workerId]
|
||||||
);
|
);
|
||||||
return (result.rows[0] as WorkerTask) || null;
|
return (result.rows[0] as WorkerTask) || null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Role-agnostic claiming - claim ANY pending task matching worker capabilities
|
// Role-agnostic claiming - claim ANY pending task
|
||||||
const result = await pool.query(`
|
const result = await pool.query(`
|
||||||
UPDATE worker_tasks
|
UPDATE worker_tasks
|
||||||
SET
|
SET
|
||||||
@@ -191,12 +178,6 @@ class TaskService {
|
|||||||
SELECT id FROM worker_tasks
|
SELECT id FROM worker_tasks
|
||||||
WHERE status = 'pending'
|
WHERE status = 'pending'
|
||||||
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
|
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
|
||||||
-- Method compatibility: worker must have passed the required preflight
|
|
||||||
AND (
|
|
||||||
method IS NULL -- No preference, any worker can claim
|
|
||||||
OR (method = 'curl' AND $2 = TRUE)
|
|
||||||
OR (method = 'http' AND $3 = TRUE)
|
|
||||||
)
|
|
||||||
-- Exclude stores that already have an active task
|
-- Exclude stores that already have an active task
|
||||||
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
|
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
|
||||||
SELECT dispensary_id FROM worker_tasks
|
SELECT dispensary_id FROM worker_tasks
|
||||||
@@ -208,7 +189,7 @@ class TaskService {
|
|||||||
FOR UPDATE SKIP LOCKED
|
FOR UPDATE SKIP LOCKED
|
||||||
)
|
)
|
||||||
RETURNING *
|
RETURNING *
|
||||||
`, [workerId, curlPassed, httpPassed]);
|
`, [workerId]);
|
||||||
|
|
||||||
return (result.rows[0] as WorkerTask) || null;
|
return (result.rows[0] as WorkerTask) || null;
|
||||||
}
|
}
|
||||||
@@ -249,24 +230,6 @@ class TaskService {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Release a claimed task back to pending (e.g., when preflight fails)
|
|
||||||
* This allows another worker to pick it up.
|
|
||||||
*/
|
|
||||||
async releaseTask(taskId: number): Promise<void> {
|
|
||||||
await pool.query(
|
|
||||||
`UPDATE worker_tasks
|
|
||||||
SET status = 'pending',
|
|
||||||
worker_id = NULL,
|
|
||||||
claimed_at = NULL,
|
|
||||||
started_at = NULL,
|
|
||||||
updated_at = NOW()
|
|
||||||
WHERE id = $1 AND status IN ('claimed', 'running')`,
|
|
||||||
[taskId]
|
|
||||||
);
|
|
||||||
console.log(`[TaskService] Task ${taskId} released back to pending`);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Mark a task as failed, with auto-retry if under max_retries
|
* Mark a task as failed, with auto-retry if under max_retries
|
||||||
* Returns true if task was re-queued for retry, false if permanently failed
|
* Returns true if task was re-queued for retry, false if permanently failed
|
||||||
|
|||||||
@@ -51,10 +51,6 @@ import os from 'os';
|
|||||||
import { CrawlRotator } from '../services/crawl-rotator';
|
import { CrawlRotator } from '../services/crawl-rotator';
|
||||||
import { setCrawlRotator } from '../platforms/dutchie';
|
import { setCrawlRotator } from '../platforms/dutchie';
|
||||||
|
|
||||||
// Dual-transport preflight system
|
|
||||||
import { runCurlPreflight, CurlPreflightResult } from '../services/curl-preflight';
|
|
||||||
import { runPuppeteerPreflightWithRetry, PuppeteerPreflightResult } from '../services/puppeteer-preflight';
|
|
||||||
|
|
||||||
// Task handlers by role
|
// Task handlers by role
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: payload_fetch and product_refresh are now separate
|
// Per TASK_WORKFLOW_2024-12-10.md: payload_fetch and product_refresh are now separate
|
||||||
import { handlePayloadFetch } from './handlers/payload-fetch';
|
import { handlePayloadFetch } from './handlers/payload-fetch';
|
||||||
@@ -63,59 +59,16 @@ import { handleProductDiscovery } from './handlers/product-discovery';
|
|||||||
import { handleStoreDiscovery } from './handlers/store-discovery';
|
import { handleStoreDiscovery } from './handlers/store-discovery';
|
||||||
import { handleEntryPointDiscovery } from './handlers/entry-point-discovery';
|
import { handleEntryPointDiscovery } from './handlers/entry-point-discovery';
|
||||||
import { handleAnalyticsRefresh } from './handlers/analytics-refresh';
|
import { handleAnalyticsRefresh } from './handlers/analytics-refresh';
|
||||||
import { handleWhoami } from './handlers/whoami';
|
|
||||||
|
|
||||||
const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '5000');
|
const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '5000');
|
||||||
const HEARTBEAT_INTERVAL_MS = parseInt(process.env.HEARTBEAT_INTERVAL_MS || '30000');
|
const HEARTBEAT_INTERVAL_MS = parseInt(process.env.HEARTBEAT_INTERVAL_MS || '30000');
|
||||||
const API_BASE_URL = process.env.API_BASE_URL || 'http://localhost:3010';
|
const API_BASE_URL = process.env.API_BASE_URL || 'http://localhost:3010';
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// CONCURRENT TASK PROCESSING SETTINGS
|
|
||||||
// =============================================================================
|
|
||||||
// Workers can process multiple tasks simultaneously using async I/O.
|
|
||||||
// This improves throughput for I/O-bound tasks (network calls, DB queries).
|
|
||||||
//
|
|
||||||
// Resource thresholds trigger "backoff" - the worker stops claiming new tasks
|
|
||||||
// but continues processing existing ones until resources return to normal.
|
|
||||||
//
|
|
||||||
// See: docs/WORKER_TASK_ARCHITECTURE.md#concurrent-task-processing
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
// Maximum number of tasks this worker will run concurrently
|
|
||||||
// Tune based on workload: I/O-bound tasks benefit from higher concurrency
|
|
||||||
const MAX_CONCURRENT_TASKS = parseInt(process.env.MAX_CONCURRENT_TASKS || '3');
|
|
||||||
|
|
||||||
// When heap memory usage exceeds this threshold (as decimal 0.0-1.0), stop claiming new tasks
|
|
||||||
// Default 85% - gives headroom before OOM
|
|
||||||
const MEMORY_BACKOFF_THRESHOLD = parseFloat(process.env.MEMORY_BACKOFF_THRESHOLD || '0.85');
|
|
||||||
|
|
||||||
// Parse max heap size from NODE_OPTIONS (--max-old-space-size=1500)
|
|
||||||
// This is used as the denominator for memory percentage calculation
|
|
||||||
// V8's heapTotal is dynamic and stays small when idle, causing false high percentages
|
|
||||||
function getMaxHeapSizeMb(): number {
|
|
||||||
const nodeOptions = process.env.NODE_OPTIONS || '';
|
|
||||||
const match = nodeOptions.match(/--max-old-space-size=(\d+)/);
|
|
||||||
if (match) {
|
|
||||||
return parseInt(match[1], 10);
|
|
||||||
}
|
|
||||||
// Fallback: use 512MB if not specified
|
|
||||||
return 512;
|
|
||||||
}
|
|
||||||
const MAX_HEAP_SIZE_MB = getMaxHeapSizeMb();
|
|
||||||
|
|
||||||
// When CPU usage exceeds this threshold (as decimal 0.0-1.0), stop claiming new tasks
|
|
||||||
// Default 90% - allows some burst capacity
|
|
||||||
const CPU_BACKOFF_THRESHOLD = parseFloat(process.env.CPU_BACKOFF_THRESHOLD || '0.90');
|
|
||||||
|
|
||||||
// How long to wait (ms) when in backoff state before rechecking resources
|
|
||||||
const BACKOFF_DURATION_MS = parseInt(process.env.BACKOFF_DURATION_MS || '10000');
|
|
||||||
|
|
||||||
export interface TaskContext {
|
export interface TaskContext {
|
||||||
pool: Pool;
|
pool: Pool;
|
||||||
workerId: string;
|
workerId: string;
|
||||||
task: WorkerTask;
|
task: WorkerTask;
|
||||||
heartbeat: () => Promise<void>;
|
heartbeat: () => Promise<void>;
|
||||||
crawlRotator?: CrawlRotator;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface TaskResult {
|
export interface TaskResult {
|
||||||
@@ -130,38 +83,17 @@ export interface TaskResult {
|
|||||||
type TaskHandler = (ctx: TaskContext) => Promise<TaskResult>;
|
type TaskHandler = (ctx: TaskContext) => Promise<TaskResult>;
|
||||||
|
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Handler registry
|
// Per TASK_WORKFLOW_2024-12-10.md: Handler registry
|
||||||
// payload_fetch: Fetches from Dutchie API, saves to disk
|
// payload_fetch: Fetches from Dutchie API, saves to disk, chains to product_refresh
|
||||||
// product_refresh: Reads local payload, normalizes, upserts to DB
|
// product_refresh: Reads local payload, normalizes, upserts to DB
|
||||||
// product_discovery: Main handler for product crawling
|
|
||||||
const TASK_HANDLERS: Record<TaskRole, TaskHandler> = {
|
const TASK_HANDLERS: Record<TaskRole, TaskHandler> = {
|
||||||
payload_fetch: handlePayloadFetch, // API fetch -> disk
|
payload_fetch: handlePayloadFetch, // NEW: API fetch -> disk
|
||||||
product_refresh: handleProductRefresh, // disk -> DB
|
product_refresh: handleProductRefresh, // CHANGED: disk -> DB
|
||||||
product_discovery: handleProductDiscovery,
|
product_discovery: handleProductDiscovery,
|
||||||
store_discovery: handleStoreDiscovery,
|
store_discovery: handleStoreDiscovery,
|
||||||
entry_point_discovery: handleEntryPointDiscovery,
|
entry_point_discovery: handleEntryPointDiscovery,
|
||||||
analytics_refresh: handleAnalyticsRefresh,
|
analytics_refresh: handleAnalyticsRefresh,
|
||||||
whoami: handleWhoami, // Tests proxy + anti-detect
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
|
||||||
* Resource usage stats reported to the registry and used for backoff decisions.
|
|
||||||
* These values are included in worker heartbeats and displayed in the UI.
|
|
||||||
*/
|
|
||||||
interface ResourceStats {
|
|
||||||
/** Current heap memory usage as decimal (0.0 to 1.0) */
|
|
||||||
memoryPercent: number;
|
|
||||||
/** Current heap used in MB */
|
|
||||||
memoryMb: number;
|
|
||||||
/** Total heap available in MB */
|
|
||||||
memoryTotalMb: number;
|
|
||||||
/** CPU usage percentage since last check (0 to 100) */
|
|
||||||
cpuPercent: number;
|
|
||||||
/** True if worker is currently in backoff state */
|
|
||||||
isBackingOff: boolean;
|
|
||||||
/** Reason for backoff (e.g., "Memory at 87.3% (threshold: 85%)") */
|
|
||||||
backoffReason: string | null;
|
|
||||||
}
|
|
||||||
|
|
||||||
export class TaskWorker {
|
export class TaskWorker {
|
||||||
private pool: Pool;
|
private pool: Pool;
|
||||||
private workerId: string;
|
private workerId: string;
|
||||||
@@ -170,125 +102,14 @@ export class TaskWorker {
|
|||||||
private isRunning: boolean = false;
|
private isRunning: boolean = false;
|
||||||
private heartbeatInterval: NodeJS.Timeout | null = null;
|
private heartbeatInterval: NodeJS.Timeout | null = null;
|
||||||
private registryHeartbeatInterval: NodeJS.Timeout | null = null;
|
private registryHeartbeatInterval: NodeJS.Timeout | null = null;
|
||||||
|
private currentTask: WorkerTask | null = null;
|
||||||
private crawlRotator: CrawlRotator;
|
private crawlRotator: CrawlRotator;
|
||||||
|
|
||||||
// ==========================================================================
|
|
||||||
// CONCURRENT TASK TRACKING
|
|
||||||
// ==========================================================================
|
|
||||||
// activeTasks: Map of task ID -> task object for all currently running tasks
|
|
||||||
// taskPromises: Map of task ID -> Promise for cleanup when task completes
|
|
||||||
// maxConcurrentTasks: How many tasks this worker will run in parallel
|
|
||||||
// ==========================================================================
|
|
||||||
private activeTasks: Map<number, WorkerTask> = new Map();
|
|
||||||
private taskPromises: Map<number, Promise<void>> = new Map();
|
|
||||||
private maxConcurrentTasks: number = MAX_CONCURRENT_TASKS;
|
|
||||||
|
|
||||||
// ==========================================================================
|
|
||||||
// RESOURCE MONITORING FOR BACKOFF
|
|
||||||
// ==========================================================================
|
|
||||||
// CPU tracking uses differential measurement - we track last values and
|
|
||||||
// calculate percentage based on elapsed time since last check.
|
|
||||||
// ==========================================================================
|
|
||||||
private lastCpuUsage: { user: number; system: number } = { user: 0, system: 0 };
|
|
||||||
private lastCpuCheck: number = Date.now();
|
|
||||||
private isBackingOff: boolean = false;
|
|
||||||
private backoffReason: string | null = null;
|
|
||||||
|
|
||||||
// ==========================================================================
|
|
||||||
// DUAL-TRANSPORT PREFLIGHT STATUS
|
|
||||||
// ==========================================================================
|
|
||||||
// Workers run BOTH preflights on startup:
|
|
||||||
// - curl: axios/proxy transport - fast, for simple API calls
|
|
||||||
// - http: Puppeteer/browser transport - anti-detect, for Dutchie GraphQL
|
|
||||||
//
|
|
||||||
// Task claiming checks method compatibility - worker must have passed
|
|
||||||
// the preflight for the task's required method.
|
|
||||||
// ==========================================================================
|
|
||||||
private preflightCurlPassed: boolean = false;
|
|
||||||
private preflightHttpPassed: boolean = false;
|
|
||||||
private preflightCurlResult: CurlPreflightResult | null = null;
|
|
||||||
private preflightHttpResult: PuppeteerPreflightResult | null = null;
|
|
||||||
|
|
||||||
constructor(role: TaskRole | null = null, workerId?: string) {
|
constructor(role: TaskRole | null = null, workerId?: string) {
|
||||||
this.pool = getPool();
|
this.pool = getPool();
|
||||||
this.role = role;
|
this.role = role;
|
||||||
this.workerId = workerId || `worker-${uuidv4().slice(0, 8)}`;
|
this.workerId = workerId || `worker-${uuidv4().slice(0, 8)}`;
|
||||||
this.crawlRotator = new CrawlRotator(this.pool);
|
this.crawlRotator = new CrawlRotator(this.pool);
|
||||||
|
|
||||||
// Initialize CPU tracking
|
|
||||||
const cpuUsage = process.cpuUsage();
|
|
||||||
this.lastCpuUsage = { user: cpuUsage.user, system: cpuUsage.system };
|
|
||||||
this.lastCpuCheck = Date.now();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get current resource usage
|
|
||||||
* Memory percentage is calculated against MAX_HEAP_SIZE_MB (from --max-old-space-size)
|
|
||||||
* NOT against V8's dynamic heapTotal which stays small when idle
|
|
||||||
*/
|
|
||||||
private getResourceStats(): ResourceStats {
|
|
||||||
const memUsage = process.memoryUsage();
|
|
||||||
const heapUsedMb = memUsage.heapUsed / 1024 / 1024;
|
|
||||||
// Use MAX_HEAP_SIZE_MB as ceiling, not dynamic heapTotal
|
|
||||||
// V8's heapTotal stays small when idle (e.g., 36MB) causing false 95%+ readings
|
|
||||||
// With --max-old-space-size=1500, we should calculate against 1500MB
|
|
||||||
const memoryPercent = heapUsedMb / MAX_HEAP_SIZE_MB;
|
|
||||||
|
|
||||||
// Calculate CPU usage since last check
|
|
||||||
const cpuUsage = process.cpuUsage();
|
|
||||||
const now = Date.now();
|
|
||||||
const elapsed = now - this.lastCpuCheck;
|
|
||||||
|
|
||||||
let cpuPercent = 0;
|
|
||||||
if (elapsed > 0) {
|
|
||||||
const userDiff = (cpuUsage.user - this.lastCpuUsage.user) / 1000; // microseconds to ms
|
|
||||||
const systemDiff = (cpuUsage.system - this.lastCpuUsage.system) / 1000;
|
|
||||||
cpuPercent = ((userDiff + systemDiff) / elapsed) * 100;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update last values
|
|
||||||
this.lastCpuUsage = { user: cpuUsage.user, system: cpuUsage.system };
|
|
||||||
this.lastCpuCheck = now;
|
|
||||||
|
|
||||||
return {
|
|
||||||
memoryPercent,
|
|
||||||
memoryMb: Math.round(heapUsedMb),
|
|
||||||
memoryTotalMb: MAX_HEAP_SIZE_MB, // Use max-old-space-size, not dynamic heapTotal
|
|
||||||
cpuPercent: Math.min(100, cpuPercent), // Cap at 100%
|
|
||||||
isBackingOff: this.isBackingOff,
|
|
||||||
backoffReason: this.backoffReason,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if we should back off from taking new tasks
|
|
||||||
*/
|
|
||||||
private shouldBackOff(): { backoff: boolean; reason: string | null } {
|
|
||||||
const stats = this.getResourceStats();
|
|
||||||
|
|
||||||
if (stats.memoryPercent > MEMORY_BACKOFF_THRESHOLD) {
|
|
||||||
return { backoff: true, reason: `Memory at ${(stats.memoryPercent * 100).toFixed(1)}% (threshold: ${MEMORY_BACKOFF_THRESHOLD * 100}%)` };
|
|
||||||
}
|
|
||||||
|
|
||||||
if (stats.cpuPercent > CPU_BACKOFF_THRESHOLD * 100) {
|
|
||||||
return { backoff: true, reason: `CPU at ${stats.cpuPercent.toFixed(1)}% (threshold: ${CPU_BACKOFF_THRESHOLD * 100}%)` };
|
|
||||||
}
|
|
||||||
|
|
||||||
return { backoff: false, reason: null };
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get count of currently running tasks
|
|
||||||
*/
|
|
||||||
get activeTaskCount(): number {
|
|
||||||
return this.activeTasks.size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if we can accept more tasks
|
|
||||||
*/
|
|
||||||
private canAcceptMoreTasks(): boolean {
|
|
||||||
return this.activeTasks.size < this.maxConcurrentTasks;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -371,99 +192,6 @@ export class TaskWorker {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Run dual-transport preflights on startup
|
|
||||||
* Tests both curl (axios/proxy) and http (Puppeteer/browser) transport methods.
|
|
||||||
* Results are reported to worker_registry and used for task claiming.
|
|
||||||
*
|
|
||||||
* NOTE: All current tasks require 'http' method, so http preflight must pass
|
|
||||||
* for the worker to claim any tasks. Curl preflight is for future use.
|
|
||||||
*/
|
|
||||||
private async runDualPreflights(): Promise<void> {
|
|
||||||
console.log(`[TaskWorker] Running dual-transport preflights...`);
|
|
||||||
|
|
||||||
// Run both preflights in parallel for efficiency
|
|
||||||
const [curlResult, httpResult] = await Promise.all([
|
|
||||||
runCurlPreflight(this.crawlRotator).catch((err): CurlPreflightResult => ({
|
|
||||||
method: 'curl',
|
|
||||||
passed: false,
|
|
||||||
proxyAvailable: false,
|
|
||||||
proxyConnected: false,
|
|
||||||
antidetectReady: false,
|
|
||||||
proxyIp: null,
|
|
||||||
fingerprint: null,
|
|
||||||
error: `Preflight error: ${err.message}`,
|
|
||||||
responseTimeMs: null,
|
|
||||||
})),
|
|
||||||
runPuppeteerPreflightWithRetry(this.crawlRotator, 1).catch((err): PuppeteerPreflightResult => ({
|
|
||||||
method: 'http',
|
|
||||||
passed: false,
|
|
||||||
proxyAvailable: false,
|
|
||||||
proxyConnected: false,
|
|
||||||
antidetectReady: false,
|
|
||||||
proxyIp: null,
|
|
||||||
fingerprint: null,
|
|
||||||
error: `Preflight error: ${err.message}`,
|
|
||||||
responseTimeMs: null,
|
|
||||||
productsReturned: 0,
|
|
||||||
})),
|
|
||||||
]);
|
|
||||||
|
|
||||||
// Store results
|
|
||||||
this.preflightCurlResult = curlResult;
|
|
||||||
this.preflightHttpResult = httpResult;
|
|
||||||
this.preflightCurlPassed = curlResult.passed;
|
|
||||||
this.preflightHttpPassed = httpResult.passed;
|
|
||||||
|
|
||||||
// Log results
|
|
||||||
console.log(`[TaskWorker] CURL preflight: ${curlResult.passed ? 'PASSED' : 'FAILED'}${curlResult.error ? ` - ${curlResult.error}` : ''}`);
|
|
||||||
console.log(`[TaskWorker] HTTP preflight: ${httpResult.passed ? 'PASSED' : 'FAILED'}${httpResult.error ? ` - ${httpResult.error}` : ''}`);
|
|
||||||
|
|
||||||
if (httpResult.passed && httpResult.productsReturned) {
|
|
||||||
console.log(`[TaskWorker] HTTP preflight returned ${httpResult.productsReturned} products from test store`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Report to worker_registry via API
|
|
||||||
await this.reportPreflightStatus();
|
|
||||||
|
|
||||||
// Since all tasks require 'http', warn if http preflight failed
|
|
||||||
if (!this.preflightHttpPassed) {
|
|
||||||
console.warn(`[TaskWorker] WARNING: HTTP preflight failed - this worker cannot claim any tasks!`);
|
|
||||||
console.warn(`[TaskWorker] Error: ${httpResult.error}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Report preflight status to worker_registry
|
|
||||||
*/
|
|
||||||
private async reportPreflightStatus(): Promise<void> {
|
|
||||||
try {
|
|
||||||
// Update worker_registry directly via SQL (more reliable than API)
|
|
||||||
await this.pool.query(`
|
|
||||||
SELECT update_worker_preflight($1, 'curl', $2, $3, $4)
|
|
||||||
`, [
|
|
||||||
this.workerId,
|
|
||||||
this.preflightCurlPassed ? 'passed' : 'failed',
|
|
||||||
this.preflightCurlResult?.responseTimeMs || null,
|
|
||||||
this.preflightCurlResult?.error || null,
|
|
||||||
]);
|
|
||||||
|
|
||||||
await this.pool.query(`
|
|
||||||
SELECT update_worker_preflight($1, 'http', $2, $3, $4)
|
|
||||||
`, [
|
|
||||||
this.workerId,
|
|
||||||
this.preflightHttpPassed ? 'passed' : 'failed',
|
|
||||||
this.preflightHttpResult?.responseTimeMs || null,
|
|
||||||
this.preflightHttpResult?.error || null,
|
|
||||||
]);
|
|
||||||
|
|
||||||
console.log(`[TaskWorker] Preflight status reported to worker_registry`);
|
|
||||||
} catch (err: any) {
|
|
||||||
// Non-fatal - worker can still function
|
|
||||||
console.warn(`[TaskWorker] Could not report preflight status: ${err.message}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Register worker with the registry (get friendly name)
|
* Register worker with the registry (get friendly name)
|
||||||
*/
|
*/
|
||||||
@@ -524,32 +252,21 @@ export class TaskWorker {
|
|||||||
const memUsage = process.memoryUsage();
|
const memUsage = process.memoryUsage();
|
||||||
const cpuUsage = process.cpuUsage();
|
const cpuUsage = process.cpuUsage();
|
||||||
const proxyLocation = this.crawlRotator.getProxyLocation();
|
const proxyLocation = this.crawlRotator.getProxyLocation();
|
||||||
const resourceStats = this.getResourceStats();
|
|
||||||
|
|
||||||
// Get array of active task IDs
|
|
||||||
const activeTaskIds = Array.from(this.activeTasks.keys());
|
|
||||||
|
|
||||||
await fetch(`${API_BASE_URL}/api/worker-registry/heartbeat`, {
|
await fetch(`${API_BASE_URL}/api/worker-registry/heartbeat`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
worker_id: this.workerId,
|
worker_id: this.workerId,
|
||||||
current_task_id: activeTaskIds[0] || null, // Primary task for backwards compat
|
current_task_id: this.currentTask?.id || null,
|
||||||
current_task_ids: activeTaskIds, // All active tasks
|
status: this.currentTask ? 'active' : 'idle',
|
||||||
active_task_count: this.activeTasks.size,
|
|
||||||
max_concurrent_tasks: this.maxConcurrentTasks,
|
|
||||||
status: this.activeTasks.size > 0 ? 'active' : 'idle',
|
|
||||||
resources: {
|
resources: {
|
||||||
memory_mb: Math.round(memUsage.heapUsed / 1024 / 1024),
|
memory_mb: Math.round(memUsage.heapUsed / 1024 / 1024),
|
||||||
memory_total_mb: Math.round(memUsage.heapTotal / 1024 / 1024),
|
memory_total_mb: Math.round(memUsage.heapTotal / 1024 / 1024),
|
||||||
memory_rss_mb: Math.round(memUsage.rss / 1024 / 1024),
|
memory_rss_mb: Math.round(memUsage.rss / 1024 / 1024),
|
||||||
memory_percent: Math.round(resourceStats.memoryPercent * 100),
|
|
||||||
cpu_user_ms: Math.round(cpuUsage.user / 1000),
|
cpu_user_ms: Math.round(cpuUsage.user / 1000),
|
||||||
cpu_system_ms: Math.round(cpuUsage.system / 1000),
|
cpu_system_ms: Math.round(cpuUsage.system / 1000),
|
||||||
cpu_percent: Math.round(resourceStats.cpuPercent),
|
|
||||||
proxy_location: proxyLocation,
|
proxy_location: proxyLocation,
|
||||||
is_backing_off: this.isBackingOff,
|
|
||||||
backoff_reason: this.backoffReason,
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
@@ -607,119 +324,24 @@ export class TaskWorker {
|
|||||||
// Register with the API to get a friendly name
|
// Register with the API to get a friendly name
|
||||||
await this.register();
|
await this.register();
|
||||||
|
|
||||||
// Run dual-transport preflights
|
|
||||||
await this.runDualPreflights();
|
|
||||||
|
|
||||||
// Start registry heartbeat
|
// Start registry heartbeat
|
||||||
this.startRegistryHeartbeat();
|
this.startRegistryHeartbeat();
|
||||||
|
|
||||||
const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)';
|
const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)';
|
||||||
const preflightMsg = `curl=${this.preflightCurlPassed ? '✓' : '✗'} http=${this.preflightHttpPassed ? '✓' : '✗'}`;
|
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg}`);
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (${preflightMsg}, max ${this.maxConcurrentTasks} concurrent tasks)`);
|
|
||||||
|
|
||||||
while (this.isRunning) {
|
while (this.isRunning) {
|
||||||
try {
|
try {
|
||||||
await this.mainLoop();
|
await this.processNextTask();
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
console.error(`[TaskWorker] Loop error:`, error.message);
|
console.error(`[TaskWorker] Loop error:`, error.message);
|
||||||
await this.sleep(POLL_INTERVAL_MS);
|
await this.sleep(POLL_INTERVAL_MS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for any remaining tasks to complete
|
|
||||||
if (this.taskPromises.size > 0) {
|
|
||||||
console.log(`[TaskWorker] Waiting for ${this.taskPromises.size} active tasks to complete...`);
|
|
||||||
await Promise.allSettled(this.taskPromises.values());
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`[TaskWorker] Worker ${this.workerId} stopped`);
|
console.log(`[TaskWorker] Worker ${this.workerId} stopped`);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Main loop - tries to fill up to maxConcurrentTasks
|
|
||||||
*/
|
|
||||||
private async mainLoop(): Promise<void> {
|
|
||||||
// Check resource usage and backoff if needed
|
|
||||||
const { backoff, reason } = this.shouldBackOff();
|
|
||||||
if (backoff) {
|
|
||||||
if (!this.isBackingOff) {
|
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} backing off: ${reason}`);
|
|
||||||
}
|
|
||||||
this.isBackingOff = true;
|
|
||||||
this.backoffReason = reason;
|
|
||||||
await this.sleep(BACKOFF_DURATION_MS);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clear backoff state
|
|
||||||
if (this.isBackingOff) {
|
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} resuming normal operation`);
|
|
||||||
this.isBackingOff = false;
|
|
||||||
this.backoffReason = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for decommission signal
|
|
||||||
const shouldDecommission = await this.checkDecommission();
|
|
||||||
if (shouldDecommission) {
|
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} received decommission signal - waiting for ${this.activeTasks.size} tasks to complete`);
|
|
||||||
// Stop accepting new tasks, wait for current to finish
|
|
||||||
this.isRunning = false;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to claim more tasks if we have capacity
|
|
||||||
if (this.canAcceptMoreTasks()) {
|
|
||||||
// Pass preflight capabilities to only claim compatible tasks
|
|
||||||
const task = await taskService.claimTask(
|
|
||||||
this.role,
|
|
||||||
this.workerId,
|
|
||||||
this.preflightCurlPassed,
|
|
||||||
this.preflightHttpPassed
|
|
||||||
);
|
|
||||||
|
|
||||||
if (task) {
|
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} claimed task ${task.id} (${task.role}) [${this.activeTasks.size + 1}/${this.maxConcurrentTasks}]`);
|
|
||||||
|
|
||||||
// =================================================================
|
|
||||||
// PREFLIGHT CHECK - CRITICAL: Worker MUST pass before task execution
|
|
||||||
// Verifies: 1) Proxy available 2) Proxy connected 3) Anti-detect ready
|
|
||||||
// =================================================================
|
|
||||||
const preflight = await this.crawlRotator.preflight();
|
|
||||||
if (!preflight.passed) {
|
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} PREFLIGHT FAILED for task ${task.id}: ${preflight.error}`);
|
|
||||||
console.log(`[TaskWorker] Releasing task ${task.id} back to pending - worker cannot proceed without proxy/anti-detect`);
|
|
||||||
|
|
||||||
// Release task back to pending so another worker can pick it up
|
|
||||||
await taskService.releaseTask(task.id);
|
|
||||||
|
|
||||||
// Wait before trying again - give proxies time to recover
|
|
||||||
await this.sleep(30000); // 30 second wait on preflight failure
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} preflight PASSED for task ${task.id} (proxy: ${preflight.proxyIp}, ${preflight.responseTimeMs}ms)`);
|
|
||||||
|
|
||||||
this.activeTasks.set(task.id, task);
|
|
||||||
|
|
||||||
// Start task in background (don't await)
|
|
||||||
const taskPromise = this.executeTask(task);
|
|
||||||
this.taskPromises.set(task.id, taskPromise);
|
|
||||||
|
|
||||||
// Clean up when done
|
|
||||||
taskPromise.finally(() => {
|
|
||||||
this.activeTasks.delete(task.id);
|
|
||||||
this.taskPromises.delete(task.id);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Immediately try to claim more tasks (don't wait for poll interval)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// No task claimed or at capacity - wait before next poll
|
|
||||||
await this.sleep(POLL_INTERVAL_MS);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stop the worker
|
* Stop the worker
|
||||||
*/
|
*/
|
||||||
@@ -732,10 +354,23 @@ export class TaskWorker {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Execute a single task (runs concurrently with other tasks)
|
* Process the next available task
|
||||||
*/
|
*/
|
||||||
private async executeTask(task: WorkerTask): Promise<void> {
|
private async processNextTask(): Promise<void> {
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} starting task ${task.id} (${task.role}) for dispensary ${task.dispensary_id || 'N/A'}`);
|
// Try to claim a task
|
||||||
|
const task = await taskService.claimTask(this.role, this.workerId);
|
||||||
|
|
||||||
|
if (!task) {
|
||||||
|
// No tasks available, wait and retry
|
||||||
|
await this.sleep(POLL_INTERVAL_MS);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.currentTask = task;
|
||||||
|
console.log(`[TaskWorker] Claimed task ${task.id} (${task.role}) for dispensary ${task.dispensary_id || 'N/A'}`);
|
||||||
|
|
||||||
|
// Start heartbeat
|
||||||
|
this.startHeartbeat(task.id);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Mark as running
|
// Mark as running
|
||||||
@@ -755,7 +390,6 @@ export class TaskWorker {
|
|||||||
heartbeat: async () => {
|
heartbeat: async () => {
|
||||||
await taskService.heartbeat(task.id);
|
await taskService.heartbeat(task.id);
|
||||||
},
|
},
|
||||||
crawlRotator: this.crawlRotator,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Execute the task
|
// Execute the task
|
||||||
@@ -765,7 +399,7 @@ export class TaskWorker {
|
|||||||
// Mark as completed
|
// Mark as completed
|
||||||
await taskService.completeTask(task.id, result);
|
await taskService.completeTask(task.id, result);
|
||||||
await this.reportTaskCompletion(true);
|
await this.reportTaskCompletion(true);
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} completed task ${task.id} [${this.activeTasks.size}/${this.maxConcurrentTasks} active]`);
|
console.log(`[TaskWorker] ${this.friendlyName} completed task ${task.id}`);
|
||||||
|
|
||||||
// Chain next task if applicable
|
// Chain next task if applicable
|
||||||
const chainedTask = await taskService.chainNextTask({
|
const chainedTask = await taskService.chainNextTask({
|
||||||
@@ -787,35 +421,9 @@ export class TaskWorker {
|
|||||||
await taskService.failTask(task.id, error.message);
|
await taskService.failTask(task.id, error.message);
|
||||||
await this.reportTaskCompletion(false);
|
await this.reportTaskCompletion(false);
|
||||||
console.error(`[TaskWorker] ${this.friendlyName} task ${task.id} error:`, error.message);
|
console.error(`[TaskWorker] ${this.friendlyName} task ${task.id} error:`, error.message);
|
||||||
}
|
} finally {
|
||||||
// Note: cleanup (removing from activeTasks) is handled in mainLoop's finally block
|
this.stopHeartbeat();
|
||||||
}
|
this.currentTask = null;
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if this worker has been flagged for decommission
|
|
||||||
* Returns true if worker should stop after current task
|
|
||||||
*/
|
|
||||||
private async checkDecommission(): Promise<boolean> {
|
|
||||||
try {
|
|
||||||
// Check worker_registry for decommission flag
|
|
||||||
const result = await this.pool.query(
|
|
||||||
`SELECT decommission_requested, decommission_reason
|
|
||||||
FROM worker_registry
|
|
||||||
WHERE worker_id = $1`,
|
|
||||||
[this.workerId]
|
|
||||||
);
|
|
||||||
|
|
||||||
if (result.rows.length > 0 && result.rows[0].decommission_requested) {
|
|
||||||
const reason = result.rows[0].decommission_reason || 'No reason provided';
|
|
||||||
console.log(`[TaskWorker] Decommission requested: ${reason}`);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
} catch (error: any) {
|
|
||||||
// If we can't check, continue running
|
|
||||||
console.warn(`[TaskWorker] Could not check decommission status: ${error.message}`);
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -852,29 +460,12 @@ export class TaskWorker {
|
|||||||
/**
|
/**
|
||||||
* Get worker info
|
* Get worker info
|
||||||
*/
|
*/
|
||||||
getInfo(): {
|
getInfo(): { workerId: string; role: TaskRole | null; isRunning: boolean; currentTaskId: number | null } {
|
||||||
workerId: string;
|
|
||||||
role: TaskRole | null;
|
|
||||||
isRunning: boolean;
|
|
||||||
activeTaskIds: number[];
|
|
||||||
activeTaskCount: number;
|
|
||||||
maxConcurrentTasks: number;
|
|
||||||
isBackingOff: boolean;
|
|
||||||
backoffReason: string | null;
|
|
||||||
preflightCurlPassed: boolean;
|
|
||||||
preflightHttpPassed: boolean;
|
|
||||||
} {
|
|
||||||
return {
|
return {
|
||||||
workerId: this.workerId,
|
workerId: this.workerId,
|
||||||
role: this.role,
|
role: this.role,
|
||||||
isRunning: this.isRunning,
|
isRunning: this.isRunning,
|
||||||
activeTaskIds: Array.from(this.activeTasks.keys()),
|
currentTaskId: this.currentTask?.id || null,
|
||||||
activeTaskCount: this.activeTasks.size,
|
|
||||||
maxConcurrentTasks: this.maxConcurrentTasks,
|
|
||||||
isBackingOff: this.isBackingOff,
|
|
||||||
backoffReason: this.backoffReason,
|
|
||||||
preflightCurlPassed: this.preflightCurlPassed,
|
|
||||||
preflightHttpPassed: this.preflightHttpPassed,
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -891,8 +482,8 @@ async function main(): Promise<void> {
|
|||||||
'store_discovery',
|
'store_discovery',
|
||||||
'entry_point_discovery',
|
'entry_point_discovery',
|
||||||
'product_discovery',
|
'product_discovery',
|
||||||
'payload_fetch', // Fetches from API, saves to disk
|
'payload_fetch', // NEW: Fetches from API, saves to disk
|
||||||
'product_refresh', // Reads from disk, processes to DB
|
'product_refresh', // CHANGED: Reads from disk, processes to DB
|
||||||
'analytics_refresh',
|
'analytics_refresh',
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|||||||
@@ -1,180 +0,0 @@
|
|||||||
/**
|
|
||||||
* Stealth Browser Payload Capture - Direct GraphQL Injection
|
|
||||||
*
|
|
||||||
* Uses the browser session to make GraphQL requests that look organic.
|
|
||||||
* Adds proper headers matching what Dutchie's frontend sends.
|
|
||||||
*/
|
|
||||||
|
|
||||||
const puppeteer = require('puppeteer-extra');
|
|
||||||
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
|
||||||
const fs = require('fs');
|
|
||||||
|
|
||||||
puppeteer.use(StealthPlugin());
|
|
||||||
|
|
||||||
async function capturePayload(config) {
|
|
||||||
const {
|
|
||||||
dispensaryId = null,
|
|
||||||
platformId,
|
|
||||||
cName,
|
|
||||||
outputPath = `/tmp/payload_${cName}_${Date.now()}.json`,
|
|
||||||
} = config;
|
|
||||||
|
|
||||||
const browser = await puppeteer.launch({
|
|
||||||
headless: 'new',
|
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
|
||||||
});
|
|
||||||
|
|
||||||
const page = await browser.newPage();
|
|
||||||
|
|
||||||
// Establish session by visiting the embedded menu
|
|
||||||
const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
|
|
||||||
console.log(`[Capture] Establishing session at ${embedUrl}...`);
|
|
||||||
|
|
||||||
await page.goto(embedUrl, {
|
|
||||||
waitUntil: 'networkidle2',
|
|
||||||
timeout: 60000
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log('[Capture] Session established, fetching ALL products...');
|
|
||||||
|
|
||||||
// Fetch all products using GET requests with proper headers
|
|
||||||
const result = await page.evaluate(async (platformId, cName) => {
|
|
||||||
const allProducts = [];
|
|
||||||
const logs = [];
|
|
||||||
let pageNum = 0;
|
|
||||||
const perPage = 100;
|
|
||||||
let totalCount = 0;
|
|
||||||
const sessionId = 'browser-session-' + Date.now();
|
|
||||||
|
|
||||||
try {
|
|
||||||
while (pageNum < 30) { // Max 30 pages = 3000 products
|
|
||||||
const variables = {
|
|
||||||
includeEnterpriseSpecials: false,
|
|
||||||
productsFilter: {
|
|
||||||
dispensaryId: platformId,
|
|
||||||
pricingType: 'rec',
|
|
||||||
Status: 'Active', // 'Active' for in-stock products per CLAUDE.md
|
|
||||||
types: [],
|
|
||||||
useCache: true,
|
|
||||||
isDefaultSort: true,
|
|
||||||
sortBy: 'popularSortIdx',
|
|
||||||
sortDirection: 1,
|
|
||||||
bypassOnlineThresholds: true,
|
|
||||||
isKioskMenu: false,
|
|
||||||
removeProductsBelowOptionThresholds: false,
|
|
||||||
},
|
|
||||||
page: pageNum,
|
|
||||||
perPage: perPage,
|
|
||||||
};
|
|
||||||
|
|
||||||
const extensions = {
|
|
||||||
persistedQuery: {
|
|
||||||
version: 1,
|
|
||||||
sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Build GET URL like the browser does
|
|
||||||
const qs = new URLSearchParams({
|
|
||||||
operationName: 'FilteredProducts',
|
|
||||||
variables: JSON.stringify(variables),
|
|
||||||
extensions: JSON.stringify(extensions)
|
|
||||||
});
|
|
||||||
const url = `https://dutchie.com/api-3/graphql?${qs.toString()}`;
|
|
||||||
|
|
||||||
const response = await fetch(url, {
|
|
||||||
method: 'GET',
|
|
||||||
headers: {
|
|
||||||
'Accept': 'application/json',
|
|
||||||
'content-type': 'application/json',
|
|
||||||
'x-dutchie-session': sessionId,
|
|
||||||
'apollographql-client-name': 'Marketplace (production)',
|
|
||||||
},
|
|
||||||
credentials: 'include'
|
|
||||||
});
|
|
||||||
|
|
||||||
logs.push(`Page ${pageNum}: HTTP ${response.status}`);
|
|
||||||
|
|
||||||
if (!response.ok) {
|
|
||||||
const text = await response.text();
|
|
||||||
logs.push(`HTTP error: ${response.status} - ${text.slice(0, 200)}`);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
const json = await response.json();
|
|
||||||
|
|
||||||
if (json.errors) {
|
|
||||||
logs.push(`GraphQL error: ${JSON.stringify(json.errors).slice(0, 200)}`);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
const data = json?.data?.filteredProducts;
|
|
||||||
if (!data || !data.products) {
|
|
||||||
logs.push('No products in response');
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
const products = data.products;
|
|
||||||
allProducts.push(...products);
|
|
||||||
|
|
||||||
if (pageNum === 0) {
|
|
||||||
totalCount = data.queryInfo?.totalCount || 0;
|
|
||||||
logs.push(`Total reported: ${totalCount}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
logs.push(`Got ${products.length} products (total: ${allProducts.length}/${totalCount})`);
|
|
||||||
|
|
||||||
if (allProducts.length >= totalCount || products.length < perPage) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
pageNum++;
|
|
||||||
|
|
||||||
// Small delay between pages to be polite
|
|
||||||
await new Promise(r => setTimeout(r, 200));
|
|
||||||
}
|
|
||||||
} catch (err) {
|
|
||||||
logs.push(`Error: ${err.message}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
return { products: allProducts, totalCount, logs };
|
|
||||||
}, platformId, cName);
|
|
||||||
|
|
||||||
await browser.close();
|
|
||||||
|
|
||||||
// Print logs from browser context
|
|
||||||
result.logs.forEach(log => console.log(`[Browser] ${log}`));
|
|
||||||
|
|
||||||
console.log(`[Capture] Got ${result.products.length} products (API reported ${result.totalCount})`);
|
|
||||||
|
|
||||||
const payload = {
|
|
||||||
dispensaryId: dispensaryId,
|
|
||||||
platformId: platformId,
|
|
||||||
cName,
|
|
||||||
fetchedAt: new Date().toISOString(),
|
|
||||||
productCount: result.products.length,
|
|
||||||
products: result.products,
|
|
||||||
};
|
|
||||||
|
|
||||||
fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
|
|
||||||
|
|
||||||
console.log(`\n=== Capture Complete ===`);
|
|
||||||
console.log(`Total products: ${result.products.length}`);
|
|
||||||
console.log(`Saved to: ${outputPath}`);
|
|
||||||
console.log(`File size: ${(fs.statSync(outputPath).size / 1024).toFixed(1)} KB`);
|
|
||||||
|
|
||||||
return payload;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Run
|
|
||||||
(async () => {
|
|
||||||
const payload = await capturePayload({
|
|
||||||
cName: 'AZ-Deeply-Rooted',
|
|
||||||
platformId: '6405ef617056e8014d79101b',
|
|
||||||
});
|
|
||||||
|
|
||||||
if (payload.products.length > 0) {
|
|
||||||
const sample = payload.products[0];
|
|
||||||
console.log(`\nSample: ${sample.Name || sample.name} - ${sample.brand?.name || sample.brandName}`);
|
|
||||||
}
|
|
||||||
})().catch(console.error);
|
|
||||||
@@ -14,5 +14,5 @@
|
|||||||
"allowSyntheticDefaultImports": true
|
"allowSyntheticDefaultImports": true
|
||||||
},
|
},
|
||||||
"include": ["src/**/*"],
|
"include": ["src/**/*"],
|
||||||
"exclude": ["node_modules", "dist", "src/**/*.test.ts", "src/**/__tests__/**", "src/_deprecated/**"]
|
"exclude": ["node_modules", "dist", "src/**/*.test.ts", "src/**/__tests__/**"]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8" />
|
<meta charset="UTF-8" />
|
||||||
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
|
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||||
<title>CannaIQ - Cannabis Menu Intelligence Platform</title>
|
<title>CannaIQ - Cannabis Menu Intelligence Platform</title>
|
||||||
<meta name="description" content="CannaIQ provides real-time cannabis dispensary menu data, product tracking, and analytics for dispensaries across Arizona." />
|
<meta name="description" content="CannaIQ provides real-time cannabis dispensary menu data, product tracking, and analytics for dispensaries across Arizona." />
|
||||||
|
|||||||
@@ -1,5 +0,0 @@
|
|||||||
<svg viewBox="0 0 32 32" xmlns="http://www.w3.org/2000/svg">
|
|
||||||
<rect width="32" height="32" rx="6" fill="#059669"/>
|
|
||||||
<path d="M16 6C12.5 6 9.5 7.5 7.5 10L16 16L24.5 10C22.5 7.5 19.5 6 16 6Z" fill="white"/>
|
|
||||||
<path d="M7.5 10C6 12 5 14.5 5 17C5 22.5 10 26 16 26C22 26 27 22.5 27 17C27 14.5 26 12 24.5 10L16 16L7.5 10Z" fill="white" fill-opacity="0.7"/>
|
|
||||||
</svg>
|
|
||||||
|
Before Width: | Height: | Size: 360 B |
@@ -47,6 +47,7 @@ import CrossStateCompare from './pages/CrossStateCompare';
|
|||||||
import StateDetail from './pages/StateDetail';
|
import StateDetail from './pages/StateDetail';
|
||||||
import { Discovery } from './pages/Discovery';
|
import { Discovery } from './pages/Discovery';
|
||||||
import { WorkersDashboard } from './pages/WorkersDashboard';
|
import { WorkersDashboard } from './pages/WorkersDashboard';
|
||||||
|
import { JobQueue } from './pages/JobQueue';
|
||||||
import TasksDashboard from './pages/TasksDashboard';
|
import TasksDashboard from './pages/TasksDashboard';
|
||||||
import { ScraperOverviewDashboard } from './pages/ScraperOverviewDashboard';
|
import { ScraperOverviewDashboard } from './pages/ScraperOverviewDashboard';
|
||||||
import { SeoOrchestrator } from './pages/admin/seo/SeoOrchestrator';
|
import { SeoOrchestrator } from './pages/admin/seo/SeoOrchestrator';
|
||||||
@@ -124,6 +125,8 @@ export default function App() {
|
|||||||
<Route path="/discovery" element={<PrivateRoute><Discovery /></PrivateRoute>} />
|
<Route path="/discovery" element={<PrivateRoute><Discovery /></PrivateRoute>} />
|
||||||
{/* Workers Dashboard */}
|
{/* Workers Dashboard */}
|
||||||
<Route path="/workers" element={<PrivateRoute><WorkersDashboard /></PrivateRoute>} />
|
<Route path="/workers" element={<PrivateRoute><WorkersDashboard /></PrivateRoute>} />
|
||||||
|
{/* Job Queue Management */}
|
||||||
|
<Route path="/job-queue" element={<PrivateRoute><JobQueue /></PrivateRoute>} />
|
||||||
{/* Task Queue Dashboard */}
|
{/* Task Queue Dashboard */}
|
||||||
<Route path="/tasks" element={<PrivateRoute><TasksDashboard /></PrivateRoute>} />
|
<Route path="/tasks" element={<PrivateRoute><TasksDashboard /></PrivateRoute>} />
|
||||||
{/* Scraper Overview Dashboard (new primary) */}
|
{/* Scraper Overview Dashboard (new primary) */}
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
import { ReactNode, useEffect, useState, useRef } from 'react';
|
import { ReactNode, useEffect, useState } from 'react';
|
||||||
import { useNavigate, useLocation, Link } from 'react-router-dom';
|
import { useNavigate, useLocation } from 'react-router-dom';
|
||||||
import { useAuthStore } from '../store/authStore';
|
import { useAuthStore } from '../store/authStore';
|
||||||
import { api } from '../lib/api';
|
import { api } from '../lib/api';
|
||||||
|
import { StateSelector } from './StateSelector';
|
||||||
import {
|
import {
|
||||||
LayoutDashboard,
|
LayoutDashboard,
|
||||||
Building2,
|
Building2,
|
||||||
@@ -47,8 +48,8 @@ interface NavLinkProps {
|
|||||||
|
|
||||||
function NavLink({ to, icon, label, isActive }: NavLinkProps) {
|
function NavLink({ to, icon, label, isActive }: NavLinkProps) {
|
||||||
return (
|
return (
|
||||||
<Link
|
<a
|
||||||
to={to}
|
href={to}
|
||||||
className={`flex items-center gap-3 px-3 py-2 rounded-lg text-sm font-medium transition-colors ${
|
className={`flex items-center gap-3 px-3 py-2 rounded-lg text-sm font-medium transition-colors ${
|
||||||
isActive
|
isActive
|
||||||
? 'bg-emerald-50 text-emerald-700'
|
? 'bg-emerald-50 text-emerald-700'
|
||||||
@@ -57,7 +58,7 @@ function NavLink({ to, icon, label, isActive }: NavLinkProps) {
|
|||||||
>
|
>
|
||||||
<span className={`flex-shrink-0 ${isActive ? 'text-emerald-600' : 'text-gray-400'}`}>{icon}</span>
|
<span className={`flex-shrink-0 ${isActive ? 'text-emerald-600' : 'text-gray-400'}`}>{icon}</span>
|
||||||
<span>{label}</span>
|
<span>{label}</span>
|
||||||
</Link>
|
</a>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -85,8 +86,6 @@ export function Layout({ children }: LayoutProps) {
|
|||||||
const { user, logout } = useAuthStore();
|
const { user, logout } = useAuthStore();
|
||||||
const [versionInfo, setVersionInfo] = useState<VersionInfo | null>(null);
|
const [versionInfo, setVersionInfo] = useState<VersionInfo | null>(null);
|
||||||
const [sidebarOpen, setSidebarOpen] = useState(false);
|
const [sidebarOpen, setSidebarOpen] = useState(false);
|
||||||
const navRef = useRef<HTMLElement>(null);
|
|
||||||
const scrollPositionRef = useRef<number>(0);
|
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const fetchVersion = async () => {
|
const fetchVersion = async () => {
|
||||||
@@ -112,34 +111,16 @@ export function Layout({ children }: LayoutProps) {
|
|||||||
return location.pathname.startsWith(path);
|
return location.pathname.startsWith(path);
|
||||||
};
|
};
|
||||||
|
|
||||||
// Save scroll position before route change
|
// Close sidebar on route change (mobile)
|
||||||
useEffect(() => {
|
|
||||||
const nav = navRef.current;
|
|
||||||
if (nav) {
|
|
||||||
const handleScroll = () => {
|
|
||||||
scrollPositionRef.current = nav.scrollTop;
|
|
||||||
};
|
|
||||||
nav.addEventListener('scroll', handleScroll);
|
|
||||||
return () => nav.removeEventListener('scroll', handleScroll);
|
|
||||||
}
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
// Restore scroll position after route change and close mobile sidebar
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
setSidebarOpen(false);
|
setSidebarOpen(false);
|
||||||
// Restore scroll position after render
|
|
||||||
requestAnimationFrame(() => {
|
|
||||||
if (navRef.current) {
|
|
||||||
navRef.current.scrollTop = scrollPositionRef.current;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}, [location.pathname]);
|
}, [location.pathname]);
|
||||||
|
|
||||||
const sidebarContent = (
|
const sidebarContent = (
|
||||||
<>
|
<>
|
||||||
{/* Logo/Brand */}
|
{/* Logo/Brand */}
|
||||||
<div className="px-6 py-5 border-b border-gray-200">
|
<div className="px-6 py-5 border-b border-gray-200">
|
||||||
<Link to="/dashboard" className="flex items-center gap-3 hover:opacity-80 transition-opacity">
|
<div className="flex items-center gap-3">
|
||||||
<div className="w-8 h-8 bg-emerald-600 rounded-lg flex items-center justify-center">
|
<div className="w-8 h-8 bg-emerald-600 rounded-lg flex items-center justify-center">
|
||||||
<svg viewBox="0 0 24 24" className="w-5 h-5 text-white" fill="currentColor">
|
<svg viewBox="0 0 24 24" className="w-5 h-5 text-white" fill="currentColor">
|
||||||
<path d="M12 2C8.5 2 5.5 3.5 3.5 6L12 12L20.5 6C18.5 3.5 15.5 2 12 2Z" />
|
<path d="M12 2C8.5 2 5.5 3.5 3.5 6L12 12L20.5 6C18.5 3.5 15.5 2 12 2Z" />
|
||||||
@@ -154,13 +135,17 @@ export function Layout({ children }: LayoutProps) {
|
|||||||
</p>
|
</p>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
</Link>
|
</div>
|
||||||
<p className="text-xs text-gray-500 mt-2 truncate">{user?.email}</p>
|
<p className="text-xs text-gray-500 mt-2 truncate">{user?.email}</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* State Selector */}
|
||||||
|
<div className="px-4 py-3 border-b border-gray-200 bg-gray-50">
|
||||||
|
<StateSelector showLabel={false} />
|
||||||
|
</div>
|
||||||
|
|
||||||
{/* Navigation */}
|
{/* Navigation */}
|
||||||
<nav ref={navRef} className="flex-1 px-3 py-4 space-y-6 overflow-y-auto">
|
<nav className="flex-1 px-3 py-4 space-y-6 overflow-y-auto">
|
||||||
<NavSection title="Main">
|
<NavSection title="Main">
|
||||||
<NavLink to="/dashboard" icon={<LayoutDashboard className="w-4 h-4" />} label="Dashboard" isActive={isActive('/dashboard', true)} />
|
<NavLink to="/dashboard" icon={<LayoutDashboard className="w-4 h-4" />} label="Dashboard" isActive={isActive('/dashboard', true)} />
|
||||||
<NavLink to="/dispensaries" icon={<Building2 className="w-4 h-4" />} label="Dispensaries" isActive={isActive('/dispensaries')} />
|
<NavLink to="/dispensaries" icon={<Building2 className="w-4 h-4" />} label="Dispensaries" isActive={isActive('/dispensaries')} />
|
||||||
@@ -179,7 +164,8 @@ export function Layout({ children }: LayoutProps) {
|
|||||||
<NavLink to="/admin/orchestrator" icon={<Activity className="w-4 h-4" />} label="Orchestrator" isActive={isActive('/admin/orchestrator')} />
|
<NavLink to="/admin/orchestrator" icon={<Activity className="w-4 h-4" />} label="Orchestrator" isActive={isActive('/admin/orchestrator')} />
|
||||||
<NavLink to="/users" icon={<UserCog className="w-4 h-4" />} label="Users" isActive={isActive('/users')} />
|
<NavLink to="/users" icon={<UserCog className="w-4 h-4" />} label="Users" isActive={isActive('/users')} />
|
||||||
<NavLink to="/workers" icon={<Users className="w-4 h-4" />} label="Workers" isActive={isActive('/workers')} />
|
<NavLink to="/workers" icon={<Users className="w-4 h-4" />} label="Workers" isActive={isActive('/workers')} />
|
||||||
<NavLink to="/tasks" icon={<ListChecks className="w-4 h-4" />} label="Tasks" isActive={isActive('/tasks')} />
|
<NavLink to="/job-queue" icon={<ListOrdered className="w-4 h-4" />} label="Job Queue" isActive={isActive('/job-queue')} />
|
||||||
|
<NavLink to="/tasks" icon={<ListChecks className="w-4 h-4" />} label="Task Queue" isActive={isActive('/tasks')} />
|
||||||
<NavLink to="/admin/seo" icon={<FileText className="w-4 h-4" />} label="SEO Pages" isActive={isActive('/admin/seo')} />
|
<NavLink to="/admin/seo" icon={<FileText className="w-4 h-4" />} label="SEO Pages" isActive={isActive('/admin/seo')} />
|
||||||
<NavLink to="/proxies" icon={<Shield className="w-4 h-4" />} label="Proxies" isActive={isActive('/proxies')} />
|
<NavLink to="/proxies" icon={<Shield className="w-4 h-4" />} label="Proxies" isActive={isActive('/proxies')} />
|
||||||
<NavLink to="/api-permissions" icon={<Key className="w-4 h-4" />} label="API Keys" isActive={isActive('/api-permissions')} />
|
<NavLink to="/api-permissions" icon={<Key className="w-4 h-4" />} label="API Keys" isActive={isActive('/api-permissions')} />
|
||||||
@@ -228,7 +214,7 @@ export function Layout({ children }: LayoutProps) {
|
|||||||
<button onClick={() => setSidebarOpen(true)} className="p-2 -ml-2 rounded-lg hover:bg-gray-100">
|
<button onClick={() => setSidebarOpen(true)} className="p-2 -ml-2 rounded-lg hover:bg-gray-100">
|
||||||
<Menu className="w-5 h-5 text-gray-600" />
|
<Menu className="w-5 h-5 text-gray-600" />
|
||||||
</button>
|
</button>
|
||||||
<Link to="/dashboard" className="flex items-center gap-2 hover:opacity-80 transition-opacity">
|
<div className="flex items-center gap-2">
|
||||||
<div className="w-6 h-6 bg-emerald-600 rounded flex items-center justify-center">
|
<div className="w-6 h-6 bg-emerald-600 rounded flex items-center justify-center">
|
||||||
<svg viewBox="0 0 24 24" className="w-4 h-4 text-white" fill="currentColor">
|
<svg viewBox="0 0 24 24" className="w-4 h-4 text-white" fill="currentColor">
|
||||||
<path d="M12 2C8.5 2 5.5 3.5 3.5 6L12 12L20.5 6C18.5 3.5 15.5 2 12 2Z" />
|
<path d="M12 2C8.5 2 5.5 3.5 3.5 6L12 12L20.5 6C18.5 3.5 15.5 2 12 2Z" />
|
||||||
@@ -236,7 +222,7 @@ export function Layout({ children }: LayoutProps) {
|
|||||||
</svg>
|
</svg>
|
||||||
</div>
|
</div>
|
||||||
<span className="font-semibold text-gray-900">CannaIQ</span>
|
<span className="font-semibold text-gray-900">CannaIQ</span>
|
||||||
</Link>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Page content */}
|
{/* Page content */}
|
||||||
|
|||||||
@@ -1,138 +0,0 @@
|
|||||||
import { useState, useEffect, useRef } from 'react';
|
|
||||||
import { api } from '../lib/api';
|
|
||||||
import { Shield, X, Loader2 } from 'lucide-react';
|
|
||||||
|
|
||||||
/** Props accepted by the password re-confirmation modal. */
interface PasswordConfirmModalProps {
  /** When false the modal renders nothing. */
  isOpen: boolean;
  /** Invoked to dismiss the modal. */
  onClose: () => void;
  /** Invoked after the entered password has been verified. */
  onConfirm: () => void;
  /** Heading shown in the modal header. */
  title: string;
  /** Explanatory text shown above the password field. */
  description: string;
}
|
|
||||||
|
|
||||||
/**
 * Modal that asks the user to re-enter their password before an action runs.
 *
 * The password is sent to `api.verifyPassword`. When the response reports
 * `verified`, `onConfirm` and then `onClose` are called; otherwise an error
 * message is rendered under the input and the modal stays open.
 *
 * While a verification request is in flight the modal cannot be dismissed
 * (backdrop, X and Cancel are all inert). This keeps the three close paths
 * consistent and prevents `onConfirm` from firing after the user has already
 * closed the dialog.
 *
 * @param isOpen - When false the component renders `null`.
 * @param onClose - Called to dismiss the modal.
 * @param onConfirm - Called once the password has been verified.
 * @param title - Heading text for the modal header.
 * @param description - Text shown above the password field.
 */
export function PasswordConfirmModal({
  isOpen,
  onClose,
  onConfirm,
  title,
  description,
}: PasswordConfirmModalProps) {
  const [password, setPassword] = useState('');
  const [error, setError] = useState('');
  const [loading, setLoading] = useState(false);
  const inputRef = useRef<HTMLInputElement>(null);

  useEffect(() => {
    let focusTimer: ReturnType<typeof setTimeout> | undefined;
    if (isOpen) {
      // Start every opening with a clean form.
      setPassword('');
      setError('');
      // Focus the input when modal opens
      focusTimer = setTimeout(() => inputRef.current?.focus(), 100);
    }
    // Cancel the pending focus if the modal closes or unmounts first.
    return () => {
      if (focusTimer !== undefined) {
        clearTimeout(focusTimer);
      }
    };
  }, [isOpen]);

  // User-initiated dismissal; ignored while a verification is in flight.
  const handleClose = () => {
    if (loading) return;
    onClose();
  };

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    if (!password.trim()) {
      setError('Password is required');
      return;
    }

    setLoading(true);
    setError('');

    try {
      const result = await api.verifyPassword(password);
      if (result.verified) {
        onConfirm();
        // Call onClose directly: `loading` is still true here, so handleClose
        // would refuse to close.
        onClose();
      } else {
        setError('Invalid password');
      }
    } catch (err: any) {
      setError(err.message || 'Verification failed');
    } finally {
      setLoading(false);
    }
  };

  if (!isOpen) return null;

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center">
      {/* Backdrop */}
      <div
        className="absolute inset-0 bg-black bg-opacity-50"
        onClick={handleClose}
      />

      {/* Modal */}
      <div className="relative bg-white rounded-lg shadow-xl max-w-md w-full mx-4">
        {/* Header */}
        <div className="flex items-center justify-between px-6 py-4 border-b border-gray-200">
          <div className="flex items-center gap-3">
            <div className="p-2 bg-amber-100 rounded-lg">
              <Shield className="w-5 h-5 text-amber-600" />
            </div>
            <h3 className="text-lg font-semibold text-gray-900">{title}</h3>
          </div>
          <button
            onClick={handleClose}
            className="p-1 hover:bg-gray-100 rounded-lg transition-colors"
          >
            <X className="w-5 h-5 text-gray-500" />
          </button>
        </div>

        {/* Body */}
        <form onSubmit={handleSubmit}>
          <div className="px-6 py-4">
            <p className="text-gray-600 mb-4">{description}</p>

            <div className="space-y-2">
              <label
                htmlFor="password"
                className="block text-sm font-medium text-gray-700"
              >
                Enter your password to continue
              </label>
              <input
                ref={inputRef}
                type="password"
                id="password"
                value={password}
                onChange={(e) => setPassword(e.target.value)}
                className="w-full px-4 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-emerald-500 focus:border-emerald-500"
                placeholder="Password"
                disabled={loading}
              />
              {error && (
                <p className="text-sm text-red-600">{error}</p>
              )}
            </div>
          </div>

          {/* Footer */}
          <div className="flex justify-end gap-3 px-6 py-4 border-t border-gray-200 bg-gray-50 rounded-b-lg">
            <button
              type="button"
              onClick={handleClose}
              disabled={loading}
              className="px-4 py-2 text-gray-700 hover:bg-gray-100 rounded-lg transition-colors"
            >
              Cancel
            </button>
            <button
              type="submit"
              disabled={loading}
              className="px-4 py-2 bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 transition-colors disabled:opacity-50 flex items-center gap-2"
            >
              {loading && <Loader2 className="w-4 h-4 animate-spin" />}
              Confirm
            </button>
          </div>
        </form>
      </div>
    </div>
  );
}
|
|
||||||
@@ -84,13 +84,6 @@ class ApiClient {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * POSTs `{ password }` as a JSON body to `/api/auth/verify-password` through
 * `this.request` and resolves with its result, typed as
 * `{ verified: boolean; error?: string }`.
 */
async verifyPassword(password: string) {
  const payload = JSON.stringify({ password });
  return this.request<{ verified: boolean; error?: string }>(
    '/api/auth/verify-password',
    { method: 'POST', body: payload }
  );
}
|
|
||||||
|
|
||||||
async getMe() {
|
async getMe() {
|
||||||
return this.request<{ user: any }>('/api/auth/me');
|
return this.request<{ user: any }>('/api/auth/me');
|
||||||
}
|
}
|
||||||
@@ -2957,25 +2950,6 @@ class ApiClient {
|
|||||||
{ method: 'POST' }
|
{ method: 'POST' }
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// K8s Worker Control
|
|
||||||
/**
 * Requests `/api/k8s/workers` through `this.request` (no options passed) and
 * resolves with the replica-status payload described by the type argument.
 */
async getK8sWorkers() {
  const endpoint = '/api/k8s/workers';
  return this.request<{
    success: boolean;
    available: boolean;
    replicas: number;
    readyReplicas: number;
    availableReplicas?: number;
    error?: string;
  }>(endpoint);
}
|
|
||||||
|
|
||||||
/**
 * POSTs `{ replicas }` as a JSON body to `/api/k8s/workers/scale` through
 * `this.request` and resolves with its result.
 *
 * @param replicas - Replica count sent in the request body.
 */
async scaleK8sWorkers(replicas: number) {
  const payload = JSON.stringify({ replicas });
  return this.request<{
    success: boolean;
    replicas: number;
    message?: string;
    error?: string;
  }>('/api/k8s/workers/scale', { method: 'POST', body: payload });
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export const api = new ApiClient(API_URL);
|
export const api = new ApiClient(API_URL);
|
||||||
|
|||||||
1068
cannaiq/src/pages/JobQueue.tsx
Normal file
1068
cannaiq/src/pages/JobQueue.tsx
Normal file
File diff suppressed because it is too large
Load Diff
@@ -275,7 +275,7 @@ export default function NationalDashboard() {
|
|||||||
<>
|
<>
|
||||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
|
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
|
||||||
<MetricCard
|
<MetricCard
|
||||||
title="Regions (US + CA)"
|
title="States"
|
||||||
value={summary.activeStates}
|
value={summary.activeStates}
|
||||||
icon={Globe}
|
icon={Globe}
|
||||||
/>
|
/>
|
||||||
|
|||||||
@@ -14,27 +14,11 @@ export function Settings() {
|
|||||||
loadSettings();
|
loadSettings();
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
// AI-related settings are managed in /ai-settings, filter them out here
|
|
||||||
const AI_SETTING_KEYS = [
|
|
||||||
'ai_model',
|
|
||||||
'ai_provider',
|
|
||||||
'anthropic_api_key',
|
|
||||||
'openai_api_key',
|
|
||||||
'anthropic_model',
|
|
||||||
'openai_model',
|
|
||||||
'anthropic_enabled',
|
|
||||||
'openai_enabled',
|
|
||||||
];
|
|
||||||
|
|
||||||
const loadSettings = async () => {
|
const loadSettings = async () => {
|
||||||
setLoading(true);
|
setLoading(true);
|
||||||
try {
|
try {
|
||||||
const data = await api.getSettings();
|
const data = await api.getSettings();
|
||||||
// Filter out AI settings - those are managed in /ai-settings
|
setSettings(data.settings);
|
||||||
const filteredSettings = (data.settings || []).filter(
|
|
||||||
(s: any) => !AI_SETTING_KEYS.includes(s.key)
|
|
||||||
);
|
|
||||||
setSettings(filteredSettings);
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Failed to load settings:', error);
|
console.error('Failed to load settings:', error);
|
||||||
} finally {
|
} finally {
|
||||||
|
|||||||
@@ -12,15 +12,11 @@ import {
|
|||||||
Search,
|
Search,
|
||||||
ChevronDown,
|
ChevronDown,
|
||||||
ChevronUp,
|
ChevronUp,
|
||||||
ChevronLeft,
|
|
||||||
ChevronRight,
|
|
||||||
Gauge,
|
Gauge,
|
||||||
Users,
|
Users,
|
||||||
|
Power,
|
||||||
|
Play,
|
||||||
Square,
|
Square,
|
||||||
Plus,
|
|
||||||
X,
|
|
||||||
Calendar,
|
|
||||||
Trash2,
|
|
||||||
} from 'lucide-react';
|
} from 'lucide-react';
|
||||||
|
|
||||||
interface Task {
|
interface Task {
|
||||||
@@ -70,313 +66,6 @@ interface TaskCounts {
|
|||||||
stale: number;
|
stale: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** A store row as listed in the task-creation store picker. */
interface Store {
  /** Sent as `dispensary_id` when a task is created for this store. */
  id: number;
  /** Display name; matched by the picker's search box. */
  name: string;
  /** Shown under the name and also matched by the search box. */
  state_code: string;
  /** When false the picker marks the store with a "disabled" badge. */
  crawl_enabled: boolean;
}
|
|
||||||
|
|
||||||
/** Props accepted by the task-creation modal. */
interface CreateTaskModalProps {
  /** When false the modal renders nothing. */
  isOpen: boolean;
  /** Invoked to dismiss the modal. */
  onClose: () => void;
  /** Invoked after the task(s) have been created successfully. */
  onTaskCreated: () => void;
}
|
|
||||||
|
|
||||||
// Roles offered in the task-creation modal, as [id, name, description] rows
// expanded into `{ id, name, description }` objects in the same order.
const TASK_ROLES = (
  [
    ['product_refresh', 'Product Resync', 'Re-crawl products for price/stock changes'],
    ['product_discovery', 'Product Discovery', 'Initial crawl for new dispensaries'],
    ['store_discovery', 'Store Discovery', 'Discover new dispensary locations'],
    ['entry_point_discovery', 'Entry Point Discovery', 'Resolve platform IDs from menu URLs'],
    ['analytics_refresh', 'Analytics Refresh', 'Refresh materialized views'],
  ] as [string, string, string][]
).map(([id, name, description]) => ({ id, name, description }));
|
|
||||||
|
|
||||||
/**
 * Modal form for creating tasks via `POST /api/tasks`.
 *
 * The user picks a role, a priority (0-100 slider), and whether to run now or
 * at a chosen date/time. For every role except `store_discovery` and
 * `analytics_refresh`, one or more stores must be selected and one task is
 * posted per selected store (sequentially); for those two roles a single task
 * without a store is posted. On success `onTaskCreated` and `onClose` are
 * called and the selection, priority and schedule are reset.
 *
 * Choosing "Schedule for later" without a valid date/time is rejected with an
 * error rather than silently creating a task that runs immediately.
 *
 * @param isOpen - When false the component renders `null`; stores are fetched each time it becomes true.
 * @param onClose - Called to dismiss the modal.
 * @param onTaskCreated - Called after all requested tasks were created.
 */
function CreateTaskModal({ isOpen, onClose, onTaskCreated }: CreateTaskModalProps) {
  const [role, setRole] = useState('product_refresh');
  const [priority, setPriority] = useState(10);
  const [scheduleType, setScheduleType] = useState<'now' | 'scheduled'>('now');
  const [scheduledFor, setScheduledFor] = useState('');
  const [stores, setStores] = useState<Store[]>([]);
  const [storeSearch, setStoreSearch] = useState('');
  const [selectedStores, setSelectedStores] = useState<Store[]>([]);
  const [loading, setLoading] = useState(false);
  const [storesLoading, setStoresLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    if (isOpen) {
      fetchStores();
    }
  }, [isOpen]);

  // Loads up to 500 stores for the picker; failures are logged, not surfaced.
  const fetchStores = async () => {
    setStoresLoading(true);
    try {
      const res = await api.get('/api/stores?limit=500');
      setStores(res.data.stores || res.data || []);
    } catch (err) {
      console.error('Failed to fetch stores:', err);
    } finally {
      setStoresLoading(false);
    }
  };

  // Case-insensitive match on store name or state code.
  const filteredStores = stores.filter(s =>
    s.name.toLowerCase().includes(storeSearch.toLowerCase()) ||
    s.state_code?.toLowerCase().includes(storeSearch.toLowerCase())
  );

  const toggleStore = (store: Store) => {
    if (selectedStores.find(s => s.id === store.id)) {
      setSelectedStores(selectedStores.filter(s => s.id !== store.id));
    } else {
      setSelectedStores([...selectedStores, store]);
    }
  };

  const selectAll = () => setSelectedStores(filteredStores);
  const clearAll = () => setSelectedStores([]);

  // These two roles are created once, without a dispensary_id.
  const needsStore = role !== 'store_discovery' && role !== 'analytics_refresh';

  const handleSubmit = async () => {
    setError(null);

    // Validate before flipping the loading flag so nothing has to be undone.
    if (needsStore && selectedStores.length === 0) {
      setError('Please select at least one store');
      return;
    }

    let scheduledDate: string | undefined;
    if (scheduleType === 'scheduled') {
      const when = scheduledFor ? new Date(scheduledFor) : null;
      // Without this check an empty date would fall through to "run now".
      if (!when || isNaN(when.getTime())) {
        setError('Please choose a date and time for the scheduled task');
        return;
      }
      scheduledDate = when.toISOString();
    }

    setLoading(true);

    try {
      if (!needsStore) {
        await api.post('/api/tasks', {
          role,
          priority,
          scheduled_for: scheduledDate,
          platform: 'dutchie',
        });
      } else {
        // One task per store, posted one at a time.
        for (const store of selectedStores) {
          await api.post('/api/tasks', {
            role,
            dispensary_id: store.id,
            priority,
            scheduled_for: scheduledDate,
            platform: 'dutchie',
          });
        }
      }

      onTaskCreated();
      onClose();
      setSelectedStores([]);
      setPriority(10);
      setScheduleType('now');
      setScheduledFor('');
    } catch (err: any) {
      setError(err.response?.data?.error || err.message || 'Failed to create task');
    } finally {
      setLoading(false);
    }
  };

  if (!isOpen) return null;

  return (
    <div className="fixed inset-0 z-50 overflow-y-auto">
      <div className="flex min-h-full items-center justify-center p-4">
        <div className="fixed inset-0 bg-black/50" onClick={onClose} />
        <div className="relative bg-white rounded-xl shadow-xl max-w-2xl w-full max-h-[90vh] overflow-hidden">
          <div className="px-6 py-4 border-b border-gray-200 flex items-center justify-between">
            <h2 className="text-lg font-semibold text-gray-900">Create New Task</h2>
            <button onClick={onClose} className="p-1 hover:bg-gray-100 rounded">
              <X className="w-5 h-5 text-gray-500" />
            </button>
          </div>

          <div className="px-6 py-4 space-y-6 overflow-y-auto max-h-[calc(90vh-140px)]">
            {error && (
              <div className="bg-red-50 border border-red-200 rounded-lg p-3 text-red-700 text-sm">
                {error}
              </div>
            )}

            <div>
              <label className="block text-sm font-medium text-gray-700 mb-2">Task Role</label>
              <div className="grid grid-cols-1 gap-2">
                {TASK_ROLES.map(r => (
                  <button
                    key={r.id}
                    onClick={() => setRole(r.id)}
                    className={`flex items-start gap-3 p-3 rounded-lg border text-left transition-colors ${
                      role === r.id
                        ? 'border-emerald-500 bg-emerald-50'
                        : 'border-gray-200 hover:border-gray-300'
                    }`}
                  >
                    <div className={`w-4 h-4 rounded-full border-2 mt-0.5 flex-shrink-0 ${
                      role === r.id ? 'border-emerald-500 bg-emerald-500' : 'border-gray-300'
                    }`}>
                      {role === r.id && (
                        <div className="w-full h-full flex items-center justify-center">
                          <div className="w-1.5 h-1.5 bg-white rounded-full" />
                        </div>
                      )}
                    </div>
                    <div>
                      <p className="font-medium text-gray-900">{r.name}</p>
                      <p className="text-xs text-gray-500">{r.description}</p>
                    </div>
                  </button>
                ))}
              </div>
            </div>

            {needsStore && (
              <div>
                <label className="block text-sm font-medium text-gray-700 mb-2">
                  Select Stores ({selectedStores.length} selected)
                </label>
                <div className="border border-gray-200 rounded-lg overflow-hidden">
                  <div className="p-2 border-b border-gray-200 bg-gray-50">
                    <div className="relative">
                      <Search className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-gray-400" />
                      <input
                        type="text"
                        value={storeSearch}
                        onChange={(e) => setStoreSearch(e.target.value)}
                        placeholder="Search stores..."
                        className="w-full pl-9 pr-3 py-2 text-sm border border-gray-200 rounded"
                      />
                    </div>
                    <div className="flex gap-2 mt-2">
                      <button onClick={selectAll} className="text-xs text-emerald-600 hover:underline">
                        Select all ({filteredStores.length})
                      </button>
                      <span className="text-gray-300">|</span>
                      <button onClick={clearAll} className="text-xs text-gray-500 hover:underline">
                        Clear
                      </button>
                    </div>
                  </div>
                  <div className="max-h-48 overflow-y-auto">
                    {storesLoading ? (
                      <div className="p-4 text-center text-gray-500">
                        <RefreshCw className="w-5 h-5 animate-spin mx-auto mb-1" />
                        Loading stores...
                      </div>
                    ) : filteredStores.length === 0 ? (
                      <div className="p-4 text-center text-gray-500">No stores found</div>
                    ) : (
                      filteredStores.map(store => (
                        <label key={store.id} className="flex items-center gap-3 px-3 py-2 hover:bg-gray-50 cursor-pointer">
                          <input
                            type="checkbox"
                            checked={!!selectedStores.find(s => s.id === store.id)}
                            onChange={() => toggleStore(store)}
                            className="w-4 h-4 text-emerald-600 rounded"
                          />
                          <div className="flex-1 min-w-0">
                            <p className="text-sm text-gray-900 truncate">{store.name}</p>
                            <p className="text-xs text-gray-500">{store.state_code}</p>
                          </div>
                          {!store.crawl_enabled && (
                            <span className="text-xs text-orange-600 bg-orange-50 px-1.5 py-0.5 rounded">disabled</span>
                          )}
                        </label>
                      ))
                    )}
                  </div>
                </div>
              </div>
            )}

            <div>
              <label className="block text-sm font-medium text-gray-700 mb-2">Priority: {priority}</label>
              <input
                type="range"
                min="0"
                max="100"
                value={priority}
                onChange={(e) => setPriority(parseInt(e.target.value, 10))}
                className="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer"
              />
              <div className="flex justify-between text-xs text-gray-500 mt-1">
                <span>0 (Low)</span>
                <span>10 (Normal)</span>
                <span>50 (High)</span>
                <span>100 (Urgent)</span>
              </div>
            </div>

            <div>
              <label className="block text-sm font-medium text-gray-700 mb-2">Schedule</label>
              <div className="flex gap-4">
                <label className="flex items-center gap-2 cursor-pointer">
                  <input
                    type="radio"
                    name="schedule"
                    checked={scheduleType === 'now'}
                    onChange={() => setScheduleType('now')}
                    className="w-4 h-4 text-emerald-600"
                  />
                  <span className="text-sm text-gray-700">Run immediately</span>
                </label>
                <label className="flex items-center gap-2 cursor-pointer">
                  <input
                    type="radio"
                    name="schedule"
                    checked={scheduleType === 'scheduled'}
                    onChange={() => setScheduleType('scheduled')}
                    className="w-4 h-4 text-emerald-600"
                  />
                  <span className="text-sm text-gray-700">Schedule for later</span>
                </label>
              </div>
              {scheduleType === 'scheduled' && (
                <div className="mt-3 relative">
                  <Calendar className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-gray-400" />
                  <input
                    type="datetime-local"
                    value={scheduledFor}
                    onChange={(e) => setScheduledFor(e.target.value)}
                    className="w-full pl-9 pr-3 py-2 text-sm border border-gray-200 rounded"
                  />
                </div>
              )}
            </div>
          </div>

          <div className="px-6 py-4 border-t border-gray-200 bg-gray-50 flex items-center justify-between">
            <div className="text-sm text-gray-500">
              {needsStore ? (
                selectedStores.length > 0 ? `Will create ${selectedStores.length} task${selectedStores.length > 1 ? 's' : ''}` : 'Select stores to create tasks'
              ) : 'Will create 1 task'}
            </div>
            <div className="flex gap-3">
              <button onClick={onClose} className="px-4 py-2 text-sm text-gray-700 hover:bg-gray-100 rounded-lg">
                Cancel
              </button>
              <button
                onClick={handleSubmit}
                disabled={loading || (needsStore && selectedStores.length === 0)}
                className="px-4 py-2 text-sm bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
              >
                {loading && <RefreshCw className="w-4 h-4 animate-spin" />}
                Create Task{selectedStores.length > 1 ? 's' : ''}
              </button>
            </div>
          </div>
        </div>
      </div>
    </div>
  );
}
|
|
||||||
|
|
||||||
const ROLES = [
|
const ROLES = [
|
||||||
'store_discovery',
|
'store_discovery',
|
||||||
'entry_point_discovery',
|
'entry_point_discovery',
|
||||||
@@ -450,11 +139,7 @@ export default function TasksDashboard() {
|
|||||||
const [loading, setLoading] = useState(true);
|
const [loading, setLoading] = useState(true);
|
||||||
const [error, setError] = useState<string | null>(null);
|
const [error, setError] = useState<string | null>(null);
|
||||||
const [poolPaused, setPoolPaused] = useState(false);
|
const [poolPaused, setPoolPaused] = useState(false);
|
||||||
const [showCreateModal, setShowCreateModal] = useState(false);
|
const [poolLoading, setPoolLoading] = useState(false);
|
||||||
|
|
||||||
// Pagination
|
|
||||||
const [page, setPage] = useState(0);
|
|
||||||
const tasksPerPage = 25;
|
|
||||||
|
|
||||||
// Filters
|
// Filters
|
||||||
const [roleFilter, setRoleFilter] = useState<string>('');
|
const [roleFilter, setRoleFilter] = useState<string>('');
|
||||||
@@ -488,14 +173,20 @@ export default function TasksDashboard() {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleDeleteTask = async (taskId: number) => {
|
const togglePool = async () => {
|
||||||
if (!confirm('Delete this task?')) return;
|
setPoolLoading(true);
|
||||||
try {
|
try {
|
||||||
await api.delete(`/api/tasks/${taskId}`);
|
if (poolPaused) {
|
||||||
fetchData();
|
await api.resumeTaskPool();
|
||||||
|
setPoolPaused(false);
|
||||||
|
} else {
|
||||||
|
await api.pauseTaskPool();
|
||||||
|
setPoolPaused(true);
|
||||||
|
}
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
console.error('Delete error:', err);
|
setError(err.message || 'Failed to toggle pool');
|
||||||
alert(err.response?.data?.error || 'Failed to delete task');
|
} finally {
|
||||||
|
setPoolLoading(false);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -518,10 +209,6 @@ export default function TasksDashboard() {
|
|||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
|
|
||||||
// Pagination
|
|
||||||
const paginatedTasks = filteredTasks.slice(page * tasksPerPage, (page + 1) * tasksPerPage);
|
|
||||||
const totalPages = Math.ceil(filteredTasks.length / tasksPerPage);
|
|
||||||
|
|
||||||
const totalActive = (counts?.claimed || 0) + (counts?.running || 0);
|
const totalActive = (counts?.claimed || 0) + (counts?.running || 0);
|
||||||
const totalPending = counts?.pending || 0;
|
const totalPending = counts?.pending || 0;
|
||||||
|
|
||||||
@@ -538,37 +225,42 @@ export default function TasksDashboard() {
|
|||||||
return (
|
return (
|
||||||
<Layout>
|
<Layout>
|
||||||
<div className="space-y-6">
|
<div className="space-y-6">
|
||||||
{/* Sticky Header */}
|
{/* Header */}
|
||||||
<div className="sticky top-0 z-10 bg-white pb-4 -mx-6 px-6 pt-2 border-b border-gray-200 shadow-sm">
|
<div className="flex flex-col sm:flex-row sm:items-center sm:justify-between gap-4">
|
||||||
<div className="flex flex-col sm:flex-row sm:items-center sm:justify-between gap-4">
|
<div>
|
||||||
<div>
|
<h1 className="text-2xl font-bold text-gray-900 flex items-center gap-2">
|
||||||
<h1 className="text-2xl font-bold text-gray-900 flex items-center gap-2">
|
<ListChecks className="w-7 h-7 text-emerald-600" />
|
||||||
<ListChecks className="w-7 h-7 text-emerald-600" />
|
Task Queue
|
||||||
Task Queue
|
</h1>
|
||||||
</h1>
|
<p className="text-gray-500 mt-1">
|
||||||
<p className="text-gray-500 mt-1">
|
{totalActive} active, {totalPending} pending tasks
|
||||||
{totalActive} active, {totalPending} pending tasks
|
</p>
|
||||||
</p>
|
</div>
|
||||||
</div>
|
|
||||||
|
|
||||||
<div className="flex items-center gap-4">
|
<div className="flex items-center gap-4">
|
||||||
{/* Create Task Button */}
|
{/* Pool Toggle */}
|
||||||
<button
|
<button
|
||||||
onClick={() => setShowCreateModal(true)}
|
onClick={togglePool}
|
||||||
className="flex items-center gap-2 px-4 py-2 bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 transition-colors"
|
disabled={poolLoading}
|
||||||
>
|
className={`flex items-center gap-2 px-4 py-2 rounded-lg font-medium transition-colors ${
|
||||||
<Plus className="w-4 h-4" />
|
poolPaused
|
||||||
Create Task
|
? 'bg-emerald-100 text-emerald-700 hover:bg-emerald-200'
|
||||||
</button>
|
: 'bg-red-100 text-red-700 hover:bg-red-200'
|
||||||
{/* Pool status indicator */}
|
}`}
|
||||||
{poolPaused && (
|
>
|
||||||
<span className="inline-flex items-center gap-1.5 px-3 py-1.5 rounded-full text-sm font-medium bg-yellow-100 text-yellow-800">
|
{poolPaused ? (
|
||||||
<Square className="w-4 h-4" />
|
<>
|
||||||
Pool Paused
|
<Play className={`w-5 h-5 ${poolLoading ? 'animate-pulse' : ''}`} />
|
||||||
</span>
|
Start Pool
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
<Square className={`w-5 h-5 ${poolLoading ? 'animate-pulse' : ''}`} />
|
||||||
|
Stop Pool
|
||||||
|
</>
|
||||||
)}
|
)}
|
||||||
<span className="text-sm text-gray-400">Auto-refreshes every 15s</span>
|
</button>
|
||||||
</div>
|
<span className="text-sm text-gray-400">Auto-refreshes every 15s</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -576,13 +268,6 @@ export default function TasksDashboard() {
|
|||||||
<div className="p-4 bg-red-50 text-red-700 rounded-lg">{error}</div>
|
<div className="p-4 bg-red-50 text-red-700 rounded-lg">{error}</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Create Task Modal */}
|
|
||||||
<CreateTaskModal
|
|
||||||
isOpen={showCreateModal}
|
|
||||||
onClose={() => setShowCreateModal(false)}
|
|
||||||
onTaskCreated={fetchData}
|
|
||||||
/>
|
|
||||||
|
|
||||||
{/* Status Summary Cards */}
|
{/* Status Summary Cards */}
|
||||||
<div className="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-6 gap-4">
|
<div className="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-6 gap-4">
|
||||||
{Object.entries(counts || {}).map(([status, count]) => (
|
{Object.entries(counts || {}).map(([status, count]) => (
|
||||||
@@ -785,19 +470,17 @@ export default function TasksDashboard() {
|
|||||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">
|
||||||
Error
|
Error
|
||||||
</th>
|
</th>
|
||||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase w-16">
|
|
||||||
</th>
|
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody className="divide-y divide-gray-200">
|
<tbody className="divide-y divide-gray-200">
|
||||||
{paginatedTasks.length === 0 ? (
|
{filteredTasks.length === 0 ? (
|
||||||
<tr>
|
<tr>
|
||||||
<td colSpan={9} className="px-4 py-8 text-center text-gray-500">
|
<td colSpan={8} className="px-4 py-8 text-center text-gray-500">
|
||||||
No tasks found
|
No tasks found
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
) : (
|
) : (
|
||||||
paginatedTasks.map((task) => (
|
filteredTasks.map((task) => (
|
||||||
<tr key={task.id} className="hover:bg-gray-50">
|
<tr key={task.id} className="hover:bg-gray-50">
|
||||||
<td className="px-4 py-3 text-sm font-mono text-gray-600">#{task.id}</td>
|
<td className="px-4 py-3 text-sm font-mono text-gray-600">#{task.id}</td>
|
||||||
<td className="px-4 py-3 text-sm text-gray-900">
|
<td className="px-4 py-3 text-sm text-gray-900">
|
||||||
@@ -828,47 +511,12 @@ export default function TasksDashboard() {
|
|||||||
<td className="px-4 py-3 text-sm text-red-600 max-w-xs truncate">
|
<td className="px-4 py-3 text-sm text-red-600 max-w-xs truncate">
|
||||||
{task.error_message || '-'}
|
{task.error_message || '-'}
|
||||||
</td>
|
</td>
|
||||||
<td className="px-4 py-3">
|
|
||||||
{(task.status === 'failed' || task.status === 'completed' || task.status === 'pending') && (
|
|
||||||
<button
|
|
||||||
onClick={() => handleDeleteTask(task.id)}
|
|
||||||
className="p-1 text-gray-400 hover:text-red-500 hover:bg-red-50 rounded transition-colors"
|
|
||||||
title="Delete task"
|
|
||||||
>
|
|
||||||
<Trash2 className="w-4 h-4" />
|
|
||||||
</button>
|
|
||||||
)}
|
|
||||||
</td>
|
|
||||||
</tr>
|
</tr>
|
||||||
))
|
))
|
||||||
)}
|
)}
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Pagination */}
|
|
||||||
<div className="px-4 py-3 border-t border-gray-200 bg-gray-50 flex items-center justify-between">
|
|
||||||
<div className="text-sm text-gray-500">
|
|
||||||
Showing {page * tasksPerPage + 1} - {Math.min((page + 1) * tasksPerPage, filteredTasks.length)} of {filteredTasks.length} tasks
|
|
||||||
</div>
|
|
||||||
<div className="flex items-center gap-2">
|
|
||||||
<button
|
|
||||||
onClick={() => setPage(p => Math.max(0, p - 1))}
|
|
||||||
disabled={page === 0}
|
|
||||||
className="px-3 py-1 text-sm border border-gray-200 rounded hover:bg-gray-100 disabled:opacity-50 disabled:cursor-not-allowed"
|
|
||||||
>
|
|
||||||
<ChevronLeft className="w-4 h-4" />
|
|
||||||
</button>
|
|
||||||
<span className="text-sm text-gray-600">Page {page + 1} of {totalPages || 1}</span>
|
|
||||||
<button
|
|
||||||
onClick={() => setPage(p => p + 1)}
|
|
||||||
disabled={page >= totalPages - 1}
|
|
||||||
className="px-3 py-1 text-sm border border-gray-200 rounded hover:bg-gray-100 disabled:opacity-50 disabled:cursor-not-allowed"
|
|
||||||
>
|
|
||||||
<ChevronRight className="w-4 h-4" />
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</Layout>
|
</Layout>
|
||||||
|
|||||||
@@ -18,11 +18,9 @@ import {
|
|||||||
Server,
|
Server,
|
||||||
MapPin,
|
MapPin,
|
||||||
Trash2,
|
Trash2,
|
||||||
PowerOff,
|
|
||||||
Undo2,
|
|
||||||
Plus,
|
Plus,
|
||||||
MemoryStick,
|
Minus,
|
||||||
AlertTriangle,
|
Loader2,
|
||||||
} from 'lucide-react';
|
} from 'lucide-react';
|
||||||
|
|
||||||
// Worker from registry
|
// Worker from registry
|
||||||
@@ -41,36 +39,16 @@ interface Worker {
|
|||||||
tasks_completed: number;
|
tasks_completed: number;
|
||||||
tasks_failed: number;
|
tasks_failed: number;
|
||||||
current_task_id: number | null;
|
current_task_id: number | null;
|
||||||
current_task_ids?: number[]; // Multiple concurrent tasks
|
|
||||||
active_task_count?: number;
|
|
||||||
max_concurrent_tasks?: number;
|
|
||||||
health_status: string;
|
health_status: string;
|
||||||
seconds_since_heartbeat: number;
|
seconds_since_heartbeat: number;
|
||||||
decommission_requested?: boolean;
|
|
||||||
decommission_reason?: string;
|
|
||||||
// Dual-transport preflight status
|
|
||||||
preflight_curl_status?: 'pending' | 'passed' | 'failed' | 'skipped';
|
|
||||||
preflight_http_status?: 'pending' | 'passed' | 'failed' | 'skipped';
|
|
||||||
preflight_curl_at?: string;
|
|
||||||
preflight_http_at?: string;
|
|
||||||
preflight_curl_error?: string;
|
|
||||||
preflight_http_error?: string;
|
|
||||||
preflight_curl_ms?: number;
|
|
||||||
preflight_http_ms?: number;
|
|
||||||
can_curl?: boolean;
|
|
||||||
can_http?: boolean;
|
|
||||||
metadata: {
|
metadata: {
|
||||||
cpu?: number;
|
cpu?: number;
|
||||||
memory?: number;
|
memory?: number;
|
||||||
memoryTotal?: number;
|
memoryTotal?: number;
|
||||||
memory_mb?: number;
|
memory_mb?: number;
|
||||||
memory_total_mb?: number;
|
memory_total_mb?: number;
|
||||||
memory_percent?: number; // NEW: memory as percentage
|
|
||||||
cpu_user_ms?: number;
|
cpu_user_ms?: number;
|
||||||
cpu_system_ms?: number;
|
cpu_system_ms?: number;
|
||||||
cpu_percent?: number; // NEW: CPU percentage
|
|
||||||
is_backing_off?: boolean; // NEW: resource backoff state
|
|
||||||
backoff_reason?: string; // NEW: why backing off
|
|
||||||
proxy_location?: {
|
proxy_location?: {
|
||||||
city?: string;
|
city?: string;
|
||||||
state?: string;
|
state?: string;
|
||||||
@@ -94,6 +72,14 @@ interface Task {
|
|||||||
worker_id: string | null;
|
worker_id: string | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// K8s replica info (added 2024-12-10)
|
||||||
|
interface K8sReplicas {
|
||||||
|
current: number;
|
||||||
|
desired: number;
|
||||||
|
available: number;
|
||||||
|
updated: number;
|
||||||
|
}
|
||||||
|
|
||||||
function formatRelativeTime(dateStr: string | null): string {
|
function formatRelativeTime(dateStr: string | null): string {
|
||||||
if (!dateStr) return '-';
|
if (!dateStr) return '-';
|
||||||
const date = new Date(dateStr);
|
const date = new Date(dateStr);
|
||||||
@@ -234,320 +220,69 @@ function HealthBadge({ status, healthStatus }: { status: string; healthStatus: s
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Format CPU time for display
|
|
||||||
function formatCpuTime(ms: number): string {
|
|
||||||
if (ms < 1000) return `${ms}ms`;
|
|
||||||
if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`;
|
|
||||||
return `${(ms / 60000).toFixed(1)}m`;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resource usage badge showing memory%, CPU%, and backoff status
|
|
||||||
function ResourceBadge({ worker }: { worker: Worker }) {
|
|
||||||
const memPercent = worker.metadata?.memory_percent;
|
|
||||||
const cpuPercent = worker.metadata?.cpu_percent;
|
|
||||||
const isBackingOff = worker.metadata?.is_backing_off;
|
|
||||||
const backoffReason = worker.metadata?.backoff_reason;
|
|
||||||
|
|
||||||
if (isBackingOff) {
|
|
||||||
return (
|
|
||||||
<div className="flex items-center gap-1.5" title={backoffReason || 'Backing off due to resource pressure'}>
|
|
||||||
<AlertTriangle className="w-4 h-4 text-amber-500 animate-pulse" />
|
|
||||||
<span className="text-xs text-amber-600 font-medium">Backing off</span>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// No data yet
|
|
||||||
if (memPercent === undefined && cpuPercent === undefined) {
|
|
||||||
return <span className="text-gray-400 text-xs">-</span>;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Color based on usage level
|
|
||||||
const getColor = (pct: number) => {
|
|
||||||
if (pct >= 90) return 'text-red-600';
|
|
||||||
if (pct >= 75) return 'text-amber-600';
|
|
||||||
if (pct >= 50) return 'text-yellow-600';
|
|
||||||
return 'text-emerald-600';
|
|
||||||
};
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="flex flex-col gap-0.5 text-xs">
|
|
||||||
{memPercent !== undefined && (
|
|
||||||
<div className="flex items-center gap-1" title={`Memory: ${worker.metadata?.memory_mb || 0}MB / ${worker.metadata?.memory_total_mb || 0}MB`}>
|
|
||||||
<MemoryStick className={`w-3 h-3 ${getColor(memPercent)}`} />
|
|
||||||
<span className={getColor(memPercent)}>{memPercent}%</span>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
{cpuPercent !== undefined && (
|
|
||||||
<div className="flex items-center gap-1">
|
|
||||||
<Cpu className={`w-3 h-3 ${getColor(cpuPercent)}`} />
|
|
||||||
<span className={getColor(cpuPercent)}>{cpuPercent}%</span>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Transport capability badge showing curl/http preflight status
|
|
||||||
function TransportBadge({ worker }: { worker: Worker }) {
|
|
||||||
const curlStatus = worker.preflight_curl_status || 'pending';
|
|
||||||
const httpStatus = worker.preflight_http_status || 'pending';
|
|
||||||
|
|
||||||
const getStatusConfig = (status: string, label: string, ms?: number, error?: string) => {
|
|
||||||
switch (status) {
|
|
||||||
case 'passed':
|
|
||||||
return {
|
|
||||||
bg: 'bg-emerald-100',
|
|
||||||
text: 'text-emerald-700',
|
|
||||||
icon: <CheckCircle className="w-3 h-3" />,
|
|
||||||
tooltip: ms ? `${label}: Passed (${ms}ms)` : `${label}: Passed`,
|
|
||||||
};
|
|
||||||
case 'failed':
|
|
||||||
return {
|
|
||||||
bg: 'bg-red-100',
|
|
||||||
text: 'text-red-700',
|
|
||||||
icon: <XCircle className="w-3 h-3" />,
|
|
||||||
tooltip: error ? `${label}: Failed - ${error}` : `${label}: Failed`,
|
|
||||||
};
|
|
||||||
case 'skipped':
|
|
||||||
return {
|
|
||||||
bg: 'bg-gray-100',
|
|
||||||
text: 'text-gray-500',
|
|
||||||
icon: <Clock className="w-3 h-3" />,
|
|
||||||
tooltip: `${label}: Skipped`,
|
|
||||||
};
|
|
||||||
default:
|
|
||||||
return {
|
|
||||||
bg: 'bg-yellow-100',
|
|
||||||
text: 'text-yellow-700',
|
|
||||||
icon: <Clock className="w-3 h-3 animate-pulse" />,
|
|
||||||
tooltip: `${label}: Pending`,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const curlConfig = getStatusConfig(curlStatus, 'CURL', worker.preflight_curl_ms, worker.preflight_curl_error);
|
|
||||||
const httpConfig = getStatusConfig(httpStatus, 'HTTP', worker.preflight_http_ms, worker.preflight_http_error);
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="flex flex-col gap-1">
|
|
||||||
<div
|
|
||||||
className={`inline-flex items-center gap-1 px-1.5 py-0.5 rounded text-xs font-medium ${curlConfig.bg} ${curlConfig.text}`}
|
|
||||||
title={curlConfig.tooltip}
|
|
||||||
>
|
|
||||||
{curlConfig.icon}
|
|
||||||
<span>curl</span>
|
|
||||||
</div>
|
|
||||||
<div
|
|
||||||
className={`inline-flex items-center gap-1 px-1.5 py-0.5 rounded text-xs font-medium ${httpConfig.bg} ${httpConfig.text}`}
|
|
||||||
title={httpConfig.tooltip}
|
|
||||||
>
|
|
||||||
{httpConfig.icon}
|
|
||||||
<span>http</span>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Task count badge showing active/max concurrent tasks
|
|
||||||
function TaskCountBadge({ worker, tasks }: { worker: Worker; tasks: Task[] }) {
|
|
||||||
const activeCount = worker.active_task_count ?? (worker.current_task_id ? 1 : 0);
|
|
||||||
const maxCount = worker.max_concurrent_tasks ?? 1;
|
|
||||||
const taskIds = worker.current_task_ids ?? (worker.current_task_id ? [worker.current_task_id] : []);
|
|
||||||
|
|
||||||
if (activeCount === 0) {
|
|
||||||
return <span className="text-gray-400 text-sm">Idle</span>;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get task names for tooltip
|
|
||||||
const taskNames = taskIds.map(id => {
|
|
||||||
const task = tasks.find(t => t.id === id);
|
|
||||||
return task ? `#${id}: ${task.role}${task.dispensary_name ? ` (${task.dispensary_name})` : ''}` : `#${id}`;
|
|
||||||
}).join('\n');
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="flex items-center gap-2" title={taskNames}>
|
|
||||||
<span className="text-sm font-medium text-blue-600">
|
|
||||||
{activeCount}/{maxCount} tasks
|
|
||||||
</span>
|
|
||||||
{taskIds.length === 1 && (
|
|
||||||
<span className="text-xs text-gray-500">#{taskIds[0]}</span>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pod visualization - shows pod as hub with worker nodes radiating out
|
|
||||||
function PodVisualization({
|
|
||||||
podName,
|
|
||||||
workers,
|
|
||||||
isSelected = false,
|
|
||||||
onSelect
|
|
||||||
}: {
|
|
||||||
podName: string;
|
|
||||||
workers: Worker[];
|
|
||||||
isSelected?: boolean;
|
|
||||||
onSelect?: () => void;
|
|
||||||
}) {
|
|
||||||
const busyCount = workers.filter(w => w.current_task_id !== null).length;
|
|
||||||
const allBusy = busyCount === workers.length;
|
|
||||||
const allIdle = busyCount === 0;
|
|
||||||
|
|
||||||
// Aggregate resource stats for the pod
|
|
||||||
const totalMemoryMb = workers.reduce((sum, w) => sum + (w.metadata?.memory_mb || 0), 0);
|
|
||||||
const totalCpuUserMs = workers.reduce((sum, w) => sum + (w.metadata?.cpu_user_ms || 0), 0);
|
|
||||||
const totalCpuSystemMs = workers.reduce((sum, w) => sum + (w.metadata?.cpu_system_ms || 0), 0);
|
|
||||||
const totalCompleted = workers.reduce((sum, w) => sum + w.tasks_completed, 0);
|
|
||||||
const totalFailed = workers.reduce((sum, w) => sum + w.tasks_failed, 0);
|
|
||||||
|
|
||||||
// Pod color based on worker status
|
|
||||||
const podColor = allBusy ? 'bg-blue-500' : allIdle ? 'bg-emerald-500' : 'bg-yellow-500';
|
|
||||||
const podBorder = allBusy ? 'border-blue-400' : allIdle ? 'border-emerald-400' : 'border-yellow-400';
|
|
||||||
const podGlow = allBusy ? 'shadow-blue-200' : allIdle ? 'shadow-emerald-200' : 'shadow-yellow-200';
|
|
||||||
|
|
||||||
// Selection ring
|
|
||||||
const selectionRing = isSelected ? 'ring-4 ring-purple-400 ring-offset-2' : '';
|
|
||||||
|
|
||||||
// Build pod tooltip
|
|
||||||
const podTooltip = [
|
|
||||||
`Pod: ${podName}`,
|
|
||||||
`Workers: ${busyCount}/${workers.length} busy`,
|
|
||||||
`Memory: ${totalMemoryMb} MB (RSS)`,
|
|
||||||
`CPU: ${formatCpuTime(totalCpuUserMs)} user, ${formatCpuTime(totalCpuSystemMs)} system`,
|
|
||||||
`Tasks: ${totalCompleted} completed, ${totalFailed} failed`,
|
|
||||||
'Click to select',
|
|
||||||
].join('\n');
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="flex flex-col items-center p-4">
|
|
||||||
{/* Pod hub */}
|
|
||||||
<div className="relative">
|
|
||||||
{/* Center pod circle */}
|
|
||||||
<div
|
|
||||||
className={`w-20 h-20 rounded-full ${podColor} border-4 ${podBorder} shadow-lg ${podGlow} ${selectionRing} flex items-center justify-center text-white font-bold text-xs text-center leading-tight z-10 relative cursor-pointer hover:scale-105 transition-all`}
|
|
||||||
title={podTooltip}
|
|
||||||
onClick={onSelect}
|
|
||||||
>
|
|
||||||
<span className="px-1">{podName}</span>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Worker nodes radiating out */}
|
|
||||||
{workers.map((worker, index) => {
|
|
||||||
const angle = (index * 360) / workers.length - 90; // Start from top
|
|
||||||
const radians = (angle * Math.PI) / 180;
|
|
||||||
const radius = 55; // Distance from center
|
|
||||||
const x = Math.cos(radians) * radius;
|
|
||||||
const y = Math.sin(radians) * radius;
|
|
||||||
|
|
||||||
const isBusy = worker.current_task_id !== null;
|
|
||||||
const isDecommissioning = worker.decommission_requested;
|
|
||||||
const isBackingOff = worker.metadata?.is_backing_off;
|
|
||||||
// Color priority: decommissioning > backing off > busy > idle
|
|
||||||
const workerColor = isDecommissioning ? 'bg-orange-500' : isBackingOff ? 'bg-yellow-500' : isBusy ? 'bg-blue-500' : 'bg-emerald-500';
|
|
||||||
const workerBorder = isDecommissioning ? 'border-orange-300' : isBackingOff ? 'border-yellow-300' : isBusy ? 'border-blue-300' : 'border-emerald-300';
|
|
||||||
|
|
||||||
// Line from center to worker
|
|
||||||
const lineLength = radius - 10;
|
|
||||||
const lineX = Math.cos(radians) * (lineLength / 2 + 10);
|
|
||||||
const lineY = Math.sin(radians) * (lineLength / 2 + 10);
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div key={worker.id}>
|
|
||||||
{/* Connection line */}
|
|
||||||
<div
|
|
||||||
className={`absolute w-0.5 ${isDecommissioning ? 'bg-orange-300' : isBackingOff ? 'bg-yellow-300' : isBusy ? 'bg-blue-300' : 'bg-emerald-300'}`}
|
|
||||||
style={{
|
|
||||||
height: `${lineLength}px`,
|
|
||||||
left: '50%',
|
|
||||||
top: '50%',
|
|
||||||
transform: `translate(-50%, -50%) translate(${lineX}px, ${lineY}px) rotate(${angle + 90}deg)`,
|
|
||||||
transformOrigin: 'center',
|
|
||||||
}}
|
|
||||||
/>
|
|
||||||
{/* Worker node */}
|
|
||||||
<div
|
|
||||||
className={`absolute w-6 h-6 rounded-full ${workerColor} border-2 ${workerBorder} flex items-center justify-center text-white text-xs font-bold cursor-pointer hover:scale-110 transition-transform`}
|
|
||||||
style={{
|
|
||||||
left: '50%',
|
|
||||||
top: '50%',
|
|
||||||
transform: `translate(-50%, -50%) translate(${x}px, ${y}px)`,
|
|
||||||
}}
|
|
||||||
title={`${worker.friendly_name}\nStatus: ${isDecommissioning ? 'Stopping after current task' : isBackingOff ? `Backing off: ${worker.metadata?.backoff_reason || 'resource pressure'}` : isBusy ? `Working on task #${worker.current_task_id}` : 'Ready - waiting for tasks'}\nMemory: ${worker.metadata?.memory_mb || 0} MB (${worker.metadata?.memory_percent || 0}%)\nCPU: ${formatCpuTime(worker.metadata?.cpu_user_ms || 0)} user, ${formatCpuTime(worker.metadata?.cpu_system_ms || 0)} sys\nCompleted: ${worker.tasks_completed} | Failed: ${worker.tasks_failed}\nLast heartbeat: ${new Date(worker.last_heartbeat_at).toLocaleTimeString()}`}
|
|
||||||
>
|
|
||||||
{index + 1}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
})}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Pod stats */}
|
|
||||||
<div className="mt-12 text-center">
|
|
||||||
<p className="text-xs text-gray-500">
|
|
||||||
{busyCount}/{workers.length} busy
|
|
||||||
</p>
|
|
||||||
{isSelected && (
|
|
||||||
<p className="text-xs text-purple-600 font-medium mt-1">Selected</p>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Group workers by pod
|
|
||||||
function groupWorkersByPod(workers: Worker[]): Map<string, Worker[]> {
|
|
||||||
const pods = new Map<string, Worker[]>();
|
|
||||||
for (const worker of workers) {
|
|
||||||
const podName = worker.pod_name || 'Unknown';
|
|
||||||
if (!pods.has(podName)) {
|
|
||||||
pods.set(podName, []);
|
|
||||||
}
|
|
||||||
pods.get(podName)!.push(worker);
|
|
||||||
}
|
|
||||||
return pods;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Format estimated time remaining
|
|
||||||
function formatEstimatedTime(hours: number): string {
|
|
||||||
if (hours < 1) {
|
|
||||||
return `${Math.round(hours * 60)} minutes`;
|
|
||||||
}
|
|
||||||
if (hours < 24) {
|
|
||||||
return `${hours.toFixed(1)} hours`;
|
|
||||||
}
|
|
||||||
const days = hours / 24;
|
|
||||||
if (days < 7) {
|
|
||||||
return `${days.toFixed(1)} days`;
|
|
||||||
}
|
|
||||||
return `${(days / 7).toFixed(1)} weeks`;
|
|
||||||
}
|
|
||||||
|
|
||||||
export function WorkersDashboard() {
|
export function WorkersDashboard() {
|
||||||
const [workers, setWorkers] = useState<Worker[]>([]);
|
const [workers, setWorkers] = useState<Worker[]>([]);
|
||||||
const [tasks, setTasks] = useState<Task[]>([]);
|
const [tasks, setTasks] = useState<Task[]>([]);
|
||||||
const [pendingTaskCount, setPendingTaskCount] = useState<number>(0);
|
|
||||||
const [loading, setLoading] = useState(true);
|
const [loading, setLoading] = useState(true);
|
||||||
const [error, setError] = useState<string | null>(null);
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
|
||||||
// Pod selection state
|
// K8s scaling state (added 2024-12-10)
|
||||||
const [selectedPod, setSelectedPod] = useState<string | null>(null);
|
const [k8sReplicas, setK8sReplicas] = useState<K8sReplicas | null>(null);
|
||||||
|
const [k8sError, setK8sError] = useState<string | null>(null);
|
||||||
|
const [scaling, setScaling] = useState(false);
|
||||||
|
const [targetReplicas, setTargetReplicas] = useState<number | null>(null);
|
||||||
|
|
||||||
// Pagination
|
// Pagination
|
||||||
const [page, setPage] = useState(0);
|
const [page, setPage] = useState(0);
|
||||||
const workersPerPage = 15;
|
const workersPerPage = 15;
|
||||||
|
|
||||||
|
// Fetch K8s replica count (added 2024-12-10)
|
||||||
|
const fetchK8sReplicas = useCallback(async () => {
|
||||||
|
try {
|
||||||
|
const res = await api.get('/api/workers/k8s/replicas');
|
||||||
|
if (res.data.success && res.data.replicas) {
|
||||||
|
setK8sReplicas(res.data.replicas);
|
||||||
|
if (targetReplicas === null) {
|
||||||
|
setTargetReplicas(res.data.replicas.desired);
|
||||||
|
}
|
||||||
|
setK8sError(null);
|
||||||
|
}
|
||||||
|
} catch (err: any) {
|
||||||
|
// K8s not available (local dev or no RBAC)
|
||||||
|
setK8sError(err.response?.data?.error || 'K8s not available');
|
||||||
|
setK8sReplicas(null);
|
||||||
|
}
|
||||||
|
}, [targetReplicas]);
|
||||||
|
|
||||||
|
// Scale workers (added 2024-12-10)
|
||||||
|
const handleScale = useCallback(async (replicas: number) => {
|
||||||
|
if (replicas < 0 || replicas > 20) return;
|
||||||
|
setScaling(true);
|
||||||
|
try {
|
||||||
|
const res = await api.post('/api/workers/k8s/scale', { replicas });
|
||||||
|
if (res.data.success) {
|
||||||
|
setTargetReplicas(replicas);
|
||||||
|
// Refresh after a short delay to see the change
|
||||||
|
setTimeout(fetchK8sReplicas, 1000);
|
||||||
|
}
|
||||||
|
} catch (err: any) {
|
||||||
|
console.error('Scale error:', err);
|
||||||
|
setK8sError(err.response?.data?.error || 'Failed to scale');
|
||||||
|
} finally {
|
||||||
|
setScaling(false);
|
||||||
|
}
|
||||||
|
}, [fetchK8sReplicas]);
|
||||||
|
|
||||||
const fetchData = useCallback(async () => {
|
const fetchData = useCallback(async () => {
|
||||||
try {
|
try {
|
||||||
// Fetch workers from registry, running tasks, and task counts
|
// Fetch workers from registry
|
||||||
const [workersRes, tasksRes, countsRes] = await Promise.all([
|
const workersRes = await api.get('/api/worker-registry/workers');
|
||||||
api.get('/api/worker-registry/workers'),
|
|
||||||
api.get('/api/tasks?status=running&limit=100'),
|
// Fetch running tasks to get current task details
|
||||||
api.get('/api/tasks/counts'),
|
const tasksRes = await api.get('/api/tasks?status=running&limit=100');
|
||||||
]);
|
|
||||||
|
|
||||||
setWorkers(workersRes.data.workers || []);
|
setWorkers(workersRes.data.workers || []);
|
||||||
setTasks(tasksRes.data.tasks || []);
|
setTasks(tasksRes.data.tasks || []);
|
||||||
setPendingTaskCount(countsRes.data?.pending || 0);
|
|
||||||
setError(null);
|
setError(null);
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
console.error('Fetch error:', err);
|
console.error('Fetch error:', err);
|
||||||
@@ -568,51 +303,16 @@ export function WorkersDashboard() {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Decommission a worker (graceful shutdown after current task)
|
|
||||||
const handleDecommissionWorker = async (workerId: string, friendlyName: string) => {
|
|
||||||
if (!confirm(`Decommission ${friendlyName}? Worker will stop after completing its current task.`)) return;
|
|
||||||
try {
|
|
||||||
const res = await api.post(`/api/worker-registry/workers/${workerId}/decommission`, {
|
|
||||||
reason: 'Manual decommission from admin UI'
|
|
||||||
});
|
|
||||||
if (res.data.success) {
|
|
||||||
fetchData();
|
|
||||||
}
|
|
||||||
} catch (err: any) {
|
|
||||||
console.error('Decommission error:', err);
|
|
||||||
alert(err.response?.data?.error || 'Failed to decommission worker');
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Cancel decommission
|
|
||||||
const handleCancelDecommission = async (workerId: string) => {
|
|
||||||
try {
|
|
||||||
await api.post(`/api/worker-registry/workers/${workerId}/cancel-decommission`);
|
|
||||||
fetchData();
|
|
||||||
} catch (err: any) {
|
|
||||||
console.error('Cancel decommission error:', err);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Add a worker by scaling up the K8s deployment
|
|
||||||
const handleAddWorker = async () => {
|
|
||||||
try {
|
|
||||||
const res = await api.post('/api/workers/k8s/scale-up');
|
|
||||||
if (res.data.success) {
|
|
||||||
// Refresh after a short delay to see the new worker
|
|
||||||
setTimeout(fetchData, 2000);
|
|
||||||
}
|
|
||||||
} catch (err: any) {
|
|
||||||
console.error('Add worker error:', err);
|
|
||||||
alert(err.response?.data?.error || 'Failed to add worker. K8s scaling may not be available.');
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
fetchData();
|
fetchData();
|
||||||
|
fetchK8sReplicas(); // Added 2024-12-10
|
||||||
const interval = setInterval(fetchData, 5000);
|
const interval = setInterval(fetchData, 5000);
|
||||||
return () => clearInterval(interval);
|
const k8sInterval = setInterval(fetchK8sReplicas, 10000); // K8s refresh every 10s
|
||||||
}, [fetchData]);
|
return () => {
|
||||||
|
clearInterval(interval);
|
||||||
|
clearInterval(k8sInterval);
|
||||||
|
};
|
||||||
|
}, [fetchData, fetchK8sReplicas]);
|
||||||
|
|
||||||
// Paginated workers
|
// Paginated workers
|
||||||
const paginatedWorkers = workers.slice(
|
const paginatedWorkers = workers.slice(
|
||||||
@@ -652,9 +352,15 @@ export function WorkersDashboard() {
|
|||||||
<h1 className="text-2xl font-bold text-gray-900">Workers</h1>
|
<h1 className="text-2xl font-bold text-gray-900">Workers</h1>
|
||||||
<p className="text-gray-500 mt-1">
|
<p className="text-gray-500 mt-1">
|
||||||
{workers.length} registered workers ({busyWorkers.length} busy, {idleWorkers.length} idle)
|
{workers.length} registered workers ({busyWorkers.length} busy, {idleWorkers.length} idle)
|
||||||
<span className="text-xs text-gray-400 ml-2">(auto-refresh 5s)</span>
|
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
<button
|
||||||
|
onClick={() => fetchData()}
|
||||||
|
className="flex items-center gap-2 px-4 py-2 bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 transition-colors"
|
||||||
|
>
|
||||||
|
<RefreshCw className="w-4 h-4" />
|
||||||
|
Refresh
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{error && (
|
{error && (
|
||||||
@@ -663,6 +369,68 @@ export function WorkersDashboard() {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{/* K8s Scaling Card (added 2024-12-10) */}
|
||||||
|
{k8sReplicas && (
|
||||||
|
<div className="bg-white rounded-lg border border-gray-200 p-4">
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<div className="flex items-center gap-3">
|
||||||
|
<div className="w-10 h-10 bg-purple-100 rounded-lg flex items-center justify-center">
|
||||||
|
<Server className="w-5 h-5 text-purple-600" />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<p className="text-sm text-gray-500">K8s Worker Pods</p>
|
||||||
|
<p className="text-xl font-semibold">
|
||||||
|
{k8sReplicas.current} / {k8sReplicas.desired}
|
||||||
|
{k8sReplicas.current !== k8sReplicas.desired && (
|
||||||
|
<span className="text-sm font-normal text-yellow-600 ml-2">scaling...</span>
|
||||||
|
)}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<button
|
||||||
|
onClick={() => handleScale((targetReplicas || k8sReplicas.desired) - 1)}
|
||||||
|
disabled={scaling || (targetReplicas || k8sReplicas.desired) <= 0}
|
||||||
|
className="w-8 h-8 flex items-center justify-center bg-gray-100 text-gray-700 rounded-lg hover:bg-gray-200 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
|
||||||
|
title="Scale down"
|
||||||
|
>
|
||||||
|
<Minus className="w-4 h-4" />
|
||||||
|
</button>
|
||||||
|
<input
|
||||||
|
type="number"
|
||||||
|
min="0"
|
||||||
|
max="20"
|
||||||
|
value={targetReplicas ?? k8sReplicas.desired}
|
||||||
|
onChange={(e) => setTargetReplicas(Math.max(0, Math.min(20, parseInt(e.target.value) || 0)))}
|
||||||
|
onBlur={() => {
|
||||||
|
if (targetReplicas !== null && targetReplicas !== k8sReplicas.desired) {
|
||||||
|
handleScale(targetReplicas);
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
onKeyDown={(e) => {
|
||||||
|
if (e.key === 'Enter' && targetReplicas !== null && targetReplicas !== k8sReplicas.desired) {
|
||||||
|
handleScale(targetReplicas);
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
className="w-16 text-center border border-gray-300 rounded-lg px-2 py-1 text-lg font-semibold"
|
||||||
|
/>
|
||||||
|
<button
|
||||||
|
onClick={() => handleScale((targetReplicas || k8sReplicas.desired) + 1)}
|
||||||
|
disabled={scaling || (targetReplicas || k8sReplicas.desired) >= 20}
|
||||||
|
className="w-8 h-8 flex items-center justify-center bg-gray-100 text-gray-700 rounded-lg hover:bg-gray-200 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
|
||||||
|
title="Scale up"
|
||||||
|
>
|
||||||
|
<Plus className="w-4 h-4" />
|
||||||
|
</button>
|
||||||
|
{scaling && <Loader2 className="w-4 h-4 text-purple-600 animate-spin ml-2" />}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{k8sError && (
|
||||||
|
<p className="text-xs text-red-500 mt-2">{k8sError}</p>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Stats Cards */}
|
{/* Stats Cards */}
|
||||||
<div className="grid grid-cols-5 gap-4">
|
<div className="grid grid-cols-5 gap-4">
|
||||||
<div className="bg-white rounded-lg border border-gray-200 p-4">
|
<div className="bg-white rounded-lg border border-gray-200 p-4">
|
||||||
@@ -722,197 +490,6 @@ export function WorkersDashboard() {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Estimated Completion Time Card */}
|
|
||||||
{pendingTaskCount > 0 && activeWorkers.length > 0 && (() => {
|
|
||||||
// Calculate average task rate across all workers
|
|
||||||
const totalHoursUp = activeWorkers.reduce((sum, w) => {
|
|
||||||
if (!w.started_at) return sum;
|
|
||||||
const start = new Date(w.started_at);
|
|
||||||
const now = new Date();
|
|
||||||
return sum + (now.getTime() - start.getTime()) / (1000 * 60 * 60);
|
|
||||||
}, 0);
|
|
||||||
|
|
||||||
const totalTasksDone = totalCompleted + totalFailed;
|
|
||||||
const avgTasksPerHour = totalHoursUp > 0.1 ? totalTasksDone / totalHoursUp : 0;
|
|
||||||
const estimatedHours = avgTasksPerHour > 0 ? pendingTaskCount / avgTasksPerHour : null;
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="bg-gradient-to-r from-amber-50 to-orange-50 rounded-lg border border-amber-200 p-4">
|
|
||||||
<div className="flex items-center justify-between">
|
|
||||||
<div className="flex items-center gap-3">
|
|
||||||
<div className="w-10 h-10 bg-amber-100 rounded-lg flex items-center justify-center">
|
|
||||||
<Clock className="w-5 h-5 text-amber-600" />
|
|
||||||
</div>
|
|
||||||
<div>
|
|
||||||
<p className="text-sm text-amber-700 font-medium">Estimated Time to Complete Queue</p>
|
|
||||||
<p className="text-2xl font-bold text-amber-900">
|
|
||||||
{estimatedHours !== null ? formatEstimatedTime(estimatedHours) : 'Calculating...'}
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div className="text-right text-sm text-amber-700">
|
|
||||||
<p><span className="font-semibold">{pendingTaskCount}</span> pending tasks</p>
|
|
||||||
<p><span className="font-semibold">{activeWorkers.length}</span> active workers</p>
|
|
||||||
{avgTasksPerHour > 0 && (
|
|
||||||
<p className="text-xs text-amber-600 mt-1">
|
|
||||||
~{avgTasksPerHour.toFixed(1)} tasks/hour
|
|
||||||
</p>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
})()}
|
|
||||||
|
|
||||||
{/* Worker Pods Visualization */}
|
|
||||||
<div className="bg-white rounded-lg border border-gray-200 overflow-hidden">
|
|
||||||
<div className="px-4 py-3 border-b border-gray-200 bg-gray-50">
|
|
||||||
<div className="flex items-center justify-between">
|
|
||||||
<div>
|
|
||||||
<h3 className="text-sm font-semibold text-gray-900 flex items-center gap-2">
|
|
||||||
<Zap className="w-4 h-4 text-emerald-500" />
|
|
||||||
Worker Pods ({Array.from(groupWorkersByPod(workers)).length} pods, {activeWorkers.length} workers)
|
|
||||||
</h3>
|
|
||||||
<p className="text-xs text-gray-500 mt-0.5">
|
|
||||||
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-emerald-500"></span> ready</span>
|
|
||||||
<span className="mx-2">|</span>
|
|
||||||
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-blue-500"></span> busy</span>
|
|
||||||
<span className="mx-2">|</span>
|
|
||||||
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-yellow-500"></span> backing off</span>
|
|
||||||
<span className="mx-2">|</span>
|
|
||||||
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-orange-500"></span> stopping</span>
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
<div className="text-sm text-gray-500">
|
|
||||||
{busyWorkers.length} busy, {activeWorkers.length - busyWorkers.length} idle
|
|
||||||
{selectedPod && (
|
|
||||||
<button
|
|
||||||
onClick={() => setSelectedPod(null)}
|
|
||||||
className="ml-3 text-xs text-purple-600 hover:text-purple-800 underline"
|
|
||||||
>
|
|
||||||
Clear selection
|
|
||||||
</button>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{workers.length === 0 ? (
|
|
||||||
<div className="px-4 py-12 text-center text-gray-500">
|
|
||||||
<Users className="w-12 h-12 mx-auto mb-3 text-gray-300" />
|
|
||||||
<p className="font-medium">No worker pods running</p>
|
|
||||||
<p className="text-xs mt-1">Start pods to process tasks from the queue</p>
|
|
||||||
</div>
|
|
||||||
) : (
|
|
||||||
<div className="p-6">
|
|
||||||
<div className="flex flex-wrap justify-center gap-8">
|
|
||||||
{Array.from(groupWorkersByPod(workers)).map(([podName, podWorkers]) => (
|
|
||||||
<PodVisualization
|
|
||||||
key={podName}
|
|
||||||
podName={podName}
|
|
||||||
workers={podWorkers}
|
|
||||||
isSelected={selectedPod === podName}
|
|
||||||
onSelect={() => setSelectedPod(selectedPod === podName ? null : podName)}
|
|
||||||
/>
|
|
||||||
))}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Selected Pod Control Panel */}
|
|
||||||
{selectedPod && (() => {
|
|
||||||
const podWorkers = groupWorkersByPod(workers).get(selectedPod) || [];
|
|
||||||
const busyInPod = podWorkers.filter(w => w.current_task_id !== null).length;
|
|
||||||
const idleInPod = podWorkers.filter(w => w.current_task_id === null && !w.decommission_requested).length;
|
|
||||||
const stoppingInPod = podWorkers.filter(w => w.decommission_requested).length;
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="mt-6 border-t border-gray-200 pt-6">
|
|
||||||
<div className="bg-purple-50 rounded-lg border border-purple-200 p-4">
|
|
||||||
<div className="flex items-center justify-between mb-4">
|
|
||||||
<div className="flex items-center gap-3">
|
|
||||||
<div className="w-10 h-10 bg-purple-100 rounded-lg flex items-center justify-center">
|
|
||||||
<Server className="w-5 h-5 text-purple-600" />
|
|
||||||
</div>
|
|
||||||
<div>
|
|
||||||
<h4 className="font-semibold text-purple-900">{selectedPod}</h4>
|
|
||||||
<p className="text-xs text-purple-600">
|
|
||||||
{podWorkers.length} workers: {busyInPod} busy, {idleInPod} idle{stoppingInPod > 0 && `, ${stoppingInPod} stopping`}
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Worker list in selected pod */}
|
|
||||||
<div className="space-y-2">
|
|
||||||
{podWorkers.map((worker) => {
|
|
||||||
const isBusy = worker.current_task_id !== null;
|
|
||||||
const isDecommissioning = worker.decommission_requested;
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div key={worker.id} className="flex items-center justify-between bg-white rounded-lg px-3 py-2 border border-purple-100">
|
|
||||||
<div className="flex items-center gap-3">
|
|
||||||
<div className={`w-8 h-8 rounded-full flex items-center justify-center text-white text-sm font-bold ${
|
|
||||||
isDecommissioning ? 'bg-orange-500' :
|
|
||||||
isBusy ? 'bg-blue-500' : 'bg-emerald-500'
|
|
||||||
}`}>
|
|
||||||
{worker.friendly_name?.charAt(0) || '?'}
|
|
||||||
</div>
|
|
||||||
<div>
|
|
||||||
<p className="text-sm font-medium text-gray-900">{worker.friendly_name}</p>
|
|
||||||
<p className="text-xs text-gray-500">
|
|
||||||
{isDecommissioning ? (
|
|
||||||
<span className="text-orange-600">Stopping after current task...</span>
|
|
||||||
) : isBusy ? (
|
|
||||||
<span className="text-blue-600">Working on task #{worker.current_task_id}</span>
|
|
||||||
) : (
|
|
||||||
<span className="text-emerald-600">Idle - ready for tasks</span>
|
|
||||||
)}
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div className="flex items-center gap-2">
|
|
||||||
{isDecommissioning ? (
|
|
||||||
<button
|
|
||||||
onClick={() => handleCancelDecommission(worker.worker_id)}
|
|
||||||
className="flex items-center gap-1.5 px-3 py-1.5 text-sm bg-white border border-gray-300 text-gray-700 rounded-lg hover:bg-gray-50 transition-colors"
|
|
||||||
title="Cancel decommission"
|
|
||||||
>
|
|
||||||
<Undo2 className="w-4 h-4" />
|
|
||||||
Cancel
|
|
||||||
</button>
|
|
||||||
) : (
|
|
||||||
<button
|
|
||||||
onClick={() => handleDecommissionWorker(worker.worker_id, worker.friendly_name)}
|
|
||||||
className="flex items-center gap-1.5 px-3 py-1.5 text-sm bg-orange-100 text-orange-700 rounded-lg hover:bg-orange-200 transition-colors"
|
|
||||||
title={isBusy ? 'Worker will stop after completing current task' : 'Remove idle worker'}
|
|
||||||
>
|
|
||||||
<PowerOff className="w-4 h-4" />
|
|
||||||
{isBusy ? 'Stop after task' : 'Remove'}
|
|
||||||
</button>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
})}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Add Worker button */}
|
|
||||||
<div className="mt-4 pt-4 border-t border-purple-200">
|
|
||||||
<button
|
|
||||||
onClick={handleAddWorker}
|
|
||||||
className="flex items-center gap-1.5 px-3 py-2 text-sm bg-emerald-100 text-emerald-700 rounded-lg hover:bg-emerald-200 transition-colors"
|
|
||||||
>
|
|
||||||
<Plus className="w-4 h-4" />
|
|
||||||
Add Worker
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
})()}
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Workers Table */}
|
{/* Workers Table */}
|
||||||
<div className="bg-white rounded-lg border border-gray-200 overflow-hidden">
|
<div className="bg-white rounded-lg border border-gray-200 overflow-hidden">
|
||||||
<div className="px-4 py-3 border-b border-gray-200 bg-gray-50 flex items-center justify-between">
|
<div className="px-4 py-3 border-b border-gray-200 bg-gray-50 flex items-center justify-between">
|
||||||
@@ -955,11 +532,10 @@ export function WorkersDashboard() {
|
|||||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Worker</th>
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Worker</th>
|
||||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Role</th>
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Role</th>
|
||||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Status</th>
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Status</th>
|
||||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Transport</th>
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Exit Location</th>
|
||||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Resources</th>
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Current Task</th>
|
||||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Tasks</th>
|
|
||||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Duration</th>
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Duration</th>
|
||||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Throughput</th>
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Utilization</th>
|
||||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Heartbeat</th>
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Heartbeat</th>
|
||||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase"></th>
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase"></th>
|
||||||
</tr>
|
</tr>
|
||||||
@@ -974,29 +550,16 @@ export function WorkersDashboard() {
|
|||||||
<tr key={worker.id} className="hover:bg-gray-50">
|
<tr key={worker.id} className="hover:bg-gray-50">
|
||||||
<td className="px-4 py-3">
|
<td className="px-4 py-3">
|
||||||
<div className="flex items-center gap-3">
|
<div className="flex items-center gap-3">
|
||||||
<div className={`w-10 h-10 rounded-full flex items-center justify-center text-white font-bold text-sm relative ${
|
<div className={`w-10 h-10 rounded-full flex items-center justify-center text-white font-bold text-sm ${
|
||||||
worker.decommission_requested ? 'bg-orange-500' :
|
|
||||||
worker.health_status === 'offline' ? 'bg-gray-400' :
|
worker.health_status === 'offline' ? 'bg-gray-400' :
|
||||||
worker.health_status === 'stale' ? 'bg-yellow-500' :
|
worker.health_status === 'stale' ? 'bg-yellow-500' :
|
||||||
worker.health_status === 'busy' ? 'bg-blue-500' :
|
worker.health_status === 'busy' ? 'bg-blue-500' :
|
||||||
'bg-emerald-500'
|
'bg-emerald-500'
|
||||||
}`}>
|
}`}>
|
||||||
{worker.friendly_name?.charAt(0) || '?'}
|
{worker.friendly_name?.charAt(0) || '?'}
|
||||||
{worker.decommission_requested && (
|
|
||||||
<div className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 rounded-full flex items-center justify-center">
|
|
||||||
<PowerOff className="w-2.5 h-2.5 text-white" />
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<p className="font-medium text-gray-900 flex items-center gap-1.5">
|
<p className="font-medium text-gray-900">{worker.friendly_name}</p>
|
||||||
{worker.friendly_name}
|
|
||||||
{worker.decommission_requested && (
|
|
||||||
<span className="text-xs text-orange-600 bg-orange-100 px-1.5 py-0.5 rounded" title={worker.decommission_reason || 'Pending decommission'}>
|
|
||||||
stopping
|
|
||||||
</span>
|
|
||||||
)}
|
|
||||||
</p>
|
|
||||||
<p className="text-xs text-gray-400 font-mono">{worker.worker_id.slice(0, 20)}...</p>
|
<p className="text-xs text-gray-400 font-mono">{worker.worker_id.slice(0, 20)}...</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -1008,13 +571,45 @@ export function WorkersDashboard() {
|
|||||||
<HealthBadge status={worker.status} healthStatus={worker.health_status} />
|
<HealthBadge status={worker.status} healthStatus={worker.health_status} />
|
||||||
</td>
|
</td>
|
||||||
<td className="px-4 py-3">
|
<td className="px-4 py-3">
|
||||||
<TransportBadge worker={worker} />
|
{(() => {
|
||||||
|
const loc = worker.metadata?.proxy_location;
|
||||||
|
if (!loc) {
|
||||||
|
return <span className="text-gray-400 text-sm">-</span>;
|
||||||
|
}
|
||||||
|
const parts = [loc.city, loc.state, loc.country].filter(Boolean);
|
||||||
|
if (parts.length === 0) {
|
||||||
|
return loc.isRotating ? (
|
||||||
|
<span className="text-xs text-purple-600 font-medium" title="Rotating proxy - exit location varies per request">
|
||||||
|
Rotating
|
||||||
|
</span>
|
||||||
|
) : (
|
||||||
|
<span className="text-gray-400 text-sm">Unknown</span>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return (
|
||||||
|
<div className="flex items-center gap-1.5" title={loc.timezone || ''}>
|
||||||
|
<MapPin className="w-3 h-3 text-gray-400" />
|
||||||
|
<span className="text-sm text-gray-700">
|
||||||
|
{parts.join(', ')}
|
||||||
|
</span>
|
||||||
|
{loc.isRotating && (
|
||||||
|
<span className="text-xs text-purple-500" title="Rotating proxy">*</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
})()}
|
||||||
</td>
|
</td>
|
||||||
<td className="px-4 py-3">
|
<td className="px-4 py-3">
|
||||||
<ResourceBadge worker={worker} />
|
{worker.current_task_id ? (
|
||||||
</td>
|
<div>
|
||||||
<td className="px-4 py-3">
|
<span className="text-sm text-gray-900">Task #{worker.current_task_id}</span>
|
||||||
<TaskCountBadge worker={worker} tasks={tasks} />
|
{currentTask?.dispensary_name && (
|
||||||
|
<p className="text-xs text-gray-500">{currentTask.dispensary_name}</p>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<span className="text-gray-400 text-sm">Idle</span>
|
||||||
|
)}
|
||||||
</td>
|
</td>
|
||||||
<td className="px-4 py-3">
|
<td className="px-4 py-3">
|
||||||
{currentTask?.started_at ? (
|
{currentTask?.started_at ? (
|
||||||
|
|||||||
@@ -1,36 +0,0 @@
|
|||||||
# RBAC configuration for scraper pod to control worker scaling
|
|
||||||
# Allows the scraper to read and scale the scraper-worker statefulset
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
name: scraper-sa
|
|
||||||
namespace: dispensary-scraper
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
name: worker-scaler
|
|
||||||
namespace: dispensary-scraper
|
|
||||||
rules:
|
|
||||||
# Allow reading deployment and statefulset status
|
|
||||||
- apiGroups: ["apps"]
|
|
||||||
resources: ["deployments", "statefulsets"]
|
|
||||||
verbs: ["get", "list"]
|
|
||||||
# Allow scaling deployments and statefulsets
|
|
||||||
- apiGroups: ["apps"]
|
|
||||||
resources: ["deployments/scale", "statefulsets/scale"]
|
|
||||||
verbs: ["get", "patch", "update"]
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
name: scraper-worker-scaler
|
|
||||||
namespace: dispensary-scraper
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: scraper-sa
|
|
||||||
namespace: dispensary-scraper
|
|
||||||
roleRef:
|
|
||||||
kind: Role
|
|
||||||
name: worker-scaler
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
@@ -40,16 +40,12 @@ spec:
|
|||||||
valueFrom:
|
valueFrom:
|
||||||
fieldRef:
|
fieldRef:
|
||||||
fieldPath: metadata.name
|
fieldPath: metadata.name
|
||||||
- name: API_BASE_URL
|
|
||||||
value: "http://scraper"
|
|
||||||
- name: NODE_OPTIONS
|
|
||||||
value: "--max-old-space-size=1500"
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
memory: "1Gi"
|
memory: "256Mi"
|
||||||
cpu: "100m"
|
cpu: "100m"
|
||||||
limits:
|
limits:
|
||||||
memory: "2Gi"
|
memory: "512Mi"
|
||||||
cpu: "500m"
|
cpu: "500m"
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
exec:
|
exec:
|
||||||
|
|||||||
@@ -25,7 +25,6 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
app: scraper
|
app: scraper
|
||||||
spec:
|
spec:
|
||||||
serviceAccountName: scraper-sa
|
|
||||||
imagePullSecrets:
|
imagePullSecrets:
|
||||||
- name: regcred
|
- name: regcred
|
||||||
containers:
|
containers:
|
||||||
|
|||||||
@@ -1,18 +0,0 @@
|
|||||||
# Woodpecker Agent Docker Compose
|
|
||||||
# Path: /opt/woodpecker/docker-compose.yml
|
|
||||||
# Deploy: cd /opt/woodpecker && docker compose up -d
|
|
||||||
version: '3.8'
|
|
||||||
|
|
||||||
services:
|
|
||||||
woodpecker-agent:
|
|
||||||
image: woodpeckerci/woodpecker-agent:latest
|
|
||||||
container_name: woodpecker-agent
|
|
||||||
restart: always
|
|
||||||
volumes:
|
|
||||||
- /var/run/docker.sock:/var/run/docker.sock
|
|
||||||
environment:
|
|
||||||
- WOODPECKER_SERVER=localhost:9000
|
|
||||||
- WOODPECKER_AGENT_SECRET=${WOODPECKER_AGENT_SECRET}
|
|
||||||
- WOODPECKER_MAX_WORKFLOWS=5
|
|
||||||
- WOODPECKER_HEALTHCHECK=true
|
|
||||||
- WOODPECKER_LOG_LEVEL=info
|
|
||||||
@@ -6,19 +6,6 @@ kind: Namespace
|
|||||||
metadata:
|
metadata:
|
||||||
name: woodpecker
|
name: woodpecker
|
||||||
---
|
---
|
||||||
# PVC for npm cache - shared across CI jobs
|
|
||||||
apiVersion: v1
|
|
||||||
kind: PersistentVolumeClaim
|
|
||||||
metadata:
|
|
||||||
name: npm-cache
|
|
||||||
namespace: woodpecker
|
|
||||||
spec:
|
|
||||||
accessModes:
|
|
||||||
- ReadWriteMany
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
storage: 5Gi
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Secret
|
kind: Secret
|
||||||
metadata:
|
metadata:
|
||||||
@@ -65,9 +52,6 @@ spec:
|
|||||||
value: "woodpecker"
|
value: "woodpecker"
|
||||||
- name: WOODPECKER_BACKEND_K8S_VOLUME_SIZE
|
- name: WOODPECKER_BACKEND_K8S_VOLUME_SIZE
|
||||||
value: "10G"
|
value: "10G"
|
||||||
# Allow CI steps to mount the npm-cache PVC
|
|
||||||
- name: WOODPECKER_BACKEND_K8S_VOLUMES
|
|
||||||
value: "npm-cache:/npm-cache"
|
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: "512Mi"
|
memory: "512Mi"
|
||||||
|
|||||||
Reference in New Issue
Block a user