Files
cannaiq/k8s/scraper-worker.yaml
Kelly fdce5e0302 fix(workers): Fix false memory backoff and add backing-off color coding
- Fix memory calculation to use max-old-space-size (1500MB) instead of
  V8's dynamic heapTotal. This prevents false 95%+ readings when idle.
- Add yellow color for backing-off workers in pod visualization
- Update legend and tooltips with backing-off status
- Remove pool toggle from TasksDashboard (moved to Workers page)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-11 19:11:42 -07:00

230 lines
5.9 KiB
YAML

# Task Worker Deployment
#
# Simple Deployment that runs task-worker.js to process tasks from worker_tasks queue.
# Workers pull tasks using DB-level locking (FOR UPDATE SKIP LOCKED).
#
# The worker will wait up to 60 minutes for active proxies to be added before failing.
# This allows deployment to succeed even if proxies aren't configured yet.
---
# Deployment: one task-worker process per pod, 25 replicas.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: scraper-worker
  namespace: dispensary-scraper
spec:
  replicas: 25
  selector:
    matchLabels:
      app: scraper-worker
  template:
    metadata:
      labels:
        app: scraper-worker
    spec:
      imagePullSecrets:
        - name: regcred
      containers:
        - name: worker
          image: code.cannabrands.app/creationshop/dispensary-scraper:latest
          # node is started directly, so the worker is the container's main process.
          command: ["node"]
          args: ["dist/tasks/task-worker.js"]
          # Shared configuration and credentials are injected as environment variables.
          envFrom:
            - configMapRef:
                name: scraper-config
            - secretRef:
                name: scraper-secrets
          env:
            - name: WORKER_MODE
              value: "true"
            # Expose the Kubernetes pod name to the worker via the downward API.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            # NOTE(review): the StatefulSet variant in this file uses
            # "http://scraper:3010" - confirm which port the scraper Service exposes.
            - name: API_BASE_URL
              value: "http://scraper"
            # Cap the V8 old-space heap at 1500 MB, below the 2Gi container limit.
            - name: NODE_OPTIONS
              value: "--max-old-space-size=1500"
          resources:
            requests:
              memory: "1Gi"
              cpu: "100m"
            limits:
              memory: "2Gi"
              cpu: "500m"
          # Liveness: succeed only while a task-worker process exists.
          # The pattern is written as '[t]ask-worker' so that the probe's own
          # "sh -c ..." command line (which contains the pattern text) cannot
          # match itself; a plain 'task-worker' pattern can make the probe pass
          # even when no worker is running.
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - "pgrep -f '[t]ask-worker' > /dev/null"
            initialDelaySeconds: 60
            periodSeconds: 30
            failureThreshold: 3
      # Time allowed between SIGTERM and SIGKILL when a pod is stopped.
      terminationGracePeriodSeconds: 60
---
# =============================================================================
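# ALTERNATIVE: StatefulSet with multiple workers per pod (not currently used)
# NOTE: the manifests below are live YAML (not commented out), so applying this
# file as-is also creates them alongside the Deployment above.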
# =============================================================================
# Task Worker Pods (StatefulSet)
# Each pod runs 5 role-agnostic workers that pull tasks from worker_tasks queue.
#
# Architecture:
# - Pods are named from a predefined list (Aethelgard, Xylos, etc.)
# - Each pod spawns 5 worker processes
# - Workers register with API and show their pod name
# - HPA scales pods 5-15 based on pending task count
# - Workers use DB-level locking (FOR UPDATE SKIP LOCKED) to prevent conflicts
#
# Pod Names (up to 25):
# Aethelgard, Xylos, Kryll, Coriolis, Dimidium, Veridia, Zetani, Talos IV,
# Onyx, Celestia, Gormand, Betha, Ragnar, Syphon, Axiom, Nadir, Terra Nova,
# Acheron, Nexus, Vespera, Helios Prime, Oasis, Mordina, Cygnus, Umbra
---
# ConfigMap holding the friendly pod names, one per line.
# The worker-pod StatefulSet mounts this at /etc/pod-names and its start script
# picks line (ordinal + 1), so the order of the lines is significant.
apiVersion: v1
kind: ConfigMap
metadata:
  name: pod-names
  namespace: dispensary-scraper
data:
  # 25 entries; some names contain spaces.
  names: |
    Aethelgard
    Xylos
    Kryll
    Coriolis
    Dimidium
    Veridia
    Zetani
    Talos IV
    Onyx
    Celestia
    Gormand
    Betha
    Ragnar
    Syphon
    Axiom
    Nadir
    Terra Nova
    Acheron
    Nexus
    Vespera
    Helios Prime
    Oasis
    Mordina
    Cygnus
    Umbra
---
# StatefulSet: each pod runs several task-worker processes under one shell.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: worker-pod
  namespace: dispensary-scraper
spec:
  # Governing headless Service (defined later in this file).
  serviceName: worker-pods
  replicas: 5
  # Start/stop pods in parallel instead of one ordinal at a time.
  podManagementPolicy: Parallel
  selector:
    matchLabels:
      app: worker-pod
  template:
    metadata:
      labels:
        app: worker-pod
    spec:
      imagePullSecrets:
        - name: regcred
      containers:
        - name: workers
          image: code.cannabrands.app/creationshop/dispensary-scraper:latest
          # Run WORKERS_PER_POD workers per pod (5 by default)
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Pod ordinal = numeric suffix of the hostname (worker-pod-3 -> 3)
              ORDINAL="${HOSTNAME##*-}"
              # Get pod name from configmap (line ORDINAL+1 of the mounted list)
              POD_NAME=$(sed -n "$((ORDINAL + 1))p" /etc/pod-names/names)
              # Fall back to the hostname if the list has no entry for this ordinal
              [ -n "$POD_NAME" ] || POD_NAME="$HOSTNAME"
              echo "Starting pod: $POD_NAME (ordinal: $ORDINAL)"
              # This shell is the container's main process and does not pass
              # signals on by itself; forward SIGTERM/SIGINT so the workers are
              # asked to stop before the termination grace period runs out
              PIDS=""
              trap 'kill -TERM $PIDS 2>/dev/null' TERM INT
              # Start the workers in this pod
              i=1
              while [ "$i" -le "${WORKERS_PER_POD:-5}" ]; do
                WORKER_ID="${POD_NAME}-worker-${i}" \
                POD_NAME="$POD_NAME" \
                node dist/tasks/task-worker.js &
                PIDS="$PIDS $!"
                i=$((i + 1))
              done
              # Wait for all workers. A trapped signal interrupts the first
              # wait, so wait once more to let the workers finish exiting
              wait
              wait
          envFrom:
            - configMapRef:
                name: scraper-config
            - secretRef:
                name: scraper-secrets
          env:
            # NOTE(review): the Deployment in this file uses "http://scraper"
            # (no port) - confirm which form is correct.
            - name: API_BASE_URL
              value: "http://scraper:3010"
            # Number of worker processes the start script launches.
            - name: WORKERS_PER_POD
              value: "5"
          volumeMounts:
            - name: pod-names
              mountPath: /etc/pod-names
          # NOTE(review): all workers in the pod share this 1Gi limit and no
          # NODE_OPTIONS heap cap is set here, while the Deployment gives a
          # single worker 2Gi - confirm the sizing before using this variant.
          resources:
            requests:
              memory: "512Mi"
              cpu: "200m"
            limits:
              memory: "1Gi"
              cpu: "1000m"
          # Liveness: succeed while at least one worker process exists.
          # The pattern is anchored on the node command line because the start
          # script's own shell (and the probe's "sh -c") both carry the text
          # "task-worker" in their command lines and would always match an
          # unanchored pattern.
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - "pgrep -f '^node .*task-worker' > /dev/null"
            initialDelaySeconds: 15
            periodSeconds: 30
            failureThreshold: 3
      volumes:
        # Friendly pod-name list, read by the start script.
        - name: pod-names
          configMap:
            name: pod-names
      # Time allowed between SIGTERM and SIGKILL when a pod is stopped.
      terminationGracePeriodSeconds: 60
---
# Headless Service (clusterIP: None) referenced by the worker-pod
# StatefulSet through its serviceName field.
apiVersion: v1
kind: Service
metadata:
  name: worker-pods
  namespace: dispensary-scraper
spec:
  clusterIP: None
  # Selects the pods created by the worker-pod StatefulSet.
  selector:
    app: worker-pod
  ports:
    # Single port entry, named "placeholder".
    - name: placeholder
      port: 80
---
# HorizontalPodAutoscaler: scales the worker-pod StatefulSet between 5 and 15
# replicas based on the external "pending_tasks" metric.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: worker-pod-hpa
  namespace: dispensary-scraper
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: StatefulSet
    name: worker-pod
  minReplicas: 5
  maxReplicas: 15
  metrics:
    # NOTE(review): an External metric needs a metrics provider in the cluster
    # that serves "pending_tasks" - confirm one is installed.
    - type: External
      external:
        metric:
          name: pending_tasks
          selector:
            matchLabels:
              queue: worker_tasks
        # Target an average of 10 pending tasks per replica.
        target:
          type: AverageValue
          averageValue: "10"