- Namespace: dispensary-scraper → cannaiq
- Registry: code.cannabrands.app → git.spdy.io
- Database: External PostgreSQL at 10.100.6.50
- MinIO: Internal at 10.100.9.80:9000
- CI: ci.spdy.io

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
78 lines
2.4 KiB
YAML
78 lines
2.4 KiB
YAML
# Task Worker Deployment
#
# Simple Deployment that runs task-worker.js to process tasks from the
# worker_tasks queue.
# Workers pull tasks using DB-level locking (FOR UPDATE SKIP LOCKED).
#
# The worker will wait up to 60 minutes for active proxies to be added before
# failing. This allows deployment to succeed even if proxies aren't configured
# yet.
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: scraper-worker
  namespace: cannaiq
spec:
  # MAX 8 PODS - See CLAUDE.md rule #6
  # Each pod runs up to MAX_CONCURRENT_TASKS browsers (~400MB each)
  # Scale pods for throughput, not concurrent tasks per pod
  replicas: 8
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: scraper-worker
  template:
    metadata:
      labels:
        app: scraper-worker
    spec:
      # Credentials used to pull the image from the private registry.
      imagePullSecrets:
        - name: regcred
      containers:
        - name: worker
          # NOTE(review): ":latest" is a mutable tag; consider pinning a
          # version or digest so rollouts are reproducible.
          image: git.spdy.io/creationshop/cannaiq:latest
          command: ["node"]
          args: ["dist/tasks/task-worker.js"]
          # Shared configuration and credentials; entries under "env" below
          # take precedence over same-named keys from these sources.
          envFrom:
            - configMapRef:
                name: scraper-config
            - secretRef:
                name: scraper-secrets
          env:
            - name: WORKER_MODE
              value: "true"
            # Expose the pod's own name to the worker via the downward API.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: API_BASE_URL
              value: "http://scraper"
            # NOTE(review): a 1500 MiB V8 old-space cap plus ~1.2GB of
            # browsers can exceed the 2Gi container limit under load -
            # confirm the sizing.
            - name: NODE_OPTIONS
              value: "--max-old-space-size=1500"
            # Browser memory limits - see docs/WORKER_TASK_ARCHITECTURE.md
            # 3 browsers × ~400MB = ~1.2GB (safe for 2GB pod limit)
            - name: MAX_CONCURRENT_TASKS
              value: "3"
            # Task Pool System (geo-based pools)
            # Correct flow: check pools → claim pool → get proxy → preflight
            # → pull tasks
            - name: USE_TASK_POOLS
              value: "true"
            # Disable legacy identity pool
            - name: USE_IDENTITY_POOL
              value: "false"
          resources:
            requests:
              memory: "1Gi"
              cpu: "100m"
            limits:
              memory: "2Gi"
              cpu: "500m"
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                # The "[t]" bracket keeps pgrep -f from matching this probe's
                # own "sh -c" command line (which would make the probe always
                # succeed); it still matches the real task-worker process.
                - "pgrep -f '[t]ask-worker' > /dev/null"
            initialDelaySeconds: 60
            periodSeconds: 30
            failureThreshold: 3
      # Time allowed for a graceful shutdown after SIGTERM before the pod is
      # force-killed.
      terminationGracePeriodSeconds: 60