Files
cannaiq/k8s/scraper-worker.yaml
Kelly a8360c7260 feat: Migrate to spdy.io infrastructure
- Namespace: dispensary-scraper → cannaiq
- Registry: code.cannabrands.app → git.spdy.io
- Database: External PostgreSQL at 10.100.6.50
- MinIO: Internal at 10.100.9.80:9000
- CI: ci.spdy.io

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 06:40:48 -07:00

78 lines
2.4 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Task Worker Deployment
#
# Simple Deployment that runs task-worker.js to process tasks from the worker_tasks queue.
# Workers pull tasks using DB-level locking (FOR UPDATE SKIP LOCKED).
#
# The worker will wait up to 60 minutes for active proxies to be added before failing.
# This allows deployment to succeed even if proxies aren't configured yet.
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: scraper-worker
  namespace: cannaiq
spec:
  # MAX 8 PODS - See CLAUDE.md rule #6
  # Each pod runs up to MAX_CONCURRENT_TASKS browsers (~400MB each)
  # Scale pods for throughput, not concurrent tasks per pod
  replicas: 8
  selector:
    matchLabels:
      app: scraper-worker
  template:
    metadata:
      labels:
        app: scraper-worker
    spec:
      # Credentials used to pull the image from the private registry.
      imagePullSecrets:
        - name: regcred
      containers:
        - name: worker
          image: git.spdy.io/creationshop/cannaiq:latest
          # Runs the compiled task worker as the container's main process.
          command: ["node"]
          args: ["dist/tasks/task-worker.js"]
          # Shared settings come from the ConfigMap and Secret; the explicit
          # `env` entries below are worker-specific.
          envFrom:
            - configMapRef:
                name: scraper-config
            - secretRef:
                name: scraper-secrets
          env:
            - name: WORKER_MODE
              value: "true"
            # Downward API: exposes this pod's own name to the process.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: API_BASE_URL
              value: "http://scraper"
            # Caps the V8 old-generation heap at 1500 MB.
            # NOTE(review): this cap plus the browser budget below can exceed
            # the 2Gi container limit if both peak at once - confirm sizing.
            - name: NODE_OPTIONS
              value: "--max-old-space-size=1500"
            # Browser memory limits - see docs/WORKER_TASK_ARCHITECTURE.md
            # 3 browsers × ~400MB = ~1.2GB (safe for 2GB pod limit)
            - name: MAX_CONCURRENT_TASKS
              value: "3"
            # Task Pool System (geo-based pools)
            # Correct flow: check pools → claim pool → get proxy → preflight → pull tasks
            - name: USE_TASK_POOLS
              value: "true"
            # Disable legacy identity pool
            - name: USE_IDENTITY_POOL
              value: "false"
          resources:
            requests:
              memory: "1Gi"
              cpu: "100m"
            limits:
              memory: "2Gi"
              cpu: "500m"
          # Restart the container if no task-worker process is running.
          # The pattern is written as '[t]ask-worker' on purpose: `pgrep -f`
          # matches full command lines, and the wrapping `/bin/sh -c ...`
          # process carries the pattern text in its own argv. With a plain
          # 'task-worker' pattern, a shell that forks (rather than execs) the
          # command would match itself and the probe could never fail. The
          # bracket form still matches "task-worker" but not the literal
          # "[t]ask-worker" in the shell's command line.
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - "pgrep -f '[t]ask-worker' > /dev/null"
            initialDelaySeconds: 60
            periodSeconds: 30
            failureThreshold: 3
      # Pod-level setting: time allowed between SIGTERM and SIGKILL on shutdown.
      terminationGracePeriodSeconds: 60