# Hydration Worker Deployment
# These workers process raw_payloads → canonical tables.
# Scale this deployment to increase hydration throughput (example command at
# the end of this file).
#
# Architecture:
# - The main 'scraper' deployment runs the API server + scheduler (1 replica)
# - This 'scraper-worker' deployment runs hydration workers (5 replicas)
# - Workers use DB-level locking to prevent double-processing (sketch at the
#   end of this file)
# - Each worker processes payloads in batches with configurable limits
apiVersion: apps/v1
kind: Deployment
metadata:
  name: scraper-worker
  namespace: dispensary-scraper
spec:
  replicas: 5
  selector:
    matchLabels:
      app: scraper-worker
  template:
    metadata:
      labels:
        app: scraper-worker
    spec:
      imagePullSecrets:
        - name: regcred
      containers:
        - name: worker
          image: code.cannabrands.app/creationshop/dispensary-scraper:latest
          # Run the hydration worker in loop mode
          command: ["node"]
          args: ["dist/scripts/run-hydration.js", "--mode=payload", "--loop"]
          envFrom:
            - configMapRef:
                name: scraper-config
            - secretRef:
                name: scraper-secrets
          env:
            # Worker-specific environment variables
            - name: WORKER_MODE
              value: "true"
            # Pod name becomes part of the worker ID for debugging
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          resources:
            requests:
              memory: "256Mi"
              cpu: "100m"
            limits:
              memory: "512Mi"
              cpu: "500m"
          # Health check - workers don't expose ports, so probe the worker
          # process instead
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - "pgrep -f 'run-hydration' > /dev/null"
            initialDelaySeconds: 10
            periodSeconds: 30
            failureThreshold: 3
      # Graceful shutdown - give workers time to complete the current batch
      # (see the SIGTERM note at the end of this file)
      terminationGracePeriodSeconds: 60
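
# ---------------------------------------------------------------------------
# Scaling example. The header notes that scaling this deployment increases
# hydration throughput; one way to do that by hand (the replica count here is
# illustrative) is:
#
#   kubectl scale deployment/scraper-worker --replicas=10 -n dispensary-scraper
#
# Adding replicas is safe because workers coordinate through DB-level locking,
# per the architecture notes above.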
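#
# DB-level locking sketch. The manifest doesn't show how workers avoid
# double-processing; a common pattern (assuming a Postgres-style database,
# with hydrated_at as a hypothetical column name) is to claim a batch with
# FOR UPDATE SKIP LOCKED, so concurrent workers never select the same rows:
#
#   SELECT id FROM raw_payloads
#   WHERE hydrated_at IS NULL
#   ORDER BY id
#   LIMIT 100
#   FOR UPDATE SKIP LOCKED;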
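#
# SIGTERM note. Because command is ["node"], the Node process runs as PID 1
# and receives SIGTERM directly when the pod is deleted. The 60s grace period
# only helps if run-hydration.js traps the signal and finishes its current
# batch before exiting, roughly (shuttingDown is a hypothetical flag checked
# by the worker's batch loop):
#
#   process.on('SIGTERM', () => { shuttingDown = true; });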