# Dutchie AZ Worker Deployment
# These workers poll the job queue and process crawl jobs.
# Scale this deployment to increase crawl throughput.
#
# Architecture:
# - The main 'scraper' deployment runs the API server + scheduler (1 replica)
# - This 'scraper-worker' deployment runs workers that poll and claim jobs (5 replicas)
# - Workers use DB-level locking (FOR UPDATE SKIP LOCKED) to prevent double-crawls
# - Each worker sends heartbeats; stale jobs are recovered automatically
#   (reference sketches of both mechanisms are at the end of this file)
apiVersion: apps/v1
kind: Deployment
metadata:
  name: scraper-worker
  namespace: dispensary-scraper
spec:
  replicas: 5
  selector:
    matchLabels:
      app: scraper-worker
  template:
    metadata:
      labels:
        app: scraper-worker
    spec:
      imagePullSecrets:
        - name: regcred
      containers:
        - name: worker
          image: code.cannabrands.app/creationshop/dispensary-scraper:latest
          # Run the worker process instead of the main server
          command: ["node"]
          args: ["dist/dutchie-az/services/worker.js"]
          envFrom:
            - configMapRef:
                name: scraper-config
            - secretRef:
                name: scraper-secrets
          env:
            # Worker-specific environment variables
            - name: WORKER_MODE
              value: "true"
            # Pod name becomes part of the worker ID for debugging
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          resources:
            requests:
              memory: "256Mi"
              cpu: "100m"
            limits:
              memory: "512Mi"
              cpu: "500m"
          # Health check - workers expose no ports, so probe the worker
          # process itself instead
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - "pgrep -f 'worker.js' > /dev/null"
            initialDelaySeconds: 10
            periodSeconds: 30
            failureThreshold: 3
      # Graceful shutdown - give workers time to finish their current job
      terminationGracePeriodSeconds: 60
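# ---------------------------------------------------------------------------
# Reference sketch: claiming a job with FOR UPDATE SKIP LOCKED.
# This illustrates the DB-level locking described in the header; it is not
# the deployed code. The table name (crawl_jobs), its columns, and the use
# of node-postgres are assumptions - the real schema lives in the worker
# source.
#
#   import { Pool } from "pg";
#
#   const pool = new Pool(); // reads PG* connection env vars
#
#   // Atomically claim one queued job. SKIP LOCKED makes concurrent
#   // workers skip rows already locked by another transaction, so no
#   // two workers can ever claim the same job (no double-crawls).
#   async function claimJob(workerId: string) {
#     const { rows } = await pool.query(
#       `UPDATE crawl_jobs
#          SET status = 'running', claimed_by = $1, heartbeat_at = now()
#        WHERE id = (
#          SELECT id FROM crawl_jobs
#           WHERE status = 'queued'
#           ORDER BY created_at
#           FOR UPDATE SKIP LOCKED
#           LIMIT 1)
#        RETURNING *`,
#       [workerId]
#     );
#     return rows[0] ?? null; // null when the queue is empty
#   }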
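#
# Reference sketch: heartbeats and stale-job recovery, continuing the
# assumed schema and pool above. A worker touches heartbeat_at while it
# runs; any process can requeue jobs whose heartbeat has gone stale (e.g.
# after a worker pod is OOM-killed). The 5-minute threshold is illustrative.
#
#   // Called periodically by a worker while it processes a job.
#   async function heartbeat(jobId: number, workerId: string) {
#     await pool.query(
#       `UPDATE crawl_jobs SET heartbeat_at = now()
#         WHERE id = $1 AND claimed_by = $2`,
#       [jobId, workerId]
#     );
#   }
#
#   // Requeue jobs whose worker stopped heartbeating.
#   async function recoverStaleJobs() {
#     await pool.query(
#       `UPDATE crawl_jobs
#          SET status = 'queued', claimed_by = NULL
#        WHERE status = 'running'
#          AND heartbeat_at < now() - interval '5 minutes'`
#     );
#   }
# ---------------------------------------------------------------------------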