fix(monitor): remove non-existent worker columns from job_run_logs query

The job_run_logs table tracks scheduled job orchestration, not individual
worker jobs. Worker info (worker_id, worker_hostname) belongs on
dispensary_crawl_jobs, not job_run_logs.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-03 18:45:05 -07:00
parent 54f40d26bb
commit 66e07b2009
466 changed files with 84988 additions and 9226 deletions

65
k8s/scraper-worker.yaml Normal file
View File

@@ -0,0 +1,65 @@
# Dutchie AZ Worker Deployment
# These workers poll the job queue and process crawl jobs.
# Scale this deployment to increase crawl throughput.
#
# Architecture:
# - The main 'scraper' deployment runs the API server + scheduler (1 replica)
# - This 'scraper-worker' deployment runs workers that poll and claim jobs (5 replicas)
# - Workers use DB-level locking (FOR UPDATE SKIP LOCKED) to prevent double-crawls
# - Each worker sends heartbeats; stale jobs are recovered automatically
apiVersion: apps/v1
kind: Deployment
metadata:
  name: scraper-worker
  namespace: dispensary-scraper
spec:
  # Keep in sync with the replica count quoted in the header comment.
  replicas: 5
  selector:
    matchLabels:
      app: scraper-worker
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: scraper-worker
    spec:
      imagePullSecrets:
        - name: regcred
      containers:
        - name: worker
          image: code.cannabrands.app/creationshop/dispensary-scraper:latest
          # Run the worker process instead of the main server
          command: ["node"]
          args: ["dist/dutchie-az/services/worker.js"]
          # Shared configuration and credentials, loaded as environment variables.
          envFrom:
            - configMapRef:
                name: scraper-config
            - secretRef:
                name: scraper-secrets
          env:
            # Worker-specific environment variables
            - name: WORKER_MODE
              # Quoted so the value stays a string rather than a YAML boolean.
              value: "true"
            # Pod name becomes part of worker ID for debugging
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          resources:
            requests:
              memory: "256Mi"
              cpu: "100m"
            limits:
              memory: "512Mi"
              cpu: "500m"
          # Health check - workers don't expose ports, so probe for the
          # worker process instead.
          # The '[w]orker.js' bracket pattern still matches the node process,
          # but not the probe's own '/bin/sh -c ...' command line (which
          # 'pgrep -f' would otherwise match, making the probe always pass).
          # NOTE(review): assumes pgrep is installed in the image - confirm.
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - "pgrep -f '[w]orker.js' > /dev/null"
            initialDelaySeconds: 10
            periodSeconds: 30
            failureThreshold: 3
      # Graceful shutdown - give workers time to complete current job
      terminationGracePeriodSeconds: 60