## Worker System
- Role-agnostic workers that can handle any task type
- Pod-based architecture with StatefulSet (5-15 pods, 5 workers each)
- Custom pod names (Aethelgard, Xylos, Kryll, etc.)
- Worker registry with friendly names and resource monitoring
- Hub-and-spoke visualization on JobQueue page

## Stealth & Anti-Detection (REQUIRED)
- Proxies are MANDATORY - workers fail to start without active proxies
- CrawlRotator initializes on worker startup
- Loads proxies from `proxies` table
- Auto-rotates proxy + fingerprint on 403 errors
- 12 browser fingerprints (Chrome, Firefox, Safari, Edge)
- Locale/timezone matching for geographic consistency

## Task System
- Renamed product_resync → product_refresh
- Task chaining: store_discovery → entry_point → product_discovery
- Priority-based claiming with FOR UPDATE SKIP LOCKED
- Heartbeat and stale task recovery

## UI Updates
- JobQueue: Pod visualization, resource monitoring on hover
- WorkersDashboard: Simplified worker list
- Removed unused filters from task list

## Other
- IP2Location service for visitor analytics
- Findagram consumer features scaffolding
- Documentation updates

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
163 lines
3.9 KiB
YAML
163 lines
3.9 KiB
YAML
# Task Worker Pods
# Each pod runs 5 role-agnostic workers that pull tasks from the worker_tasks queue.
#
# Architecture:
# - Pods are named from a predefined list (Aethelgard, Xylos, etc.)
# - Each pod spawns 5 worker processes
# - Workers register with API and show their pod name
# - HPA scales pods 5-15 based on pending task count
# - Workers use DB-level locking (FOR UPDATE SKIP LOCKED) to prevent conflicts
#
# Pod Names (up to 25):
# Aethelgard, Xylos, Kryll, Coriolis, Dimidium, Veridia, Zetani, Talos IV,
# Onyx, Celestia, Gormand, Betha, Ragnar, Syphon, Axiom, Nadir, Terra Nova,
# Acheron, Nexus, Vespera, Helios Prime, Oasis, Mordina, Cygnus, Umbra
|
|
---
# Friendly pod names, one per line.
# The worker-pod StatefulSet mounts this ConfigMap at /etc/pod-names and its
# startup script reads line (ordinal + 1) of `names`, so line order decides
# which pod gets which name. Do not reorder entries casually.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: dispensary-scraper
  name: pod-names
data:
  names: |
    Aethelgard
    Xylos
    Kryll
    Coriolis
    Dimidium
    Veridia
    Zetani
    Talos IV
    Onyx
    Celestia
    Gormand
    Betha
    Ragnar
    Syphon
    Axiom
    Nadir
    Terra Nova
    Acheron
    Nexus
    Vespera
    Helios Prime
    Oasis
    Mordina
    Cygnus
    Umbra
|
|
---
# StatefulSet that runs the task workers.
# Each pod starts WORKERS_PER_POD worker processes (5 by default) and takes
# its friendly name from the pod-names ConfigMap, indexed by pod ordinal.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: worker-pod
  namespace: dispensary-scraper
spec:
  serviceName: worker-pods
  # Initial replica count; worker-pod-hpa scales this between 5 and 15.
  replicas: 5
  podManagementPolicy: Parallel
  selector:
    matchLabels:
      app: worker-pod
  template:
    metadata:
      labels:
        app: worker-pod
    spec:
      imagePullSecrets:
        - name: regcred
      containers:
        - name: workers
          # NOTE(review): ":latest" is a mutable tag; consider pinning a version.
          image: code.cannabrands.app/creationshop/dispensary-scraper:latest
          # Run WORKERS_PER_POD workers per pod
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Pod ordinal (0, 1, 2, ...) is the text after the last '-' in
              # the hostname, e.g. worker-pod-3 -> 3.
              ORDINAL="${HOSTNAME##*-}"

              # Friendly pod name: line (ordinal + 1) of the mounted ConfigMap.
              POD_NAME=$(sed -n "$((ORDINAL + 1))p" /etc/pod-names/names)
              # If the list has no entry for this ordinal, fall back to the
              # hostname so WORKER_ID never starts with an empty pod name.
              if [ -z "$POD_NAME" ]; then
                POD_NAME="$HOSTNAME"
              fi
              echo "Starting pod: $POD_NAME (ordinal: $ORDINAL)"

              # This shell is PID 1 in the container. Without a handler it
              # would not react to SIGTERM, so the workers would only be
              # stopped by SIGKILL after terminationGracePeriodSeconds.
              # Forward the signal to every worker instead.
              PIDS=""
              trap 'kill -TERM $PIDS 2>/dev/null' TERM INT

              # Start the workers; the count comes from WORKERS_PER_POD.
              WORKER_COUNT="${WORKERS_PER_POD:-5}"
              i=1
              while [ "$i" -le "$WORKER_COUNT" ]; do
                WORKER_ID="${POD_NAME}-worker-${i}" \
                POD_NAME="$POD_NAME" \
                node dist/tasks/task-worker.js &
                PIDS="$PIDS $!"
                i=$((i + 1))
              done

              # Wait for all workers. A trapped signal interrupts the first
              # wait, so wait once more to let the workers finish shutting down.
              wait
              wait
          envFrom:
            - configMapRef:
                name: scraper-config
            - secretRef:
                name: scraper-secrets
          env:
            - name: API_BASE_URL
              value: "http://scraper:3010"
            # Read by the startup script above to decide how many workers to start.
            - name: WORKERS_PER_POD
              value: "5"
          volumeMounts:
            - name: pod-names
              mountPath: /etc/pod-names
          resources:
            requests:
              memory: "512Mi"
              cpu: "200m"
            limits:
              memory: "1Gi"
              cpu: "1000m"
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                # Anchor the match to the start of the command line. An
                # unanchored 'task-worker' also matches the PID 1 startup
                # shell, whose script text contains that string, so the probe
                # could never fail. This passes while at least one worker runs.
                - "pgrep -f '^node dist/tasks/task-worker' > /dev/null"
            initialDelaySeconds: 15
            periodSeconds: 30
            failureThreshold: 3
      volumes:
        - name: pod-names
          configMap:
            name: pod-names
      terminationGracePeriodSeconds: 60
|
|
---
# Headless Service (clusterIP: None) named by the worker-pod StatefulSet's
# serviceName. It selects pods labelled app=worker-pod; the single port entry
# is named "placeholder".
apiVersion: v1
kind: Service
metadata:
  namespace: dispensary-scraper
  name: worker-pods
spec:
  clusterIP: None
  selector:
    app: worker-pod
  ports:
    - name: placeholder
      port: 80
|
|
---
# HorizontalPodAutoscaler for the worker-pod StatefulSet.
# Keeps the replica count between 5 and 15, driven by the external metric
# pending_tasks (label queue=worker_tasks) with an AverageValue target of 10.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  namespace: dispensary-scraper
  name: worker-pod-hpa
spec:
  minReplicas: 5
  maxReplicas: 15
  scaleTargetRef:
    apiVersion: apps/v1
    kind: StatefulSet
    name: worker-pod
  metrics:
    - type: External
      external:
        metric:
          name: pending_tasks
          selector:
            matchLabels:
              queue: worker_tasks
        target:
          type: AverageValue
          averageValue: "10"
|