Compare commits

...

4 Commits

Author SHA1 Message Date
Kelly
fdce5e0302 fix(workers): Fix false memory backoff and add backing-off color coding
- Fix memory calculation to use max-old-space-size (1500MB) instead of
  V8's dynamic heapTotal. This prevents false 95%+ readings when idle.
- Add yellow color for backing-off workers in pod visualization
- Update legend and tooltips with backing-off status
- Remove pool toggle from TasksDashboard (moved to Workers page)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-11 19:11:42 -07:00
Kelly
4679b245de perf(ci): Enable Docker layer caching for faster builds
Add cache_from and cache_to settings to all docker-buildx steps.
Uses registry-based caching to avoid rebuilding npm install layer
when package.json hasn't changed.

Expected improvement: 14min backend build → ~3-4min on cache hit.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-11 18:43:43 -07:00
kelly
a837070f54 Merge pull request 'refactor(admin): Consolidate JobQueue into TasksDashboard + CI worker resilience' (#43) from fix/ci-worker-resilience into master 2025-12-12 01:21:28 +00:00
kelly
52b0fad410 Merge pull request 'ci: Add worker resilience check to deploy step' (#42) from fix/ci-worker-resilience into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/42
2025-12-12 00:09:48 +00:00
6 changed files with 58 additions and 52 deletions

View File

@@ -86,6 +86,8 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max
build_args:
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
APP_GIT_SHA: ${CI_COMMIT_SHA}
@@ -112,6 +114,8 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max
depends_on: []
when:
branch: master
@@ -133,6 +137,8 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max
depends_on: []
when:
branch: master
@@ -154,6 +160,8 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max
depends_on: []
when:
branch: master

View File

@@ -86,6 +86,8 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max
build_args:
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
APP_GIT_SHA: ${CI_COMMIT_SHA}
@@ -112,6 +114,8 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max
depends_on: []
when:
branch: master
@@ -133,6 +137,8 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max
depends_on: []
when:
branch: master
@@ -154,6 +160,8 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max
depends_on: []
when:
branch: master

View File

@@ -84,6 +84,20 @@ const MAX_CONCURRENT_TASKS = parseInt(process.env.MAX_CONCURRENT_TASKS || '3');
// Default 85% - gives headroom before OOM
const MEMORY_BACKOFF_THRESHOLD = parseFloat(process.env.MEMORY_BACKOFF_THRESHOLD || '0.85');
// Parse max heap size from NODE_OPTIONS (--max-old-space-size=1500)
// This is used as the denominator for memory percentage calculation
// V8's heapTotal is dynamic and stays small when idle, causing false high percentages
function getMaxHeapSizeMb(): number {
const nodeOptions = process.env.NODE_OPTIONS || '';
const match = nodeOptions.match(/--max-old-space-size=(\d+)/);
if (match) {
return parseInt(match[1], 10);
}
// Fallback: use 512MB if not specified
return 512;
}
const MAX_HEAP_SIZE_MB = getMaxHeapSizeMb();
// When CPU usage exceeds this threshold (as decimal 0.0-1.0), stop claiming new tasks
// Default 90% - allows some burst capacity
const CPU_BACKOFF_THRESHOLD = parseFloat(process.env.CPU_BACKOFF_THRESHOLD || '0.90');
@@ -186,12 +200,16 @@ export class TaskWorker {
/**
* Get current resource usage
* Memory percentage is calculated against MAX_HEAP_SIZE_MB (from --max-old-space-size)
* NOT against V8's dynamic heapTotal which stays small when idle
*/
private getResourceStats(): ResourceStats {
const memUsage = process.memoryUsage();
const heapUsedMb = memUsage.heapUsed / 1024 / 1024;
const heapTotalMb = memUsage.heapTotal / 1024 / 1024;
const memoryPercent = heapUsedMb / heapTotalMb;
// Use MAX_HEAP_SIZE_MB as ceiling, not dynamic heapTotal
// V8's heapTotal stays small when idle (e.g., 36MB) causing false 95%+ readings
// With --max-old-space-size=1500, we should calculate against 1500MB
const memoryPercent = heapUsedMb / MAX_HEAP_SIZE_MB;
// Calculate CPU usage since last check
const cpuUsage = process.cpuUsage();
@@ -212,7 +230,7 @@ export class TaskWorker {
return {
memoryPercent,
memoryMb: Math.round(heapUsedMb),
memoryTotalMb: Math.round(heapTotalMb),
memoryTotalMb: MAX_HEAP_SIZE_MB, // Use max-old-space-size, not dynamic heapTotal
cpuPercent: Math.min(100, cpuPercent), // Cap at 100%
isBackingOff: this.isBackingOff,
backoffReason: this.backoffReason,

View File

@@ -16,7 +16,6 @@ import {
ChevronRight,
Gauge,
Users,
Play,
Square,
Plus,
X,
@@ -451,7 +450,6 @@ export default function TasksDashboard() {
const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
const [poolPaused, setPoolPaused] = useState(false);
const [poolLoading, setPoolLoading] = useState(false);
const [showCreateModal, setShowCreateModal] = useState(false);
// Pagination
@@ -490,23 +488,6 @@ export default function TasksDashboard() {
}
};
const togglePool = async () => {
setPoolLoading(true);
try {
if (poolPaused) {
await api.resumeTaskPool();
setPoolPaused(false);
} else {
await api.pauseTaskPool();
setPoolPaused(true);
}
} catch (err: any) {
setError(err.message || 'Failed to toggle pool');
} finally {
setPoolLoading(false);
}
};
const handleDeleteTask = async (taskId: number) => {
if (!confirm('Delete this task?')) return;
try {
@@ -579,28 +560,13 @@ export default function TasksDashboard() {
<Plus className="w-4 h-4" />
Create Task
</button>
{/* Pool Toggle */}
<button
onClick={togglePool}
disabled={poolLoading}
className={`flex items-center gap-2 px-4 py-2 rounded-lg font-medium transition-colors ${
poolPaused
? 'bg-emerald-100 text-emerald-700 hover:bg-emerald-200'
: 'bg-red-100 text-red-700 hover:bg-red-200'
}`}
>
{poolPaused ? (
<>
<Play className={`w-5 h-5 ${poolLoading ? 'animate-pulse' : ''}`} />
Resume Pool
</>
) : (
<>
<Square className={`w-5 h-5 ${poolLoading ? 'animate-pulse' : ''}`} />
Pause Pool
</>
)}
</button>
{/* Pool status indicator */}
{poolPaused && (
<span className="inline-flex items-center gap-1.5 px-3 py-1.5 rounded-full text-sm font-medium bg-yellow-100 text-yellow-800">
<Square className="w-4 h-4" />
Pool Paused
</span>
)}
<span className="text-sm text-gray-400">Auto-refreshes every 15s</span>
</div>
</div>

View File

@@ -369,8 +369,10 @@ function PodVisualization({
const isBusy = worker.current_task_id !== null;
const isDecommissioning = worker.decommission_requested;
const workerColor = isDecommissioning ? 'bg-orange-500' : isBusy ? 'bg-blue-500' : 'bg-emerald-500';
const workerBorder = isDecommissioning ? 'border-orange-300' : isBusy ? 'border-blue-300' : 'border-emerald-300';
const isBackingOff = worker.metadata?.is_backing_off;
// Color priority: decommissioning > backing off > busy > idle
const workerColor = isDecommissioning ? 'bg-orange-500' : isBackingOff ? 'bg-yellow-500' : isBusy ? 'bg-blue-500' : 'bg-emerald-500';
const workerBorder = isDecommissioning ? 'border-orange-300' : isBackingOff ? 'border-yellow-300' : isBusy ? 'border-blue-300' : 'border-emerald-300';
// Line from center to worker
const lineLength = radius - 10;
@@ -381,7 +383,7 @@ function PodVisualization({
<div key={worker.id}>
{/* Connection line */}
<div
className={`absolute w-0.5 ${isDecommissioning ? 'bg-orange-300' : isBusy ? 'bg-blue-300' : 'bg-emerald-300'}`}
className={`absolute w-0.5 ${isDecommissioning ? 'bg-orange-300' : isBackingOff ? 'bg-yellow-300' : isBusy ? 'bg-blue-300' : 'bg-emerald-300'}`}
style={{
height: `${lineLength}px`,
left: '50%',
@@ -398,7 +400,7 @@ function PodVisualization({
top: '50%',
transform: `translate(-50%, -50%) translate(${x}px, ${y}px)`,
}}
title={`${worker.friendly_name}\nStatus: ${isDecommissioning ? 'Stopping after current task' : isBusy ? `Working on task #${worker.current_task_id}` : 'Idle - waiting for tasks'}\nMemory: ${worker.metadata?.memory_mb || 0} MB\nCPU: ${formatCpuTime(worker.metadata?.cpu_user_ms || 0)} user, ${formatCpuTime(worker.metadata?.cpu_system_ms || 0)} sys\nCompleted: ${worker.tasks_completed} | Failed: ${worker.tasks_failed}\nLast heartbeat: ${new Date(worker.last_heartbeat_at).toLocaleTimeString()}`}
title={`${worker.friendly_name}\nStatus: ${isDecommissioning ? 'Stopping after current task' : isBackingOff ? `Backing off: ${worker.metadata?.backoff_reason || 'resource pressure'}` : isBusy ? `Working on task #${worker.current_task_id}` : 'Ready - waiting for tasks'}\nMemory: ${worker.metadata?.memory_mb || 0} MB (${worker.metadata?.memory_percent || 0}%)\nCPU: ${formatCpuTime(worker.metadata?.cpu_user_ms || 0)} user, ${formatCpuTime(worker.metadata?.cpu_system_ms || 0)} sys\nCompleted: ${worker.tasks_completed} | Failed: ${worker.tasks_failed}\nLast heartbeat: ${new Date(worker.last_heartbeat_at).toLocaleTimeString()}`}
>
{index + 1}
</div>
@@ -700,11 +702,11 @@ export function WorkersDashboard() {
Worker Pods ({Array.from(groupWorkersByPod(workers)).length} pods, {activeWorkers.length} workers)
</h3>
<p className="text-xs text-gray-500 mt-0.5">
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-emerald-500"></span> idle</span>
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-emerald-500"></span> ready</span>
<span className="mx-2">|</span>
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-blue-500"></span> busy</span>
<span className="mx-2">|</span>
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-yellow-500"></span> mixed</span>
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-yellow-500"></span> backing off</span>
<span className="mx-2">|</span>
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-orange-500"></span> stopping</span>
</p>

View File

@@ -40,12 +40,16 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: API_BASE_URL
value: "http://scraper"
- name: NODE_OPTIONS
value: "--max-old-space-size=1500"
resources:
requests:
memory: "256Mi"
memory: "1Gi"
cpu: "100m"
limits:
memory: "512Mi"
memory: "2Gi"
cpu: "500m"
livenessProbe:
exec: