diff --git a/CLAUDE.md b/CLAUDE.md index afafa744..93859375 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -20,23 +20,27 @@ Never deploy unless user explicitly says: "CLAUDE — DEPLOYMENT IS NOW AUTHORIZ ### 5. DB POOL ONLY Never import `src/db/migrate.ts` at runtime. Use `src/db/pool.ts` for DB access. -### 6. CI/CD DEPLOYMENT — COMMIT AND WAIT +### 6. CI/CD DEPLOYMENT — BATCH CHANGES, PUSH ONCE **Never manually deploy or check deployment status.** The project uses Woodpecker CI. +**CRITICAL: Each CI build takes 30 minutes. NEVER push incrementally.** + **Workflow:** -1. Make code changes -2. `git add` + `git commit` -3. `git push origin master` -4. **STOP** - CI handles the rest -5. Wait for user to confirm deployment worked +1. Make ALL related code changes first +2. Test locally if possible (./setup-local.sh) +3. ONE commit with all changes +4. ONE push to master +5. **STOP** - CI handles the rest +6. Wait for user to confirm deployment worked **DO NOT:** +- Push multiple small commits (each triggers 30-min build) - Run `kubectl rollout status` to check deployment - Run `kubectl logs` to verify new code is running - Manually restart pods - Check CI pipeline status -Just commit, push, and wait for user feedback. +Batch everything, push once, wait for user feedback. ### 7. K8S POD LIMITS — CRITICAL **EXACTLY 8 PODS** for `scraper-worker` deployment. NEVER CHANGE THIS. diff --git a/backend/src/tasks/task-worker.ts b/backend/src/tasks/task-worker.ts index 73c10569..08caa94f 100644 --- a/backend/src/tasks/task-worker.ts +++ b/backend/src/tasks/task-worker.ts @@ -510,6 +510,17 @@ export class TaskWorker { console.log(`[TaskWorker] Step: ${step}${detail ? 
` - ${detail}` : ''} (task #${taskId})`); } + /** + * Set preflight step (for dashboard visibility during preflight process): stores step and detail as the current step of the worker, stamps currentStepStartedAt with the current time, and logs both. + * These steps are shown before any task is claimed/running, so the log line carries no task id. + */ + private setPreflightStep(step: string, detail: string): void { + this.currentStep = step; + this.currentStepDetail = detail; + this.currentStepStartedAt = new Date(); + console.log(`[TaskWorker] Preflight step: ${step} - ${detail}`); + } + /** * Clear step tracking for a task (when task completes) */ @@ -1542,6 +1553,12 @@ export class TaskWorker { if (this.currentIdentity) { this.geoState = this.currentIdentity.state_code; this.geoCity = this.currentIdentity.city; + // Update worker_registry with geo info for dashboard (sets current_state/current_city and bumps updated_at on the row for this worker) + await this.pool.query(` + UPDATE worker_registry + SET current_state = $2, current_city = $3, updated_at = NOW() + WHERE worker_id = $1 + `, [this.workerId, this.geoState, this.geoCity]); } } else { geoValid = await this.ensureGeoSession(); @@ -1673,24 +1690,24 @@ export class TaskWorker { // If no active session, claim new batch of tasks if (!this.currentSession) { - console.log(`[TaskWorker] ${this.friendlyName} claiming new session...`); - - // Initialize stealth if needed (for fingerprint generation) + // Step 1: Initialize stealth + this.setPreflightStep('init', 'Initializing stealth plugins'); if (!this.stealthInitialized) { const initSuccess = await this.ensureStealthInitialized(); if (!initSuccess) { - console.log(`[TaskWorker] ${this.friendlyName} stealth init failed, waiting...`); + this.setPreflightStep('init_failed', 'Stealth init failed'); await this.sleep(30000); return; } } - // Claim tasks and establish session + // Step 2: Claim tasks from pool + this.setPreflightStep('claiming', 'Claiming tasks from pool'); + console.log(`[TaskWorker] ${this.friendlyName} claiming new session...`); const result = await WorkerSession.claimSessionWithTasks(this.workerId, this.role || undefined); if (!result) { - // No tasks available or couldn't get IP - 
console.log(`[TaskWorker] ${this.friendlyName} no session available, waiting...`); + this.setPreflightStep('waiting', 'No tasks available'); await this.sleep(30000); return; } @@ -1703,21 +1720,31 @@ export class TaskWorker { console.log(`[TaskWorker] ${this.friendlyName} new session: ${result.tasks.length} tasks for ${this.geoCity || 'any'}, ${this.geoState} (IP: ${result.session.ip_address})`); - // Configure proxy in crawl rotator + // Step 3: Configure proxy + this.setPreflightStep('proxy', `Setting proxy for ${this.geoCity || this.geoState}`); if (this.sessionProxyUrl) { this.crawlRotator.setFixedProxy(this.sessionProxyUrl); } - // Run preflight with this session's proxy + // Step 4: Run preflight validation + this.setPreflightStep('preflight', 'Running browser preflight'); console.log(`[TaskWorker] ${this.friendlyName} running preflight for session...`); try { + // Step 4a: Getting proxy IP + this.setPreflightStep('preflight_ip', 'Detecting proxy IP'); await this.runDualPreflights(); if (this.preflightHttpPassed) { + // Step 5: Preflight passed - setting antidetect + this.setPreflightStep('antidetect', 'Configuring timezone & geolocation'); this.sessionPreflightPassed = true; + + // Step 6: Ready + this.setPreflightStep('ready', `Qualified - ${this.geoCity || ''} ${this.geoState}`); console.log(`[TaskWorker] ${this.friendlyName} session preflight PASSED (IP: ${this.preflightHttpResult?.proxyIp || 'unknown'})`); } else { // Preflight failed - release tasks and session + this.setPreflightStep('failed', this.preflightHttpResult?.error || 'Preflight failed'); console.error(`[TaskWorker] ${this.friendlyName} session preflight FAILED, releasing tasks...`); await WorkerSession.releaseClaimedTasks(this.workerId); await WorkerSession.retireSession(this.workerId); @@ -1729,6 +1756,7 @@ export class TaskWorker { return; } } catch (err: any) { + this.setPreflightStep('error', err.message); console.error(`[TaskWorker] ${this.friendlyName} preflight error: 
${err.message}`); await WorkerSession.releaseClaimedTasks(this.workerId); await WorkerSession.retireSession(this.workerId);