fix(workers): Report geo to worker_registry when identity claimed

Workers were showing "No geo assigned" on the dashboard because geo info
was set internally but never reported to worker_registry after an
identity was claimed from the identity pool.

The worker now updates the current_state and current_city columns in
worker_registry when an identity is claimed, so the dashboard shows the
correct geo assignment.

Also documents CI/CD batching rule to minimize build time.

🤖 Generated with [Claude Code](https://claude.ai/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-14 01:14:31 -07:00
parent ec5fcd9bc4
commit eedc027ff6
2 changed files with 48 additions and 16 deletions

View File

@@ -20,23 +20,27 @@ Never deploy unless user explicitly says: "CLAUDE — DEPLOYMENT IS NOW AUTHORIZ
### 5. DB POOL ONLY ### 5. DB POOL ONLY
Never import `src/db/migrate.ts` at runtime. Use `src/db/pool.ts` for DB access. Never import `src/db/migrate.ts` at runtime. Use `src/db/pool.ts` for DB access.
### 6. CI/CD DEPLOYMENT — COMMIT AND WAIT ### 6. CI/CD DEPLOYMENT — BATCH CHANGES, PUSH ONCE
**Never manually deploy or check deployment status.** The project uses Woodpecker CI. **Never manually deploy or check deployment status.** The project uses Woodpecker CI.
**CRITICAL: Each CI build takes 30 minutes. NEVER push incrementally.**
**Workflow:** **Workflow:**
1. Make code changes 1. Make ALL related code changes first
2. `git add` + `git commit` 2. Test locally if possible (./setup-local.sh)
3. `git push origin master` 3. ONE commit with all changes
4. **STOP** - CI handles the rest 4. ONE push to master
5. Wait for user to confirm deployment worked 5. **STOP** - CI handles the rest
6. Wait for user to confirm deployment worked
**DO NOT:** **DO NOT:**
- Push multiple small commits (each triggers 30-min build)
- Run `kubectl rollout status` to check deployment - Run `kubectl rollout status` to check deployment
- Run `kubectl logs` to verify new code is running - Run `kubectl logs` to verify new code is running
- Manually restart pods - Manually restart pods
- Check CI pipeline status - Check CI pipeline status
Just commit, push, and wait for user feedback. Batch everything, push once, wait for user feedback.
### 7. K8S POD LIMITS — CRITICAL ### 7. K8S POD LIMITS — CRITICAL
**EXACTLY 8 PODS** for `scraper-worker` deployment. NEVER CHANGE THIS. **EXACTLY 8 PODS** for `scraper-worker` deployment. NEVER CHANGE THIS.

View File

@@ -510,6 +510,17 @@ export class TaskWorker {
console.log(`[TaskWorker] Step: ${step}${detail ? ` - ${detail}` : ''} (task #${taskId})`); console.log(`[TaskWorker] Step: ${step}${detail ? ` - ${detail}` : ''} (task #${taskId})`);
} }
/**
 * Record which preflight step the worker is currently on.
 *
 * Stores the step name, its detail text, and the time the step began on the
 * worker instance (for dashboard visibility during the preflight process,
 * before any task is claimed/running), then logs the transition.
 *
 * @param step - Short identifier of the preflight step (e.g. 'init', 'claiming').
 * @param detail - Human-readable description of what the step is doing.
 */
private setPreflightStep(step: string, detail: string): void {
  // Capture the timestamp and the log text up front, then publish the state.
  const startedAt = new Date();
  const logLine = `[TaskWorker] Preflight step: ${step} - ${detail}`;

  this.currentStep = step;
  this.currentStepDetail = detail;
  this.currentStepStartedAt = startedAt;

  console.log(logLine);
}
/** /**
* Clear step tracking for a task (when task completes) * Clear step tracking for a task (when task completes)
*/ */
@@ -1542,6 +1553,12 @@ export class TaskWorker {
if (this.currentIdentity) { if (this.currentIdentity) {
this.geoState = this.currentIdentity.state_code; this.geoState = this.currentIdentity.state_code;
this.geoCity = this.currentIdentity.city; this.geoCity = this.currentIdentity.city;
// Update worker_registry with geo info for dashboard
await this.pool.query(`
UPDATE worker_registry
SET current_state = $2, current_city = $3, updated_at = NOW()
WHERE worker_id = $1
`, [this.workerId, this.geoState, this.geoCity]);
} }
} else { } else {
geoValid = await this.ensureGeoSession(); geoValid = await this.ensureGeoSession();
@@ -1673,24 +1690,24 @@ export class TaskWorker {
// If no active session, claim new batch of tasks // If no active session, claim new batch of tasks
if (!this.currentSession) { if (!this.currentSession) {
console.log(`[TaskWorker] ${this.friendlyName} claiming new session...`); // Step 1: Initialize stealth
this.setPreflightStep('init', 'Initializing stealth plugins');
// Initialize stealth if needed (for fingerprint generation)
if (!this.stealthInitialized) { if (!this.stealthInitialized) {
const initSuccess = await this.ensureStealthInitialized(); const initSuccess = await this.ensureStealthInitialized();
if (!initSuccess) { if (!initSuccess) {
console.log(`[TaskWorker] ${this.friendlyName} stealth init failed, waiting...`); this.setPreflightStep('init_failed', 'Stealth init failed');
await this.sleep(30000); await this.sleep(30000);
return; return;
} }
} }
// Claim tasks and establish session // Step 2: Claim tasks from pool
this.setPreflightStep('claiming', 'Claiming tasks from pool');
console.log(`[TaskWorker] ${this.friendlyName} claiming new session...`);
const result = await WorkerSession.claimSessionWithTasks(this.workerId, this.role || undefined); const result = await WorkerSession.claimSessionWithTasks(this.workerId, this.role || undefined);
if (!result) { if (!result) {
// No tasks available or couldn't get IP this.setPreflightStep('waiting', 'No tasks available');
console.log(`[TaskWorker] ${this.friendlyName} no session available, waiting...`);
await this.sleep(30000); await this.sleep(30000);
return; return;
} }
@@ -1703,21 +1720,31 @@ export class TaskWorker {
console.log(`[TaskWorker] ${this.friendlyName} new session: ${result.tasks.length} tasks for ${this.geoCity || 'any'}, ${this.geoState} (IP: ${result.session.ip_address})`); console.log(`[TaskWorker] ${this.friendlyName} new session: ${result.tasks.length} tasks for ${this.geoCity || 'any'}, ${this.geoState} (IP: ${result.session.ip_address})`);
// Configure proxy in crawl rotator // Step 3: Configure proxy
this.setPreflightStep('proxy', `Setting proxy for ${this.geoCity || this.geoState}`);
if (this.sessionProxyUrl) { if (this.sessionProxyUrl) {
this.crawlRotator.setFixedProxy(this.sessionProxyUrl); this.crawlRotator.setFixedProxy(this.sessionProxyUrl);
} }
// Run preflight with this session's proxy // Step 4: Run preflight validation
this.setPreflightStep('preflight', 'Running browser preflight');
console.log(`[TaskWorker] ${this.friendlyName} running preflight for session...`); console.log(`[TaskWorker] ${this.friendlyName} running preflight for session...`);
try { try {
// Step 4a: Getting proxy IP
this.setPreflightStep('preflight_ip', 'Detecting proxy IP');
await this.runDualPreflights(); await this.runDualPreflights();
if (this.preflightHttpPassed) { if (this.preflightHttpPassed) {
// Step 5: Preflight passed - setting antidetect
this.setPreflightStep('antidetect', 'Configuring timezone & geolocation');
this.sessionPreflightPassed = true; this.sessionPreflightPassed = true;
// Step 6: Ready
this.setPreflightStep('ready', `Qualified - ${this.geoCity || ''} ${this.geoState}`);
console.log(`[TaskWorker] ${this.friendlyName} session preflight PASSED (IP: ${this.preflightHttpResult?.proxyIp || 'unknown'})`); console.log(`[TaskWorker] ${this.friendlyName} session preflight PASSED (IP: ${this.preflightHttpResult?.proxyIp || 'unknown'})`);
} else { } else {
// Preflight failed - release tasks and session // Preflight failed - release tasks and session
this.setPreflightStep('failed', this.preflightHttpResult?.error || 'Preflight failed');
console.error(`[TaskWorker] ${this.friendlyName} session preflight FAILED, releasing tasks...`); console.error(`[TaskWorker] ${this.friendlyName} session preflight FAILED, releasing tasks...`);
await WorkerSession.releaseClaimedTasks(this.workerId); await WorkerSession.releaseClaimedTasks(this.workerId);
await WorkerSession.retireSession(this.workerId); await WorkerSession.retireSession(this.workerId);
@@ -1729,6 +1756,7 @@ export class TaskWorker {
return; return;
} }
} catch (err: any) { } catch (err: any) {
this.setPreflightStep('error', err.message);
console.error(`[TaskWorker] ${this.friendlyName} preflight error: ${err.message}`); console.error(`[TaskWorker] ${this.friendlyName} preflight error: ${err.message}`);
await WorkerSession.releaseClaimedTasks(this.workerId); await WorkerSession.releaseClaimedTasks(this.workerId);
await WorkerSession.retireSession(this.workerId); await WorkerSession.retireSession(this.workerId);