fix(workers): Report geo to worker_registry when identity claimed

Workers were showing "No geo assigned" on dashboard because geo info
was set internally but never reported to worker_registry after
identity pool claim.

Now updates current_state and current_city columns when identity
is claimed, so dashboard shows correct geo assignment.

Also documents CI/CD batching rule to minimize build time.

🤖 Generated with [Claude Code](https://claude.ai/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-14 01:14:31 -07:00
parent ec5fcd9bc4
commit eedc027ff6
2 changed files with 48 additions and 16 deletions

View File

@@ -20,23 +20,27 @@ Never deploy unless user explicitly says: "CLAUDE — DEPLOYMENT IS NOW AUTHORIZ
### 5. DB POOL ONLY
Never import `src/db/migrate.ts` at runtime. Use `src/db/pool.ts` for DB access.
### 6. CI/CD DEPLOYMENT — COMMIT AND WAIT
### 6. CI/CD DEPLOYMENT — BATCH CHANGES, PUSH ONCE
**Never manually deploy or check deployment status.** The project uses Woodpecker CI.
**CRITICAL: Each CI build takes 30 minutes. NEVER push incrementally.**
**Workflow:**
1. Make code changes
2. `git add` + `git commit`
3. `git push origin master`
4. **STOP** - CI handles the rest
5. Wait for user to confirm deployment worked
1. Make ALL related code changes first
2. Test locally if possible (`./setup-local.sh`)
3. ONE commit with all changes
4. ONE push to master
5. **STOP** - CI handles the rest
6. Wait for user to confirm deployment worked
**DO NOT:**
- Push multiple small commits (each triggers 30-min build)
- Run `kubectl rollout status` to check deployment
- Run `kubectl logs` to verify new code is running
- Manually restart pods
- Check CI pipeline status
Just commit, push, and wait for user feedback.
Batch everything, push once, wait for user feedback.
### 7. K8S POD LIMITS — CRITICAL
**EXACTLY 8 PODS** for `scraper-worker` deployment. NEVER CHANGE THIS.

View File

@@ -510,6 +510,17 @@ export class TaskWorker {
console.log(`[TaskWorker] Step: ${step}${detail ? ` - ${detail}` : ''} (task #${taskId})`);
}
/**
 * Record which preflight stage the worker is currently in, for dashboard
 * visibility during the preflight process. These stages are shown before any
 * task is claimed or running.
 *
 * Updates the current step, its detail text, and the step start timestamp,
 * then writes a single log line describing the stage.
 *
 * @param step - Short identifier of the preflight stage.
 * @param detail - Human-readable description of the stage.
 */
private setPreflightStep(step: string, detail: string): void {
  const startedAt = new Date();
  const logLine = `[TaskWorker] Preflight step: ${step} - ${detail}`;

  this.currentStep = step;
  this.currentStepDetail = detail;
  this.currentStepStartedAt = startedAt;

  console.log(logLine);
}
/**
* Clear step tracking for a task (when task completes)
*/
@@ -1542,6 +1553,12 @@ export class TaskWorker {
if (this.currentIdentity) {
this.geoState = this.currentIdentity.state_code;
this.geoCity = this.currentIdentity.city;
// Update worker_registry with geo info for dashboard
await this.pool.query(`
UPDATE worker_registry
SET current_state = $2, current_city = $3, updated_at = NOW()
WHERE worker_id = $1
`, [this.workerId, this.geoState, this.geoCity]);
}
} else {
geoValid = await this.ensureGeoSession();
@@ -1673,24 +1690,24 @@ export class TaskWorker {
// If no active session, claim new batch of tasks
if (!this.currentSession) {
console.log(`[TaskWorker] ${this.friendlyName} claiming new session...`);
// Initialize stealth if needed (for fingerprint generation)
// Step 1: Initialize stealth
this.setPreflightStep('init', 'Initializing stealth plugins');
if (!this.stealthInitialized) {
const initSuccess = await this.ensureStealthInitialized();
if (!initSuccess) {
console.log(`[TaskWorker] ${this.friendlyName} stealth init failed, waiting...`);
this.setPreflightStep('init_failed', 'Stealth init failed');
await this.sleep(30000);
return;
}
}
// Claim tasks and establish session
// Step 2: Claim tasks from pool
this.setPreflightStep('claiming', 'Claiming tasks from pool');
console.log(`[TaskWorker] ${this.friendlyName} claiming new session...`);
const result = await WorkerSession.claimSessionWithTasks(this.workerId, this.role || undefined);
if (!result) {
// No tasks available or couldn't get IP
console.log(`[TaskWorker] ${this.friendlyName} no session available, waiting...`);
this.setPreflightStep('waiting', 'No tasks available');
await this.sleep(30000);
return;
}
@@ -1703,21 +1720,31 @@ export class TaskWorker {
console.log(`[TaskWorker] ${this.friendlyName} new session: ${result.tasks.length} tasks for ${this.geoCity || 'any'}, ${this.geoState} (IP: ${result.session.ip_address})`);
// Configure proxy in crawl rotator
// Step 3: Configure proxy
this.setPreflightStep('proxy', `Setting proxy for ${this.geoCity || this.geoState}`);
if (this.sessionProxyUrl) {
this.crawlRotator.setFixedProxy(this.sessionProxyUrl);
}
// Run preflight with this session's proxy
// Step 4: Run preflight validation
this.setPreflightStep('preflight', 'Running browser preflight');
console.log(`[TaskWorker] ${this.friendlyName} running preflight for session...`);
try {
// Step 4a: Getting proxy IP
this.setPreflightStep('preflight_ip', 'Detecting proxy IP');
await this.runDualPreflights();
if (this.preflightHttpPassed) {
// Step 5: Preflight passed - setting antidetect
this.setPreflightStep('antidetect', 'Configuring timezone & geolocation');
this.sessionPreflightPassed = true;
// Step 6: Ready
this.setPreflightStep('ready', `Qualified - ${this.geoCity || ''} ${this.geoState}`);
console.log(`[TaskWorker] ${this.friendlyName} session preflight PASSED (IP: ${this.preflightHttpResult?.proxyIp || 'unknown'})`);
} else {
// Preflight failed - release tasks and session
this.setPreflightStep('failed', this.preflightHttpResult?.error || 'Preflight failed');
console.error(`[TaskWorker] ${this.friendlyName} session preflight FAILED, releasing tasks...`);
await WorkerSession.releaseClaimedTasks(this.workerId);
await WorkerSession.retireSession(this.workerId);
@@ -1729,6 +1756,7 @@ export class TaskWorker {
return;
}
} catch (err: any) {
this.setPreflightStep('error', err.message);
console.error(`[TaskWorker] ${this.friendlyName} preflight error: ${err.message}`);
await WorkerSession.releaseClaimedTasks(this.workerId);
await WorkerSession.retireSession(this.workerId);