fix(workers): Report geo to worker_registry when identity claimed
Workers were showing "No geo assigned" on the dashboard because geo info was set internally but never reported to worker_registry after an identity pool claim. This change updates the current_state and current_city columns when an identity is claimed, so the dashboard shows the correct geo assignment. It also documents the CI/CD batching rule to minimize build time. 🤖 Generated with [Claude Code](https://claude.ai/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
18
CLAUDE.md
18
CLAUDE.md
@@ -20,23 +20,27 @@ Never deploy unless user explicitly says: "CLAUDE — DEPLOYMENT IS NOW AUTHORIZ
|
||||
### 5. DB POOL ONLY
|
||||
Never import `src/db/migrate.ts` at runtime. Use `src/db/pool.ts` for DB access.
|
||||
|
||||
### 6. CI/CD DEPLOYMENT — COMMIT AND WAIT
|
||||
### 6. CI/CD DEPLOYMENT — BATCH CHANGES, PUSH ONCE
|
||||
**Never manually deploy or check deployment status.** The project uses Woodpecker CI.
|
||||
|
||||
**CRITICAL: Each CI build takes 30 minutes. NEVER push incrementally.**
|
||||
|
||||
**Workflow:**
|
||||
1. Make code changes
|
||||
2. `git add` + `git commit`
|
||||
3. `git push origin master`
|
||||
4. **STOP** - CI handles the rest
|
||||
5. Wait for user to confirm deployment worked
|
||||
1. Make ALL related code changes first
|
||||
2. Test locally if possible (`./setup-local.sh`)
|
||||
3. ONE commit with all changes
|
||||
4. ONE push to master
|
||||
5. **STOP** - CI handles the rest
|
||||
6. Wait for user to confirm deployment worked
|
||||
|
||||
**DO NOT:**
|
||||
- Push multiple small commits (each triggers 30-min build)
|
||||
- Run `kubectl rollout status` to check deployment
|
||||
- Run `kubectl logs` to verify new code is running
|
||||
- Manually restart pods
|
||||
- Check CI pipeline status
|
||||
|
||||
Just commit, push, and wait for user feedback.
|
||||
Batch everything, push once, wait for user feedback.
|
||||
|
||||
### 7. K8S POD LIMITS — CRITICAL
|
||||
**EXACTLY 8 PODS** for `scraper-worker` deployment. NEVER CHANGE THIS.
|
||||
|
||||
@@ -510,6 +510,17 @@ export class TaskWorker {
|
||||
console.log(`[TaskWorker] Step: ${step}${detail ? ` - ${detail}` : ''} (task #${taskId})`);
|
||||
}
|
||||
|
||||
/**
 * Record which preflight stage the worker is currently in.
 *
 * Stores the step name, its detail text, and the time the step began on the
 * instance, then logs the transition. Per the original note, this exists for
 * dashboard visibility during preflight, before any task is claimed/running.
 *
 * @param step   Short identifier of the preflight stage.
 * @param detail Human-readable description of what the stage is doing.
 */
private setPreflightStep(step: string, detail: string): void {
  // Capture the start time up front, then publish all three tracking fields.
  const startedAt = new Date();
  this.currentStepStartedAt = startedAt;
  this.currentStepDetail = detail;
  this.currentStep = step;

  const message = `[TaskWorker] Preflight step: ${step} - ${detail}`;
  console.log(message);
}
|
||||
|
||||
/**
|
||||
* Clear step tracking for a task (when task completes)
|
||||
*/
|
||||
@@ -1542,6 +1553,12 @@ export class TaskWorker {
|
||||
if (this.currentIdentity) {
|
||||
this.geoState = this.currentIdentity.state_code;
|
||||
this.geoCity = this.currentIdentity.city;
|
||||
// Update worker_registry with geo info for dashboard
|
||||
await this.pool.query(`
|
||||
UPDATE worker_registry
|
||||
SET current_state = $2, current_city = $3, updated_at = NOW()
|
||||
WHERE worker_id = $1
|
||||
`, [this.workerId, this.geoState, this.geoCity]);
|
||||
}
|
||||
} else {
|
||||
geoValid = await this.ensureGeoSession();
|
||||
@@ -1673,24 +1690,24 @@ export class TaskWorker {
|
||||
|
||||
// If no active session, claim new batch of tasks
|
||||
if (!this.currentSession) {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} claiming new session...`);
|
||||
|
||||
// Initialize stealth if needed (for fingerprint generation)
|
||||
// Step 1: Initialize stealth
|
||||
this.setPreflightStep('init', 'Initializing stealth plugins');
|
||||
if (!this.stealthInitialized) {
|
||||
const initSuccess = await this.ensureStealthInitialized();
|
||||
if (!initSuccess) {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} stealth init failed, waiting...`);
|
||||
this.setPreflightStep('init_failed', 'Stealth init failed');
|
||||
await this.sleep(30000);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Claim tasks and establish session
|
||||
// Step 2: Claim tasks from pool
|
||||
this.setPreflightStep('claiming', 'Claiming tasks from pool');
|
||||
console.log(`[TaskWorker] ${this.friendlyName} claiming new session...`);
|
||||
const result = await WorkerSession.claimSessionWithTasks(this.workerId, this.role || undefined);
|
||||
|
||||
if (!result) {
|
||||
// No tasks available or couldn't get IP
|
||||
console.log(`[TaskWorker] ${this.friendlyName} no session available, waiting...`);
|
||||
this.setPreflightStep('waiting', 'No tasks available');
|
||||
await this.sleep(30000);
|
||||
return;
|
||||
}
|
||||
@@ -1703,21 +1720,31 @@ export class TaskWorker {
|
||||
|
||||
console.log(`[TaskWorker] ${this.friendlyName} new session: ${result.tasks.length} tasks for ${this.geoCity || 'any'}, ${this.geoState} (IP: ${result.session.ip_address})`);
|
||||
|
||||
// Configure proxy in crawl rotator
|
||||
// Step 3: Configure proxy
|
||||
this.setPreflightStep('proxy', `Setting proxy for ${this.geoCity || this.geoState}`);
|
||||
if (this.sessionProxyUrl) {
|
||||
this.crawlRotator.setFixedProxy(this.sessionProxyUrl);
|
||||
}
|
||||
|
||||
// Run preflight with this session's proxy
|
||||
// Step 4: Run preflight validation
|
||||
this.setPreflightStep('preflight', 'Running browser preflight');
|
||||
console.log(`[TaskWorker] ${this.friendlyName} running preflight for session...`);
|
||||
try {
|
||||
// Step 4a: Getting proxy IP
|
||||
this.setPreflightStep('preflight_ip', 'Detecting proxy IP');
|
||||
await this.runDualPreflights();
|
||||
|
||||
if (this.preflightHttpPassed) {
|
||||
// Step 5: Preflight passed - setting antidetect
|
||||
this.setPreflightStep('antidetect', 'Configuring timezone & geolocation');
|
||||
this.sessionPreflightPassed = true;
|
||||
|
||||
// Step 6: Ready
|
||||
this.setPreflightStep('ready', `Qualified - ${this.geoCity || ''} ${this.geoState}`);
|
||||
console.log(`[TaskWorker] ${this.friendlyName} session preflight PASSED (IP: ${this.preflightHttpResult?.proxyIp || 'unknown'})`);
|
||||
} else {
|
||||
// Preflight failed - release tasks and session
|
||||
this.setPreflightStep('failed', this.preflightHttpResult?.error || 'Preflight failed');
|
||||
console.error(`[TaskWorker] ${this.friendlyName} session preflight FAILED, releasing tasks...`);
|
||||
await WorkerSession.releaseClaimedTasks(this.workerId);
|
||||
await WorkerSession.retireSession(this.workerId);
|
||||
@@ -1729,6 +1756,7 @@ export class TaskWorker {
|
||||
return;
|
||||
}
|
||||
} catch (err: any) {
|
||||
this.setPreflightStep('error', err.message);
|
||||
console.error(`[TaskWorker] ${this.friendlyName} preflight error: ${err.message}`);
|
||||
await WorkerSession.releaseClaimedTasks(this.workerId);
|
||||
await WorkerSession.retireSession(this.workerId);
|
||||
|
||||
Reference in New Issue
Block a user