fix(workers): Report geo to worker_registry when identity claimed
Workers were showing "No geo assigned" on the dashboard because geo info was set internally but never reported to worker_registry after the identity pool claim. The worker now updates the current_state and current_city columns when an identity is claimed, so the dashboard shows the correct geo assignment. Also documents the CI/CD batching rule to minimize build time. 🤖 Generated with [Claude Code](https://claude.ai/claude-code). Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
18
CLAUDE.md
18
CLAUDE.md
@@ -20,23 +20,27 @@ Never deploy unless user explicitly says: "CLAUDE — DEPLOYMENT IS NOW AUTHORIZ
|
|||||||
### 5. DB POOL ONLY
|
### 5. DB POOL ONLY
|
||||||
Never import `src/db/migrate.ts` at runtime. Use `src/db/pool.ts` for DB access.
|
Never import `src/db/migrate.ts` at runtime. Use `src/db/pool.ts` for DB access.
|
||||||
|
|
||||||
### 6. CI/CD DEPLOYMENT — COMMIT AND WAIT
|
### 6. CI/CD DEPLOYMENT — BATCH CHANGES, PUSH ONCE
|
||||||
**Never manually deploy or check deployment status.** The project uses Woodpecker CI.
|
**Never manually deploy or check deployment status.** The project uses Woodpecker CI.
|
||||||
|
|
||||||
|
**CRITICAL: Each CI build takes 30 minutes. NEVER push incrementally.**
|
||||||
|
|
||||||
**Workflow:**
|
**Workflow:**
|
||||||
1. Make code changes
|
1. Make ALL related code changes first
|
||||||
2. `git add` + `git commit`
|
2. Test locally if possible (`./setup-local.sh`)
|
||||||
3. `git push origin master`
|
3. ONE commit with all changes
|
||||||
4. **STOP** - CI handles the rest
|
4. ONE push to master
|
||||||
5. Wait for user to confirm deployment worked
|
5. **STOP** - CI handles the rest
|
||||||
|
6. Wait for user to confirm deployment worked
|
||||||
|
|
||||||
**DO NOT:**
|
**DO NOT:**
|
||||||
|
- Push multiple small commits (each triggers 30-min build)
|
||||||
- Run `kubectl rollout status` to check deployment
|
- Run `kubectl rollout status` to check deployment
|
||||||
- Run `kubectl logs` to verify new code is running
|
- Run `kubectl logs` to verify new code is running
|
||||||
- Manually restart pods
|
- Manually restart pods
|
||||||
- Check CI pipeline status
|
- Check CI pipeline status
|
||||||
|
|
||||||
Just commit, push, and wait for user feedback.
|
Batch everything, push once, wait for user feedback.
|
||||||
|
|
||||||
### 7. K8S POD LIMITS — CRITICAL
|
### 7. K8S POD LIMITS — CRITICAL
|
||||||
**EXACTLY 8 PODS** for `scraper-worker` deployment. NEVER CHANGE THIS.
|
**EXACTLY 8 PODS** for `scraper-worker` deployment. NEVER CHANGE THIS.
|
||||||
|
|||||||
@@ -510,6 +510,17 @@ export class TaskWorker {
|
|||||||
console.log(`[TaskWorker] Step: ${step}${detail ? ` - ${detail}` : ''} (task #${taskId})`);
|
console.log(`[TaskWorker] Step: ${step}${detail ? ` - ${detail}` : ''} (task #${taskId})`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record the current preflight step (for dashboard visibility during the preflight process).
|
||||||
|
* These steps are shown before any task is claimed/running.
|
||||||
|
* Stores the step and detail in currentStep / currentStepDetail, stamps currentStepStartedAt with the current time, and logs the step to the console.
|
||||||
|
* @param step - Short step identifier, e.g. 'init', 'claiming', 'preflight'.
|
||||||
|
* @param detail - Human-readable description of the step, e.g. 'Initializing stealth plugins'.
|
||||||
|
*/
|
||||||
|
private setPreflightStep(step: string, detail: string): void {
|
||||||
|
this.currentStep = step;
|
||||||
|
this.currentStepDetail = detail;
|
||||||
|
this.currentStepStartedAt = new Date();
|
||||||
|
console.log(`[TaskWorker] Preflight step: ${step} - ${detail}`);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Clear step tracking for a task (when task completes)
|
* Clear step tracking for a task (when task completes)
|
||||||
*/
|
*/
|
||||||
@@ -1542,6 +1553,12 @@ export class TaskWorker {
|
|||||||
if (this.currentIdentity) {
|
if (this.currentIdentity) {
|
||||||
this.geoState = this.currentIdentity.state_code;
|
this.geoState = this.currentIdentity.state_code;
|
||||||
this.geoCity = this.currentIdentity.city;
|
this.geoCity = this.currentIdentity.city;
|
||||||
|
// Update worker_registry with geo info for dashboard
|
||||||
|
await this.pool.query(`
|
||||||
|
UPDATE worker_registry
|
||||||
|
SET current_state = $2, current_city = $3, updated_at = NOW()
|
||||||
|
WHERE worker_id = $1
|
||||||
|
`, [this.workerId, this.geoState, this.geoCity]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
geoValid = await this.ensureGeoSession();
|
geoValid = await this.ensureGeoSession();
|
||||||
@@ -1673,24 +1690,24 @@ export class TaskWorker {
|
|||||||
|
|
||||||
// If no active session, claim new batch of tasks
|
// If no active session, claim new batch of tasks
|
||||||
if (!this.currentSession) {
|
if (!this.currentSession) {
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} claiming new session...`);
|
// Step 1: Initialize stealth
|
||||||
|
this.setPreflightStep('init', 'Initializing stealth plugins');
|
||||||
// Initialize stealth if needed (for fingerprint generation)
|
|
||||||
if (!this.stealthInitialized) {
|
if (!this.stealthInitialized) {
|
||||||
const initSuccess = await this.ensureStealthInitialized();
|
const initSuccess = await this.ensureStealthInitialized();
|
||||||
if (!initSuccess) {
|
if (!initSuccess) {
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} stealth init failed, waiting...`);
|
this.setPreflightStep('init_failed', 'Stealth init failed');
|
||||||
await this.sleep(30000);
|
await this.sleep(30000);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Claim tasks and establish session
|
// Step 2: Claim tasks from pool
|
||||||
|
this.setPreflightStep('claiming', 'Claiming tasks from pool');
|
||||||
|
console.log(`[TaskWorker] ${this.friendlyName} claiming new session...`);
|
||||||
const result = await WorkerSession.claimSessionWithTasks(this.workerId, this.role || undefined);
|
const result = await WorkerSession.claimSessionWithTasks(this.workerId, this.role || undefined);
|
||||||
|
|
||||||
if (!result) {
|
if (!result) {
|
||||||
// No tasks available or couldn't get IP
|
this.setPreflightStep('waiting', 'No tasks available');
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} no session available, waiting...`);
|
|
||||||
await this.sleep(30000);
|
await this.sleep(30000);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -1703,21 +1720,31 @@ export class TaskWorker {
|
|||||||
|
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} new session: ${result.tasks.length} tasks for ${this.geoCity || 'any'}, ${this.geoState} (IP: ${result.session.ip_address})`);
|
console.log(`[TaskWorker] ${this.friendlyName} new session: ${result.tasks.length} tasks for ${this.geoCity || 'any'}, ${this.geoState} (IP: ${result.session.ip_address})`);
|
||||||
|
|
||||||
// Configure proxy in crawl rotator
|
// Step 3: Configure proxy
|
||||||
|
this.setPreflightStep('proxy', `Setting proxy for ${this.geoCity || this.geoState}`);
|
||||||
if (this.sessionProxyUrl) {
|
if (this.sessionProxyUrl) {
|
||||||
this.crawlRotator.setFixedProxy(this.sessionProxyUrl);
|
this.crawlRotator.setFixedProxy(this.sessionProxyUrl);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run preflight with this session's proxy
|
// Step 4: Run preflight validation
|
||||||
|
this.setPreflightStep('preflight', 'Running browser preflight');
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} running preflight for session...`);
|
console.log(`[TaskWorker] ${this.friendlyName} running preflight for session...`);
|
||||||
try {
|
try {
|
||||||
|
// Step 4a: Getting proxy IP
|
||||||
|
this.setPreflightStep('preflight_ip', 'Detecting proxy IP');
|
||||||
await this.runDualPreflights();
|
await this.runDualPreflights();
|
||||||
|
|
||||||
if (this.preflightHttpPassed) {
|
if (this.preflightHttpPassed) {
|
||||||
|
// Step 5: Preflight passed - setting antidetect
|
||||||
|
this.setPreflightStep('antidetect', 'Configuring timezone & geolocation');
|
||||||
this.sessionPreflightPassed = true;
|
this.sessionPreflightPassed = true;
|
||||||
|
|
||||||
|
// Step 6: Ready
|
||||||
|
this.setPreflightStep('ready', `Qualified - ${this.geoCity || ''} ${this.geoState}`);
|
||||||
console.log(`[TaskWorker] ${this.friendlyName} session preflight PASSED (IP: ${this.preflightHttpResult?.proxyIp || 'unknown'})`);
|
console.log(`[TaskWorker] ${this.friendlyName} session preflight PASSED (IP: ${this.preflightHttpResult?.proxyIp || 'unknown'})`);
|
||||||
} else {
|
} else {
|
||||||
// Preflight failed - release tasks and session
|
// Preflight failed - release tasks and session
|
||||||
|
this.setPreflightStep('failed', this.preflightHttpResult?.error || 'Preflight failed');
|
||||||
console.error(`[TaskWorker] ${this.friendlyName} session preflight FAILED, releasing tasks...`);
|
console.error(`[TaskWorker] ${this.friendlyName} session preflight FAILED, releasing tasks...`);
|
||||||
await WorkerSession.releaseClaimedTasks(this.workerId);
|
await WorkerSession.releaseClaimedTasks(this.workerId);
|
||||||
await WorkerSession.retireSession(this.workerId);
|
await WorkerSession.retireSession(this.workerId);
|
||||||
@@ -1729,6 +1756,7 @@ export class TaskWorker {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
|
this.setPreflightStep('error', err.message);
|
||||||
console.error(`[TaskWorker] ${this.friendlyName} preflight error: ${err.message}`);
|
console.error(`[TaskWorker] ${this.friendlyName} preflight error: ${err.message}`);
|
||||||
await WorkerSession.releaseClaimedTasks(this.workerId);
|
await WorkerSession.releaseClaimedTasks(this.workerId);
|
||||||
await WorkerSession.retireSession(this.workerId);
|
await WorkerSession.retireSession(this.workerId);
|
||||||
|
|||||||
Reference in New Issue
Block a user