feat: Stealth worker system with mandatory proxy rotation

## Worker System
- Role-agnostic workers that can handle any task type
- Pod-based architecture with StatefulSet (5-15 pods, 5 workers each)
- Custom pod names (Aethelgard, Xylos, Kryll, etc.)
- Worker registry with friendly names and resource monitoring
- Hub-and-spoke visualization on JobQueue page

## Stealth & Anti-Detection (REQUIRED)
- Proxies are MANDATORY - workers fail to start without active proxies
- CrawlRotator initializes on worker startup
- Loads proxies from `proxies` table
- Auto-rotates proxy + fingerprint on 403 errors
- 12 browser fingerprints (Chrome, Firefox, Safari, Edge)
- Locale/timezone matching for geographic consistency

## Task System
- Renamed product_resync → product_refresh
- Task chaining: store_discovery → entry_point → product_discovery
- Priority-based claiming with FOR UPDATE SKIP LOCKED
- Heartbeat and stale task recovery

## UI Updates
- JobQueue: Pod visualization, resource monitoring on hover
- WorkersDashboard: Simplified worker list
- Removed unused filters from task list

## Other
- IP2Location service for visitor analytics
- Findagram consumer features scaffolding
- Documentation updates

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-10 00:44:59 -07:00
parent 0295637ed6
commit 56cc171287
61 changed files with 8591 additions and 2076 deletions

View File

@@ -191,6 +191,23 @@ export async function runFullDiscovery(
}
}
// Step 5: Detect dropped stores (in DB but not in discovery results)
if (!dryRun) {
console.log('\n[Discovery] Step 5: Detecting dropped stores...');
const droppedResult = await detectDroppedStores(pool, stateCode);
if (droppedResult.droppedCount > 0) {
console.log(`[Discovery] Found ${droppedResult.droppedCount} dropped stores:`);
droppedResult.droppedStores.slice(0, 10).forEach(s => {
console.log(` - ${s.name} (${s.city}, ${s.state}) - last seen: ${s.lastSeenAt}`);
});
if (droppedResult.droppedCount > 10) {
console.log(` ... and ${droppedResult.droppedCount - 10} more`);
}
} else {
console.log(`[Discovery] No dropped stores detected`);
}
}
return {
cities: cityResult,
locations: locationResults,
@@ -200,6 +217,107 @@ export async function runFullDiscovery(
};
}
// ============================================================
// DROPPED STORE DETECTION
// ============================================================
export interface DroppedStoreResult {
droppedCount: number;
droppedStores: Array<{
id: number;
name: string;
city: string;
state: string;
platformDispensaryId: string;
lastSeenAt: string;
}>;
}
/**
* Detect stores that exist in dispensaries but were not found in discovery.
* Marks them as status='dropped' for manual review.
*
* A store is considered "dropped" if:
* 1. It has a platform_dispensary_id (was verified via Dutchie)
* 2. It was NOT seen in the latest discovery crawl (last_seen_at in discovery < 24h ago)
* 3. It's currently marked as 'open' status
*/
export async function detectDroppedStores(
pool: Pool,
stateCode?: string
): Promise<DroppedStoreResult> {
// Find dispensaries that:
// 1. Have platform_dispensary_id (verified Dutchie stores)
// 2. Are currently 'open' status
// 3. Have a linked discovery record that wasn't seen in the last discovery run
// (last_seen_at in dutchie_discovery_locations is older than 24 hours)
const params: any[] = [];
let stateFilter = '';
if (stateCode) {
stateFilter = ` AND d.state = $1`;
params.push(stateCode);
}
const query = `
WITH recently_seen AS (
SELECT DISTINCT platform_location_id
FROM dutchie_discovery_locations
WHERE last_seen_at > NOW() - INTERVAL '24 hours'
AND active = true
)
SELECT
d.id,
d.name,
d.city,
d.state,
d.platform_dispensary_id,
d.updated_at as last_seen_at
FROM dispensaries d
WHERE d.platform_dispensary_id IS NOT NULL
AND d.platform = 'dutchie'
AND (d.status = 'open' OR d.status IS NULL)
AND d.crawl_enabled = true
AND d.platform_dispensary_id NOT IN (SELECT platform_location_id FROM recently_seen)
${stateFilter}
ORDER BY d.name
`;
const result = await pool.query(query, params);
const droppedStores = result.rows;
// Mark these stores as 'dropped' status
if (droppedStores.length > 0) {
const ids = droppedStores.map(s => s.id);
await pool.query(`
UPDATE dispensaries
SET status = 'dropped', updated_at = NOW()
WHERE id = ANY($1::int[])
`, [ids]);
// Log to promotion log for audit
for (const store of droppedStores) {
await pool.query(`
INSERT INTO dutchie_promotion_log
(dispensary_id, action, state_code, store_name, triggered_by)
VALUES ($1, 'dropped', $2, $3, 'discovery_detection')
`, [store.id, store.state, store.name]);
}
}
return {
droppedCount: droppedStores.length,
droppedStores: droppedStores.map(s => ({
id: s.id,
name: s.name,
city: s.city,
state: s.state,
platformDispensaryId: s.platform_dispensary_id,
lastSeenAt: s.last_seen_at,
})),
};
}
// ============================================================
// SINGLE CITY DISCOVERY
// ============================================================