feat: Stealth worker system with mandatory proxy rotation

## Worker System
- Role-agnostic workers that can handle any task type
- Pod-based architecture with StatefulSet (5-15 pods, 5 workers each)
- Custom pod names (Aethelgard, Xylos, Kryll, etc.)
- Worker registry with friendly names and resource monitoring
- Hub-and-spoke visualization on JobQueue page

## Stealth & Anti-Detection (REQUIRED)
- Proxies are MANDATORY - workers fail to start without active proxies
- CrawlRotator initializes on worker startup
- Loads proxies from `proxies` table
- Auto-rotates proxy + fingerprint on 403 errors
- 12 browser fingerprints (Chrome, Firefox, Safari, Edge)
- Locale/timezone matching for geographic consistency

## Task System
- Renamed product_resync → product_refresh
- Task chaining: store_discovery → entry_point → product_discovery
- Priority-based claiming with FOR UPDATE SKIP LOCKED
- Heartbeat and stale task recovery

## UI Updates
- JobQueue: Pod visualization, resource monitoring on hover
- WorkersDashboard: Simplified worker list
- Removed unused filters from task list

## Other
- IP2Location service for visitor analytics
- Findagram consumer features scaffolding
- Documentation updates

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-10 00:44:59 -07:00
parent 0295637ed6
commit 56cc171287
61 changed files with 8591 additions and 2076 deletions

View File

@@ -463,7 +463,7 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => {
// Filter by on special
if (on_special === 'true' || on_special === '1') {
whereClause += ` AND s.is_on_special = TRUE`;
whereClause += ` AND s.special = TRUE`;
}
// Search by name or brand
@@ -547,7 +547,7 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => {
const { rows: countRows } = await pool.query(`
SELECT COUNT(*) as total FROM store_products p
LEFT JOIN LATERAL (
SELECT rec_min_price_cents / 100.0 as price_rec, med_min_price_cents / 100.0 as price_med, special as is_on_special FROM v_product_snapshots
SELECT rec_min_price_cents / 100.0 as price_rec, med_min_price_cents / 100.0 as price_med, special FROM v_product_snapshots
WHERE store_product_id = p.id
ORDER BY crawled_at DESC
LIMIT 1
@@ -1125,6 +1125,7 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => {
SELECT
d.id,
d.name,
d.slug,
d.address1,
d.address2,
d.city,
@@ -1179,6 +1180,7 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => {
const transformedDispensaries = dispensaries.map((d) => ({
id: d.id,
name: d.name,
slug: d.slug || null,
address1: d.address1,
address2: d.address2,
city: d.city,
@@ -1876,7 +1878,7 @@ router.get('/stats', async (req: PublicApiRequest, res: Response) => {
SELECT
(SELECT COUNT(*) FROM store_products) as product_count,
(SELECT COUNT(DISTINCT brand_name_raw) FROM store_products WHERE brand_name_raw IS NOT NULL) as brand_count,
(SELECT COUNT(*) FROM dispensaries WHERE crawl_enabled = true AND product_count > 0) as dispensary_count
(SELECT COUNT(DISTINCT dispensary_id) FROM store_products) as dispensary_count
`);
const s = stats[0] || {};
@@ -1996,4 +1998,235 @@ router.get('/menu', async (req: PublicApiRequest, res: Response) => {
}
});
// ============================================================
// VISITOR TRACKING & GEOLOCATION
// ============================================================
import crypto from 'crypto';
import { GeoLocation, lookupIP } from '../services/ip2location';
/**
* Get location from IP using local IP2Location database
*/
function getLocationFromIP(ip: string): GeoLocation | null {
return lookupIP(ip);
}
/**
* Hash IP for privacy (we don't store raw IPs)
*/
function hashIP(ip: string): string {
return crypto.createHash('sha256').update(ip).digest('hex').substring(0, 16);
}
/**
* POST /api/v1/visitor/track
* Track visitor location for analytics
*
* Body:
* - domain: string (required) - 'findagram.co', 'findadispo.com', etc.
* - page_path: string (optional) - current page path
* - session_id: string (optional) - client-generated session ID
* - referrer: string (optional) - document.referrer
*
* Returns:
* - location: { city, state, lat, lng } for client use
*/
router.post('/visitor/track', async (req: Request, res: Response) => {
try {
const { domain, page_path, session_id, referrer } = req.body;
if (!domain) {
return res.status(400).json({ error: 'domain is required' });
}
// Get client IP
const clientIp = (req.headers['x-forwarded-for'] as string)?.split(',')[0].trim() ||
req.headers['x-real-ip'] as string ||
req.ip ||
req.socket.remoteAddress ||
'';
// Get location from IP (local database lookup)
const location = getLocationFromIP(clientIp);
// Store visit (with hashed IP for privacy)
await pool.query(`
INSERT INTO visitor_locations (
ip_hash, city, state, state_code, country, country_code,
latitude, longitude, domain, page_path, referrer, user_agent, session_id
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
`, [
hashIP(clientIp),
location?.city || null,
location?.state || null,
location?.stateCode || null,
location?.country || null,
location?.countryCode || null,
location?.lat || null,
location?.lng || null,
domain,
page_path || null,
referrer || null,
req.headers['user-agent'] || null,
session_id || null
]);
// Return location to client (for nearby dispensary feature)
res.json({
success: true,
location: location ? {
city: location.city,
state: location.state,
stateCode: location.stateCode,
lat: location.lat,
lng: location.lng
} : null
});
} catch (error: any) {
console.error('Visitor tracking error:', error);
// Don't fail the request - tracking is non-critical
res.json({
success: false,
location: null
});
}
});
/**
* GET /api/v1/visitor/location
* Get visitor location without tracking (just IP lookup)
*/
router.get('/visitor/location', (req: Request, res: Response) => {
try {
const clientIp = (req.headers['x-forwarded-for'] as string)?.split(',')[0].trim() ||
req.headers['x-real-ip'] as string ||
req.ip ||
req.socket.remoteAddress ||
'';
const location = getLocationFromIP(clientIp);
res.json({
success: true,
location: location ? {
city: location.city,
state: location.state,
stateCode: location.stateCode,
lat: location.lat,
lng: location.lng
} : null
});
} catch (error: any) {
console.error('Location lookup error:', error);
res.json({
success: false,
location: null
});
}
});
/**
* GET /api/v1/analytics/visitors
* Get visitor analytics (admin only - requires auth)
*
* Query params:
* - domain: filter by domain
* - days: number of days to look back (default: 30)
* - limit: max results (default: 50)
*/
router.get('/analytics/visitors', async (req: PublicApiRequest, res: Response) => {
try {
const scope = req.scope;
// Only allow internal keys
if (!scope || scope.type !== 'internal') {
return res.status(403).json({ error: 'Access denied - internal key required' });
}
const { domain, days = '30', limit = '50' } = req.query;
const daysNum = Math.min(parseInt(days as string, 10) || 30, 90);
const limitNum = Math.min(parseInt(limit as string, 10) || 50, 200);
let whereClause = 'WHERE created_at > NOW() - $1::interval';
const params: any[] = [`${daysNum} days`];
let paramIndex = 2;
if (domain) {
whereClause += ` AND domain = $${paramIndex}`;
params.push(domain);
paramIndex++;
}
// Get top locations
const { rows: topLocations } = await pool.query(`
SELECT
city,
state,
state_code,
country_code,
COUNT(*) as visit_count,
COUNT(DISTINCT session_id) as unique_sessions,
MAX(created_at) as last_visit
FROM visitor_locations
${whereClause}
GROUP BY city, state, state_code, country_code
ORDER BY visit_count DESC
LIMIT $${paramIndex}
`, [...params, limitNum]);
// Get daily totals
const { rows: dailyStats } = await pool.query(`
SELECT
DATE(created_at) as date,
COUNT(*) as visits,
COUNT(DISTINCT session_id) as unique_sessions
FROM visitor_locations
${whereClause}
GROUP BY DATE(created_at)
ORDER BY date DESC
LIMIT 30
`, params);
// Get totals
const { rows: totals } = await pool.query(`
SELECT
COUNT(*) as total_visits,
COUNT(DISTINCT session_id) as total_sessions,
COUNT(DISTINCT city || state_code) as unique_locations
FROM visitor_locations
${whereClause}
`, params);
res.json({
success: true,
period: {
days: daysNum,
domain: domain || 'all'
},
totals: totals[0],
top_locations: topLocations.map(l => ({
city: l.city,
state: l.state,
state_code: l.state_code,
country_code: l.country_code,
visits: parseInt(l.visit_count, 10),
unique_sessions: parseInt(l.unique_sessions, 10),
last_visit: l.last_visit
})),
daily_stats: dailyStats.map(d => ({
date: d.date,
visits: parseInt(d.visits, 10),
unique_sessions: parseInt(d.unique_sessions, 10)
}))
});
} catch (error: any) {
console.error('Visitor analytics error:', error);
res.status(500).json({
error: 'Failed to fetch visitor analytics',
message: error.message
});
}
});
export default router;