feat: Stealth worker system with mandatory proxy rotation
## Worker System
- Role-agnostic workers that can handle any task type
- Pod-based architecture with StatefulSet (5-15 pods, 5 workers each)
- Custom pod names (Aethelgard, Xylos, Kryll, etc.)
- Worker registry with friendly names and resource monitoring
- Hub-and-spoke visualization on JobQueue page

## Stealth & Anti-Detection (REQUIRED)
- Proxies are MANDATORY - workers fail to start without active proxies
- CrawlRotator initializes on worker startup
- Loads proxies from `proxies` table
- Auto-rotates proxy + fingerprint on 403 errors
- 12 browser fingerprints (Chrome, Firefox, Safari, Edge)
- Locale/timezone matching for geographic consistency

## Task System
- Renamed product_resync → product_refresh
- Task chaining: store_discovery → entry_point → product_discovery
- Priority-based claiming with FOR UPDATE SKIP LOCKED
- Heartbeat and stale task recovery

## UI Updates
- JobQueue: Pod visualization, resource monitoring on hover
- WorkersDashboard: Simplified worker list
- Removed unused filters from task list

## Other
- IP2Location service for visitor analytics
- Findagram consumer features scaffolding
- Documentation updates

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -463,7 +463,7 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => {
|
||||
|
||||
// Filter by on special
|
||||
if (on_special === 'true' || on_special === '1') {
|
||||
whereClause += ` AND s.is_on_special = TRUE`;
|
||||
whereClause += ` AND s.special = TRUE`;
|
||||
}
|
||||
|
||||
// Search by name or brand
|
||||
@@ -547,7 +547,7 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => {
|
||||
const { rows: countRows } = await pool.query(`
|
||||
SELECT COUNT(*) as total FROM store_products p
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT rec_min_price_cents / 100.0 as price_rec, med_min_price_cents / 100.0 as price_med, special as is_on_special FROM v_product_snapshots
|
||||
SELECT rec_min_price_cents / 100.0 as price_rec, med_min_price_cents / 100.0 as price_med, special FROM v_product_snapshots
|
||||
WHERE store_product_id = p.id
|
||||
ORDER BY crawled_at DESC
|
||||
LIMIT 1
|
||||
@@ -1125,6 +1125,7 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => {
|
||||
SELECT
|
||||
d.id,
|
||||
d.name,
|
||||
d.slug,
|
||||
d.address1,
|
||||
d.address2,
|
||||
d.city,
|
||||
@@ -1179,6 +1180,7 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => {
|
||||
const transformedDispensaries = dispensaries.map((d) => ({
|
||||
id: d.id,
|
||||
name: d.name,
|
||||
slug: d.slug || null,
|
||||
address1: d.address1,
|
||||
address2: d.address2,
|
||||
city: d.city,
|
||||
@@ -1876,7 +1878,7 @@ router.get('/stats', async (req: PublicApiRequest, res: Response) => {
|
||||
SELECT
|
||||
(SELECT COUNT(*) FROM store_products) as product_count,
|
||||
(SELECT COUNT(DISTINCT brand_name_raw) FROM store_products WHERE brand_name_raw IS NOT NULL) as brand_count,
|
||||
(SELECT COUNT(*) FROM dispensaries WHERE crawl_enabled = true AND product_count > 0) as dispensary_count
|
||||
(SELECT COUNT(DISTINCT dispensary_id) FROM store_products) as dispensary_count
|
||||
`);
|
||||
|
||||
const s = stats[0] || {};
|
||||
@@ -1996,4 +1998,235 @@ router.get('/menu', async (req: PublicApiRequest, res: Response) => {
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// VISITOR TRACKING & GEOLOCATION
|
||||
// ============================================================
|
||||
|
||||
import crypto from 'crypto';
|
||||
import { GeoLocation, lookupIP } from '../services/ip2location';
|
||||
|
||||
/**
 * Resolve an IP address to a location record.
 *
 * Thin pass-through to `lookupIP` from the IP2Location service module; the
 * result is returned unchanged.
 *
 * @param ip - The address to look up.
 * @returns The `GeoLocation` produced by `lookupIP`, or `null`.
 */
function getLocationFromIP(ip: string): GeoLocation | null {
  const resolved = lookupIP(ip);
  return resolved;
}
|
||||
|
||||
/**
 * Derive the token stored in place of a raw IP address.
 *
 * The token is the first 16 hexadecimal characters of the SHA-256 digest of
 * the input string.
 *
 * @param ip - The address (or any string) to hash.
 * @returns A 16-character lowercase hex string.
 */
function hashIP(ip: string): string {
  const tokenLength = 16;
  const hasher = crypto.createHash('sha256');
  hasher.update(ip);
  const hexDigest = hasher.digest('hex');
  return hexDigest.slice(0, tokenLength);
}
|
||||
|
||||
/**
 * POST /api/v1/visitor/track
 * Record one visit in `visitor_locations` and return the caller's location.
 *
 * Body:
 * - domain: string (required) - 'findagram.co', 'findadispo.com', etc.
 * - page_path: string (optional) - current page path
 * - session_id: string (optional) - client-generated session ID
 * - referrer: string (optional) - document.referrer
 *
 * Responses:
 * - 400 `{ error }` when `domain` is missing, empty, or not a string
 *   (including when the request has no parsed body at all).
 * - 200 `{ success: true, location }` where `location` is
 *   `{ city, state, stateCode, lat, lng }` or `null` if the lookup found nothing.
 * - 200 `{ success: false, location: null }` when the lookup or the insert
 *   throws; tracking is treated as non-critical, so the error is only logged.
 */
router.post('/visitor/track', async (req: Request, res: Response) => {
  try {
    // req.body is undefined when no body-parsing middleware handled the
    // request. Fall back to an empty object so a missing body is reported as
    // a 400 below instead of throwing into the best-effort catch.
    const body = req.body && typeof req.body === 'object' ? req.body : {};
    const { domain, page_path, session_id, referrer } = body;

    // The payload is untrusted: accept only a non-empty string as the domain.
    if (typeof domain !== 'string' || !domain) {
      return res.status(400).json({ error: 'domain is required' });
    }

    // Client IP: first entry of X-Forwarded-For, then X-Real-IP, then what
    // Express / the socket report. Falls back to the empty string.
    const clientIp = (req.headers['x-forwarded-for'] as string)?.split(',')[0].trim() ||
      (req.headers['x-real-ip'] as string) ||
      req.ip ||
      req.socket.remoteAddress ||
      '';

    // Get location from IP (local database lookup)
    const location = getLocationFromIP(clientIp);

    // Store the visit. Only the hashed IP is written, never the raw address.
    // `|| null` maps every falsy value (undefined, '', 0) to SQL NULL.
    await pool.query(`
      INSERT INTO visitor_locations (
        ip_hash, city, state, state_code, country, country_code,
        latitude, longitude, domain, page_path, referrer, user_agent, session_id
      ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
    `, [
      hashIP(clientIp),
      location?.city || null,
      location?.state || null,
      location?.stateCode || null,
      location?.country || null,
      location?.countryCode || null,
      location?.lat || null,
      location?.lng || null,
      domain,
      page_path || null,
      referrer || null,
      req.headers['user-agent'] || null,
      session_id || null
    ]);

    // Return location to client (for nearby dispensary feature)
    res.json({
      success: true,
      location: location ? {
        city: location.city,
        state: location.state,
        stateCode: location.stateCode,
        lat: location.lat,
        lng: location.lng
      } : null
    });
  } catch (error: unknown) {
    console.error('Visitor tracking error:', error);
    // Don't fail the request - tracking is non-critical
    res.json({
      success: false,
      location: null
    });
  }
});
|
||||
|
||||
/**
 * GET /api/v1/visitor/location
 * Look up the caller's location from their IP address without recording a visit.
 *
 * Always answers with HTTP 200:
 * - `{ success: true, location }` where `location` is
 *   `{ city, state, stateCode, lat, lng }` or `null` when the lookup found nothing.
 * - `{ success: false, location: null }` when the lookup throws (the error is logged).
 */
router.get('/visitor/location', (req: Request, res: Response) => {
  try {
    // Prefer the first X-Forwarded-For entry, then X-Real-IP, then the
    // address Express / the socket report; default to the empty string.
    const forwardedHeader = req.headers['x-forwarded-for'] as string;
    const firstForwarded = forwardedHeader?.split(',')[0].trim();
    const realIpHeader = req.headers['x-real-ip'] as string;
    const clientIp = firstForwarded || realIpHeader || req.ip || req.socket.remoteAddress || '';

    const geo = getLocationFromIP(clientIp);

    if (!geo) {
      res.json({ success: true, location: null });
      return;
    }

    // Expose only the fields the client needs.
    const { city, state, stateCode, lat, lng } = geo;
    res.json({
      success: true,
      location: { city, state, stateCode, lat, lng }
    });
  } catch (error: any) {
    console.error('Location lookup error:', error);
    res.json({ success: false, location: null });
  }
});
|
||||
|
||||
/**
 * GET /api/v1/analytics/visitors
 * Aggregate visitor analytics from `visitor_locations`.
 *
 * Access: only requests whose `req.scope.type` is `'internal'`; anything else
 * receives 403.
 *
 * Query params:
 * - domain: filter by domain (omit for all domains)
 * - days: number of days to look back (default: 30, clamped to 1-90)
 * - limit: max rows in `top_locations` (default: 50, clamped to 1-200)
 *
 * Responses:
 * - 200 `{ success, period, totals, top_locations, daily_stats }`
 * - 403 `{ error }` for non-internal callers
 * - 500 `{ error, message }` when a query fails
 */
router.get('/analytics/visitors', async (req: PublicApiRequest, res: Response) => {
  try {
    const scope = req.scope;

    // Only allow internal keys
    if (!scope || scope.type !== 'internal') {
      return res.status(403).json({ error: 'Access denied - internal key required' });
    }

    const { domain, days = '30', limit = '50' } = req.query;

    // Clamp both values to a positive range. Without the lower bound a
    // negative `limit` reaches PostgreSQL as `LIMIT -n`, which is rejected and
    // surfaces as a 500, and a negative `days` yields a window in the future.
    const daysNum = Math.min(Math.max(parseInt(days as string, 10) || 30, 1), 90);
    const limitNum = Math.min(Math.max(parseInt(limit as string, 10) || 50, 1), 200);

    // Shared WHERE clause; $1 is always the look-back interval.
    let whereClause = 'WHERE created_at > NOW() - $1::interval';
    const params: any[] = [`${daysNum} days`];
    let paramIndex = 2;

    if (domain) {
      whereClause += ` AND domain = $${paramIndex}`;
      params.push(domain);
      paramIndex++;
    }

    // The three aggregates are independent, so run them concurrently.
    const [topLocationsResult, dailyStatsResult, totalsResult] = await Promise.all([
      // Top locations by visit count
      pool.query(`
        SELECT
          city,
          state,
          state_code,
          country_code,
          COUNT(*) as visit_count,
          COUNT(DISTINCT session_id) as unique_sessions,
          MAX(created_at) as last_visit
        FROM visitor_locations
        ${whereClause}
        GROUP BY city, state, state_code, country_code
        ORDER BY visit_count DESC
        LIMIT $${paramIndex}
      `, [...params, limitNum]),

      // Daily totals.
      // NOTE(review): capped at 30 rows even though `days` may be up to 90 -
      // confirm whether the cap should follow `days`.
      pool.query(`
        SELECT
          DATE(created_at) as date,
          COUNT(*) as visits,
          COUNT(DISTINCT session_id) as unique_sessions
        FROM visitor_locations
        ${whereClause}
        GROUP BY DATE(created_at)
        ORDER BY date DESC
        LIMIT 30
      `, params),

      // Overall totals.
      // NOTE(review): `city || state_code` is NULL when either column is NULL,
      // so such rows are not counted in unique_locations - confirm intended.
      pool.query(`
        SELECT
          COUNT(*) as total_visits,
          COUNT(DISTINCT session_id) as total_sessions,
          COUNT(DISTINCT city || state_code) as unique_locations
        FROM visitor_locations
        ${whereClause}
      `, params)
    ]);

    const topLocations = topLocationsResult.rows;
    const dailyStats = dailyStatsResult.rows;
    const totals = totalsResult.rows;

    res.json({
      success: true,
      period: {
        days: daysNum,
        domain: domain || 'all'
      },
      // Passed through exactly as returned by the driver.
      totals: totals[0],
      top_locations: topLocations.map((l: any) => ({
        city: l.city,
        state: l.state,
        state_code: l.state_code,
        country_code: l.country_code,
        visits: parseInt(l.visit_count, 10),
        unique_sessions: parseInt(l.unique_sessions, 10),
        last_visit: l.last_visit
      })),
      daily_stats: dailyStats.map((d: any) => ({
        date: d.date,
        visits: parseInt(d.visits, 10),
        unique_sessions: parseInt(d.unique_sessions, 10)
      }))
    });
  } catch (error: unknown) {
    console.error('Visitor analytics error:', error);
    res.status(500).json({
      error: 'Failed to fetch visitor analytics',
      message: error instanceof Error ? error.message : String(error)
    });
  }
});
|
||||
|
||||
export default router;
|
||||
|
||||
Reference in New Issue
Block a user