## Worker System
- Role-agnostic workers that can handle any task type
- Pod-based architecture with StatefulSet (5-15 pods, 5 workers each)
- Custom pod names (Aethelgard, Xylos, Kryll, etc.)
- Worker registry with friendly names and resource monitoring
- Hub-and-spoke visualization on JobQueue page

## Stealth & Anti-Detection (REQUIRED)
- Proxies are MANDATORY - workers fail to start without active proxies
- CrawlRotator initializes on worker startup
- Loads proxies from `proxies` table
- Auto-rotates proxy + fingerprint on 403 errors
- 12 browser fingerprints (Chrome, Firefox, Safari, Edge)
- Locale/timezone matching for geographic consistency

## Task System
- Renamed product_resync → product_refresh
- Task chaining: store_discovery → entry_point → product_discovery
- Priority-based claiming with FOR UPDATE SKIP LOCKED
- Heartbeat and stale task recovery

## UI Updates
- JobQueue: Pod visualization, resource monitoring on hover
- WorkersDashboard: Simplified worker list
- Removed unused filters from task list

## Other
- IP2Location service for visitor analytics
- Findagram consumer features scaffolding
- Documentation updates

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
72 lines
2.3 KiB
SQL
72 lines
2.3 KiB
SQL
-- Visitor location analytics for Findagram
|
|
-- Tracks visitor locations to understand popular areas
|
|
|
|
-- Visit log used for location analytics. Only `domain` is required;
-- every other data column is nullable.
CREATE TABLE IF NOT EXISTS visitor_locations (
    id            SERIAL PRIMARY KEY,

    -- Geolocation resolved from an IP lookup
    ip_hash       VARCHAR(64),            -- SHA256 hash of the IP, stored for privacy
    city          VARCHAR(100),
    state         VARCHAR(100),
    state_code    VARCHAR(10),
    country       VARCHAR(100),
    country_code  VARCHAR(10),
    latitude      DECIMAL(10, 7),
    longitude     DECIMAL(10, 7),

    -- Details of the visit itself
    domain        VARCHAR(50) NOT NULL,   -- e.g. 'findagram.co', 'findadispo.com'
    page_path     VARCHAR(255),           -- e.g. '/products', '/dispensaries/123'
    referrer      VARCHAR(500),
    user_agent    VARCHAR(500),

    -- Ties together the page views that belong to one session
    session_id    VARCHAR(64),

    -- Set to the current time by default when the row is inserted
    created_at    TIMESTAMPTZ DEFAULT NOW()
);
|
|
|
|
-- Indexes on visitor_locations for analytics queries:
-- by domain, by (city, state_code), by creation time, and by session.
CREATE INDEX IF NOT EXISTS idx_visitor_locations_domain
    ON visitor_locations (domain);

CREATE INDEX IF NOT EXISTS idx_visitor_locations_city_state
    ON visitor_locations (city, state_code);

CREATE INDEX IF NOT EXISTS idx_visitor_locations_created_at
    ON visitor_locations (created_at);

CREATE INDEX IF NOT EXISTS idx_visitor_locations_session
    ON visitor_locations (session_id);
|
|
|
|
-- Pre-aggregated daily visit stats per domain and location, stored in an
-- ordinary table for query performance.
-- NOTE(review): nothing in this file populates the table; presumably the
-- application or a scheduled job writes it - confirm.
CREATE TABLE IF NOT EXISTS visitor_location_stats (
    id               SERIAL PRIMARY KEY,
    date             DATE NOT NULL,
    domain           VARCHAR(50) NOT NULL,
    city             VARCHAR(100),
    state            VARCHAR(100),
    state_code       VARCHAR(10),
    country_code     VARCHAR(10),

    -- Metrics; both counters default to 0
    visit_count      INTEGER DEFAULT 0,
    unique_sessions  INTEGER DEFAULT 0,

    -- At most one row per day / domain / location combination.
    -- NOTE(review): city, state_code and country_code are nullable, and
    -- PostgreSQL treats NULLs as distinct in UNIQUE constraints by default,
    -- so rows with a NULL in any of those columns are not deduplicated here.
    UNIQUE (date, domain, city, state_code, country_code)
);
|
|
|
|
-- Single-column lookup indexes on visitor_location_stats.
-- NOTE(review): the table's UNIQUE constraint leads with `date`, so its
-- backing index may already cover date-only lookups; check with EXPLAIN
-- before relying on (or dropping) idx_visitor_stats_date.
CREATE INDEX IF NOT EXISTS idx_visitor_stats_date
    ON visitor_location_stats (date);

CREATE INDEX IF NOT EXISTS idx_visitor_stats_domain
    ON visitor_location_stats (domain);

CREATE INDEX IF NOT EXISTS idx_visitor_stats_state
    ON visitor_location_stats (state_code);
|
|
|
|
-- Top visitor locations over the trailing 30 days.
-- One row per (domain, city, state, state_code, country_code) with the total
-- number of visits, the number of distinct sessions, and the most recent
-- visit time, ordered by total visits (highest first).
CREATE OR REPLACE VIEW v_top_visitor_locations AS
SELECT
    vl.domain,
    vl.city,
    vl.state,
    vl.state_code,
    vl.country_code,
    COUNT(*)                      AS total_visits,
    COUNT(DISTINCT vl.session_id) AS unique_sessions,  -- rows with a NULL session_id are not counted
    MAX(vl.created_at)            AS last_visit
FROM visitor_locations AS vl
WHERE vl.created_at > NOW() - INTERVAL '30 days'
GROUP BY
    vl.domain,
    vl.city,
    vl.state,
    vl.state_code,
    vl.country_code
ORDER BY total_visits DESC;
|