feat(workers): Implement geo-based task pools
Workers now follow the correct flow: 1. Check what pools have pending tasks 2. Claim a pool (e.g., Phoenix AZ) 3. Get Evomi proxy for that geo 4. Run preflight with geo proxy 5. Pull tasks from pool (up to 6 stores) 6. Execute tasks 7. Release pool when exhausted (6 stores visited) Task pools group dispensaries by metro area (100mi radius): - Phoenix AZ, Tucson AZ - Los Angeles CA, San Francisco CA, San Diego CA, Sacramento CA - Denver CO, Chicago IL, Boston MA, Detroit MI - Las Vegas NV, Reno NV, Newark NJ, New York NY - Oklahoma City OK, Tulsa OK, Portland OR, Seattle WA Benefits: - Workers know geo BEFORE getting proxy (no more "No geo assigned") - IP diversity within metro area (Phoenix worker can use Tempe IP) - Simpler worker logic - just match pool geo - Pre-organized tasks, not grouped at claim time New files: - migrations/113_task_pools.sql - schema, seed data, functions - src/services/task-pool.ts - TypeScript service Env vars: - USE_TASK_POOLS=true (new system) - USE_IDENTITY_POOL=false (disabled) 🤖 Generated with [Claude Code](https://claude.ai/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
381
backend/migrations/113_task_pools.sql
Normal file
381
backend/migrations/113_task_pools.sql
Normal file
@@ -0,0 +1,381 @@
|
||||
-- Task Pools: Group tasks by geo area for worker assignment
|
||||
-- Workers claim a pool, get proxy for that geo, then pull tasks from pool
|
||||
|
||||
-- ============================================================================
|
||||
-- TASK POOLS TABLE
|
||||
-- ============================================================================
|
||||
-- Each pool represents a metro area (e.g., Phoenix AZ = 100mi radius)
|
||||
-- Dispensaries are assigned to pools based on location
|
||||
-- Workers claim a pool, not individual tasks
|
||||
|
||||
CREATE TABLE IF NOT EXISTS task_pools (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name VARCHAR(100) NOT NULL UNIQUE, -- e.g., 'phoenix_az'
|
||||
display_name VARCHAR(100) NOT NULL, -- e.g., 'Phoenix, AZ'
|
||||
state_code VARCHAR(2) NOT NULL, -- e.g., 'AZ'
|
||||
city VARCHAR(100) NOT NULL, -- e.g., 'Phoenix'
|
||||
latitude DECIMAL(10, 6) NOT NULL, -- pool center lat
|
||||
longitude DECIMAL(10, 6) NOT NULL, -- pool center lng
|
||||
radius_miles INTEGER DEFAULT 100, -- pool radius (100mi default)
|
||||
timezone VARCHAR(50) NOT NULL, -- e.g., 'America/Phoenix'
|
||||
is_active BOOLEAN DEFAULT true,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Index for active pools
|
||||
CREATE INDEX IF NOT EXISTS idx_task_pools_active ON task_pools(is_active) WHERE is_active = true;
|
||||
|
||||
-- ============================================================================
|
||||
-- LINK DISPENSARIES TO POOLS
|
||||
-- ============================================================================
|
||||
-- Add pool_id to dispensaries table
|
||||
|
||||
ALTER TABLE dispensaries
|
||||
ADD COLUMN IF NOT EXISTS pool_id INTEGER REFERENCES task_pools(id);
|
||||
|
||||
-- Index for pool membership
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_pool ON dispensaries(pool_id) WHERE pool_id IS NOT NULL;
|
||||
|
||||
-- ============================================================================
|
||||
-- WORKER POOL ASSIGNMENT
|
||||
-- ============================================================================
|
||||
-- Track which pool a worker is currently assigned to
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS current_pool_id INTEGER REFERENCES task_pools(id),
|
||||
ADD COLUMN IF NOT EXISTS pool_claimed_at TIMESTAMPTZ,
|
||||
ADD COLUMN IF NOT EXISTS pool_stores_visited INTEGER DEFAULT 0,
|
||||
ADD COLUMN IF NOT EXISTS pool_max_stores INTEGER DEFAULT 6;
|
||||
|
||||
-- ============================================================================
|
||||
-- SEED INITIAL POOLS
|
||||
-- ============================================================================
|
||||
-- Major cannabis markets with approximate center coordinates
|
||||
|
||||
INSERT INTO task_pools (name, display_name, state_code, city, latitude, longitude, timezone, radius_miles) VALUES
|
||||
-- Arizona
|
||||
('phoenix_az', 'Phoenix, AZ', 'AZ', 'Phoenix', 33.4484, -112.0740, 'America/Phoenix', 100),
|
||||
('tucson_az', 'Tucson, AZ', 'AZ', 'Tucson', 32.2226, -110.9747, 'America/Phoenix', 75),
|
||||
|
||||
-- California
|
||||
('los_angeles_ca', 'Los Angeles, CA', 'CA', 'Los Angeles', 34.0522, -118.2437, 'America/Los_Angeles', 100),
|
||||
('san_francisco_ca', 'San Francisco, CA', 'CA', 'San Francisco', 37.7749, -122.4194, 'America/Los_Angeles', 75),
|
||||
('san_diego_ca', 'San Diego, CA', 'CA', 'San Diego', 32.7157, -117.1611, 'America/Los_Angeles', 75),
|
||||
('sacramento_ca', 'Sacramento, CA', 'CA', 'Sacramento', 38.5816, -121.4944, 'America/Los_Angeles', 75),
|
||||
|
||||
-- Colorado
|
||||
('denver_co', 'Denver, CO', 'CO', 'Denver', 39.7392, -104.9903, 'America/Denver', 100),
|
||||
|
||||
-- Illinois
|
||||
('chicago_il', 'Chicago, IL', 'IL', 'Chicago', 41.8781, -87.6298, 'America/Chicago', 100),
|
||||
|
||||
-- Massachusetts
|
||||
('boston_ma', 'Boston, MA', 'MA', 'Boston', 42.3601, -71.0589, 'America/New_York', 75),
|
||||
|
||||
-- Michigan
|
||||
('detroit_mi', 'Detroit, MI', 'MI', 'Detroit', 42.3314, -83.0458, 'America/Detroit', 100),
|
||||
|
||||
-- Nevada
|
||||
('las_vegas_nv', 'Las Vegas, NV', 'NV', 'Las Vegas', 36.1699, -115.1398, 'America/Los_Angeles', 75),
|
||||
('reno_nv', 'Reno, NV', 'NV', 'Reno', 39.5296, -119.8138, 'America/Los_Angeles', 50),
|
||||
|
||||
-- New Jersey
|
||||
('newark_nj', 'Newark, NJ', 'NJ', 'Newark', 40.7357, -74.1724, 'America/New_York', 75),
|
||||
|
||||
-- New York
|
||||
('new_york_ny', 'New York, NY', 'NY', 'New York', 40.7128, -74.0060, 'America/New_York', 75),
|
||||
|
||||
-- Oklahoma
|
||||
('oklahoma_city_ok', 'Oklahoma City, OK', 'OK', 'Oklahoma City', 35.4676, -97.5164, 'America/Chicago', 100),
|
||||
('tulsa_ok', 'Tulsa, OK', 'OK', 'Tulsa', 36.1540, -95.9928, 'America/Chicago', 75),
|
||||
|
||||
-- Oregon
|
||||
('portland_or', 'Portland, OR', 'OR', 'Portland', 45.5152, -122.6784, 'America/Los_Angeles', 75),
|
||||
|
||||
-- Washington
|
||||
('seattle_wa', 'Seattle, WA', 'WA', 'Seattle', 47.6062, -122.3321, 'America/Los_Angeles', 100)
|
||||
|
||||
ON CONFLICT (name) DO NOTHING;
|
||||
|
||||
-- ============================================================================
|
||||
-- FUNCTION: Assign dispensary to nearest pool
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE FUNCTION assign_dispensary_to_pool(disp_id INTEGER)
|
||||
RETURNS INTEGER AS $$
|
||||
DECLARE
|
||||
disp_lat DECIMAL(10,6);
|
||||
disp_lng DECIMAL(10,6);
|
||||
nearest_pool_id INTEGER;
|
||||
BEGIN
|
||||
-- Get dispensary coordinates
|
||||
SELECT latitude, longitude INTO disp_lat, disp_lng
|
||||
FROM dispensaries WHERE id = disp_id;
|
||||
|
||||
IF disp_lat IS NULL OR disp_lng IS NULL THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
-- Find nearest active pool within radius
|
||||
-- Using Haversine approximation (accurate enough for 100mi)
|
||||
SELECT id INTO nearest_pool_id
|
||||
FROM task_pools
|
||||
WHERE is_active = true
|
||||
AND (
|
||||
3959 * acos(
|
||||
cos(radians(latitude)) * cos(radians(disp_lat)) *
|
||||
cos(radians(disp_lng) - radians(longitude)) +
|
||||
sin(radians(latitude)) * sin(radians(disp_lat))
|
||||
)
|
||||
) <= radius_miles
|
||||
ORDER BY (
|
||||
3959 * acos(
|
||||
cos(radians(latitude)) * cos(radians(disp_lat)) *
|
||||
cos(radians(disp_lng) - radians(longitude)) +
|
||||
sin(radians(latitude)) * sin(radians(disp_lat))
|
||||
)
|
||||
)
|
||||
LIMIT 1;
|
||||
|
||||
-- Update dispensary
|
||||
IF nearest_pool_id IS NOT NULL THEN
|
||||
UPDATE dispensaries SET pool_id = nearest_pool_id WHERE id = disp_id;
|
||||
END IF;
|
||||
|
||||
RETURN nearest_pool_id;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
|
||||
-- FUNCTION: Assign all dispensaries to pools (batch)
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE FUNCTION assign_all_dispensaries_to_pools()
|
||||
RETURNS TABLE(assigned INTEGER, unassigned INTEGER) AS $$
|
||||
DECLARE
|
||||
assigned_count INTEGER := 0;
|
||||
unassigned_count INTEGER := 0;
|
||||
disp RECORD;
|
||||
pool_id INTEGER;
|
||||
BEGIN
|
||||
FOR disp IN SELECT id FROM dispensaries WHERE pool_id IS NULL AND latitude IS NOT NULL LOOP
|
||||
pool_id := assign_dispensary_to_pool(disp.id);
|
||||
IF pool_id IS NOT NULL THEN
|
||||
assigned_count := assigned_count + 1;
|
||||
ELSE
|
||||
unassigned_count := unassigned_count + 1;
|
||||
END IF;
|
||||
END LOOP;
|
||||
|
||||
RETURN QUERY SELECT assigned_count, unassigned_count;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
|
||||
-- FUNCTION: Get pools with pending tasks
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE FUNCTION get_pools_with_pending_tasks()
|
||||
RETURNS TABLE(
|
||||
pool_id INTEGER,
|
||||
pool_name VARCHAR(100),
|
||||
display_name VARCHAR(100),
|
||||
state_code VARCHAR(2),
|
||||
city VARCHAR(100),
|
||||
timezone VARCHAR(50),
|
||||
pending_count BIGINT,
|
||||
store_count BIGINT
|
||||
) AS $$
|
||||
BEGIN
|
||||
RETURN QUERY
|
||||
SELECT
|
||||
tp.id as pool_id,
|
||||
tp.name as pool_name,
|
||||
tp.display_name,
|
||||
tp.state_code,
|
||||
tp.city,
|
||||
tp.timezone,
|
||||
COUNT(DISTINCT t.id) as pending_count,
|
||||
COUNT(DISTINCT d.id) as store_count
|
||||
FROM task_pools tp
|
||||
JOIN dispensaries d ON d.pool_id = tp.id
|
||||
JOIN tasks t ON t.dispensary_id = d.id AND t.status = 'pending'
|
||||
WHERE tp.is_active = true
|
||||
GROUP BY tp.id, tp.name, tp.display_name, tp.state_code, tp.city, tp.timezone
|
||||
HAVING COUNT(DISTINCT t.id) > 0
|
||||
ORDER BY COUNT(DISTINCT t.id) DESC;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
|
||||
-- FUNCTION: Worker claims a pool
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE FUNCTION worker_claim_pool(
|
||||
p_worker_id VARCHAR(100),
|
||||
p_pool_id INTEGER DEFAULT NULL
|
||||
)
|
||||
RETURNS TABLE(
|
||||
pool_id INTEGER,
|
||||
pool_name VARCHAR(100),
|
||||
display_name VARCHAR(100),
|
||||
state_code VARCHAR(2),
|
||||
city VARCHAR(100),
|
||||
latitude DECIMAL(10,6),
|
||||
longitude DECIMAL(10,6),
|
||||
timezone VARCHAR(50)
|
||||
) AS $$
|
||||
DECLARE
|
||||
claimed_pool_id INTEGER;
|
||||
BEGIN
|
||||
-- If no pool specified, pick the one with most pending tasks
|
||||
IF p_pool_id IS NULL THEN
|
||||
SELECT tp.id INTO claimed_pool_id
|
||||
FROM task_pools tp
|
||||
JOIN dispensaries d ON d.pool_id = tp.id
|
||||
JOIN tasks t ON t.dispensary_id = d.id AND t.status = 'pending'
|
||||
WHERE tp.is_active = true
|
||||
GROUP BY tp.id
|
||||
ORDER BY COUNT(DISTINCT t.id) DESC
|
||||
LIMIT 1;
|
||||
ELSE
|
||||
claimed_pool_id := p_pool_id;
|
||||
END IF;
|
||||
|
||||
IF claimed_pool_id IS NULL THEN
|
||||
RETURN;
|
||||
END IF;
|
||||
|
||||
-- Update worker registry with pool assignment
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
current_pool_id = claimed_pool_id,
|
||||
pool_claimed_at = NOW(),
|
||||
pool_stores_visited = 0,
|
||||
pool_max_stores = 6,
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
|
||||
-- Return pool info
|
||||
RETURN QUERY
|
||||
SELECT
|
||||
tp.id,
|
||||
tp.name,
|
||||
tp.display_name,
|
||||
tp.state_code,
|
||||
tp.city,
|
||||
tp.latitude,
|
||||
tp.longitude,
|
||||
tp.timezone
|
||||
FROM task_pools tp
|
||||
WHERE tp.id = claimed_pool_id;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
|
||||
-- FUNCTION: Pull tasks from worker's pool (up to 6 stores)
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE FUNCTION pull_tasks_from_pool(
|
||||
p_worker_id VARCHAR(100),
|
||||
p_max_stores INTEGER DEFAULT 6
|
||||
)
|
||||
RETURNS TABLE(
|
||||
task_id INTEGER,
|
||||
dispensary_id INTEGER,
|
||||
dispensary_name VARCHAR(255),
|
||||
role VARCHAR(50),
|
||||
platform VARCHAR(50),
|
||||
method VARCHAR(20)
|
||||
) AS $$
|
||||
DECLARE
|
||||
worker_pool_id INTEGER;
|
||||
stores_visited INTEGER;
|
||||
max_stores INTEGER;
|
||||
stores_remaining INTEGER;
|
||||
BEGIN
|
||||
-- Get worker's current pool and store count
|
||||
SELECT current_pool_id, pool_stores_visited, pool_max_stores
|
||||
INTO worker_pool_id, stores_visited, max_stores
|
||||
FROM worker_registry
|
||||
WHERE worker_id = p_worker_id;
|
||||
|
||||
IF worker_pool_id IS NULL THEN
|
||||
RAISE EXCEPTION 'Worker % has no pool assigned', p_worker_id;
|
||||
END IF;
|
||||
|
||||
stores_remaining := max_stores - stores_visited;
|
||||
IF stores_remaining <= 0 THEN
|
||||
RETURN; -- Worker exhausted
|
||||
END IF;
|
||||
|
||||
-- Claim tasks from pool (one task per store, up to remaining capacity)
|
||||
RETURN QUERY
|
||||
WITH available_stores AS (
|
||||
SELECT DISTINCT ON (d.id)
|
||||
t.id as task_id,
|
||||
d.id as dispensary_id,
|
||||
d.name as dispensary_name,
|
||||
t.role,
|
||||
t.platform,
|
||||
t.method
|
||||
FROM tasks t
|
||||
JOIN dispensaries d ON d.id = t.dispensary_id
|
||||
WHERE d.pool_id = worker_pool_id
|
||||
AND t.status = 'pending'
|
||||
AND t.scheduled_for <= NOW()
|
||||
ORDER BY d.id, t.priority DESC, t.created_at ASC
|
||||
LIMIT stores_remaining
|
||||
),
|
||||
claimed AS (
|
||||
UPDATE tasks
|
||||
SET
|
||||
status = 'claimed',
|
||||
claimed_by = p_worker_id,
|
||||
claimed_at = NOW()
|
||||
WHERE id IN (SELECT task_id FROM available_stores)
|
||||
RETURNING id
|
||||
)
|
||||
SELECT
|
||||
av.task_id,
|
||||
av.dispensary_id,
|
||||
av.dispensary_name,
|
||||
av.role,
|
||||
av.platform,
|
||||
av.method
|
||||
FROM available_stores av
|
||||
WHERE av.task_id IN (SELECT id FROM claimed);
|
||||
|
||||
-- Update worker store count
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
pool_stores_visited = pool_stores_visited + (
|
||||
SELECT COUNT(DISTINCT dispensary_id)
|
||||
FROM tasks
|
||||
WHERE claimed_by = p_worker_id AND status = 'claimed'
|
||||
),
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
|
||||
-- FUNCTION: Worker releases pool (exhausted or done)
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE FUNCTION worker_release_pool(p_worker_id VARCHAR(100))
|
||||
RETURNS BOOLEAN AS $$
|
||||
BEGIN
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
current_pool_id = NULL,
|
||||
pool_claimed_at = NULL,
|
||||
pool_stores_visited = 0,
|
||||
current_state = NULL,
|
||||
current_city = NULL,
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
|
||||
RETURN true;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
|
||||
-- RUN: Assign existing dispensaries to pools
|
||||
-- ============================================================================
|
||||
SELECT * FROM assign_all_dispensaries_to_pools();
|
||||
Reference in New Issue
Block a user