Compare commits

...

7 Commits

Author SHA1 Message Date
Kelly
6490df9faf feat(tasks): Consolidate schedule management into task_schedules
- Add schedule CRUD endpoints to /api/tasks/schedules
- Add Schedules section to TasksDashboard with edit/delete/bulk actions
- Deprecate job_schedules table (entries disabled in DB)
- Mark CrawlSchedulePage as deprecated (removed from menu)
- Add deprecation comments to legacy schedule methods in api.ts
- Add migration comments to workers.ts explaining consolidation

Key changes:
- Schedule management now at /admin/tasks instead of /admin/schedule
- task_schedules uses interval_hours (simpler than base_interval_minutes + jitter)
- All schedule routes placed before /:id to avoid Express route conflicts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 01:15:21 -07:00
kelly
a077f81c65 Merge pull request 'fix(preflight): Phase 2 - Correct parameter order and add IP/fingerprint reporting' (#56) from feat/preflight-phase2-reporting into master 2025-12-12 07:35:02 +00:00
Kelly
6bcadd9e71 fix(preflight): Correct parameter order and add IP/fingerprint reporting
- Fix update_worker_preflight call to use correct parameter order:
  (worker_id, transport, status, ip, response_ms, error, fingerprint)
- Add proxyIp to both curl and http preflight reports
- Add fingerprint JSONB with timezone, location, and bot detection data
- Log HTTP IP and timezone after preflight completes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 00:32:45 -07:00
kelly
a77bf8611a Merge pull request 'feat(workers): Preflight phase 1 - Schema, StatefulSet, and timezone matching' (#55) from feat/preflight-phase1-schema into master 2025-12-12 07:30:53 +00:00
Kelly
33feca3138 fix(antidetect): Match browser timezone to proxy IP location
- Add IP geolocation lookup via ip-api.com to get timezone from proxy IP
- Use ipify.org API for reliable proxy IP detection (replaces unreliable fingerprint.com scraping)
- Set browser timezone via CDP Emulation.setTimezoneOverride to match proxy location
- Add detectedTimezone and detectedLocation to preflight result
- Add /api/worker-registry/preflight-test endpoint for smoke testing

Fixes timezone mismatch where browser showed America/Phoenix while proxy was in America/New_York

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 00:25:39 -07:00
kelly
7d85a97b63 Merge pull request 'feat: Preflight schema and StatefulSet' (#54) from feat/preflight-phase1-schema into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/54
2025-12-12 07:14:40 +00:00
Kelly
ce081effd4 feat(workers): Add preflight schema and StatefulSet
- Migration 085: Add curl_ip, http_ip, fingerprint_data, preflight_status,
  preflight_at columns to worker_registry
- StatefulSet manifest for 8 persistent workers with OnDelete update strategy

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-11 23:45:04 -07:00
10 changed files with 1528 additions and 25 deletions

View File

@@ -0,0 +1,77 @@
apiVersion: v1
kind: Service
metadata:
name: scraper-worker
namespace: dispensary-scraper
labels:
app: scraper-worker
spec:
clusterIP: None # Headless service required for StatefulSet
selector:
app: scraper-worker
ports:
- port: 3010
name: http
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: scraper-worker
namespace: dispensary-scraper
spec:
serviceName: scraper-worker
replicas: 8
podManagementPolicy: Parallel # Start all pods at once
updateStrategy:
type: OnDelete # Pods only update when manually deleted - no automatic restarts
selector:
matchLabels:
app: scraper-worker
template:
metadata:
labels:
app: scraper-worker
spec:
terminationGracePeriodSeconds: 60
imagePullSecrets:
- name: regcred
containers:
- name: worker
image: code.cannabrands.app/creationshop/dispensary-scraper:latest
imagePullPolicy: Always
command: ["node"]
args: ["dist/tasks/task-worker.js"]
env:
- name: WORKER_MODE
value: "true"
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: MAX_CONCURRENT_TASKS
value: "50"
- name: API_BASE_URL
value: http://scraper
- name: NODE_OPTIONS
value: --max-old-space-size=1500
envFrom:
- configMapRef:
name: scraper-config
- secretRef:
name: scraper-secrets
resources:
requests:
cpu: 100m
memory: 1Gi
limits:
cpu: 500m
memory: 2Gi
livenessProbe:
exec:
command:
- /bin/sh
- -c
- pgrep -f 'task-worker' > /dev/null
initialDelaySeconds: 10
periodSeconds: 30
failureThreshold: 3

View File

@@ -0,0 +1,168 @@
-- Migration 085: Add IP and fingerprint columns for preflight reporting
-- These columns were missing from migration 084
-- ===================================================================
-- PART 1: Add IP address columns to worker_registry
-- ===================================================================
-- IP address detected during curl/axios preflight
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS curl_ip VARCHAR(45);
-- IP address detected during http/Puppeteer preflight
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS http_ip VARCHAR(45);
-- ===================================================================
-- PART 2: Add fingerprint data column
-- ===================================================================
-- Browser fingerprint data captured during Puppeteer preflight
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS fingerprint_data JSONB;
-- ===================================================================
-- PART 3: Add combined preflight status/timestamp for convenience
-- ===================================================================
-- Overall preflight status (computed from both transports)
-- Values: 'pending', 'passed', 'partial', 'failed'
-- - 'pending': neither transport tested
-- - 'passed': both transports passed (or http passed for browser-only)
-- - 'partial': at least one passed
-- - 'failed': no transport passed
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_status VARCHAR(20) DEFAULT 'pending';
-- Most recent preflight completion timestamp
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_at TIMESTAMPTZ;
-- ===================================================================
-- PART 4: Update function to set preflight status
-- ===================================================================
CREATE OR REPLACE FUNCTION update_worker_preflight(
p_worker_id VARCHAR(100),
p_transport VARCHAR(10), -- 'curl' or 'http'
p_status VARCHAR(20), -- 'passed', 'failed', 'skipped'
p_ip VARCHAR(45) DEFAULT NULL,
p_response_ms INTEGER DEFAULT NULL,
p_error TEXT DEFAULT NULL,
p_fingerprint JSONB DEFAULT NULL
) RETURNS VOID AS $$
DECLARE
v_curl_status VARCHAR(20);
v_http_status VARCHAR(20);
v_overall_status VARCHAR(20);
BEGIN
IF p_transport = 'curl' THEN
UPDATE worker_registry
SET
preflight_curl_status = p_status,
preflight_curl_at = NOW(),
preflight_curl_ms = p_response_ms,
preflight_curl_error = p_error,
curl_ip = p_ip,
updated_at = NOW()
WHERE worker_id = p_worker_id;
ELSIF p_transport = 'http' THEN
UPDATE worker_registry
SET
preflight_http_status = p_status,
preflight_http_at = NOW(),
preflight_http_ms = p_response_ms,
preflight_http_error = p_error,
http_ip = p_ip,
fingerprint_data = COALESCE(p_fingerprint, fingerprint_data),
updated_at = NOW()
WHERE worker_id = p_worker_id;
END IF;
-- Update overall preflight status
SELECT preflight_curl_status, preflight_http_status
INTO v_curl_status, v_http_status
FROM worker_registry
WHERE worker_id = p_worker_id;
-- Compute overall status
IF v_curl_status = 'passed' AND v_http_status = 'passed' THEN
v_overall_status := 'passed';
ELSIF v_curl_status = 'passed' OR v_http_status = 'passed' THEN
v_overall_status := 'partial';
ELSIF v_curl_status = 'failed' OR v_http_status = 'failed' THEN
v_overall_status := 'failed';
ELSE
v_overall_status := 'pending';
END IF;
UPDATE worker_registry
SET
preflight_status = v_overall_status,
preflight_at = NOW()
WHERE worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
-- ===================================================================
-- PART 5: Update v_active_workers view
-- ===================================================================
DROP VIEW IF EXISTS v_active_workers;
CREATE VIEW v_active_workers AS
SELECT
wr.id,
wr.worker_id,
wr.friendly_name,
wr.role,
wr.status,
wr.pod_name,
wr.hostname,
wr.started_at,
wr.last_heartbeat_at,
wr.last_task_at,
wr.tasks_completed,
wr.tasks_failed,
wr.current_task_id,
-- IP addresses from preflights
wr.curl_ip,
wr.http_ip,
-- Combined preflight status
wr.preflight_status,
wr.preflight_at,
-- Detailed preflight status per transport
wr.preflight_curl_status,
wr.preflight_http_status,
wr.preflight_curl_at,
wr.preflight_http_at,
wr.preflight_curl_error,
wr.preflight_http_error,
wr.preflight_curl_ms,
wr.preflight_http_ms,
-- Fingerprint data
wr.fingerprint_data,
-- Computed fields
EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
CASE
WHEN wr.status = 'offline' THEN 'offline'
WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
WHEN wr.current_task_id IS NOT NULL THEN 'busy'
ELSE 'ready'
END as health_status,
-- Capability flags (can this worker handle curl/http tasks?)
(wr.preflight_curl_status = 'passed') as can_curl,
(wr.preflight_http_status = 'passed') as can_http
FROM worker_registry wr
WHERE wr.status != 'terminated'
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;
-- ===================================================================
-- Comments
-- ===================================================================
COMMENT ON COLUMN worker_registry.curl_ip IS 'IP address detected during curl/axios preflight';
COMMENT ON COLUMN worker_registry.http_ip IS 'IP address detected during Puppeteer preflight';
COMMENT ON COLUMN worker_registry.fingerprint_data IS 'Browser fingerprint captured during Puppeteer preflight';
COMMENT ON COLUMN worker_registry.preflight_status IS 'Overall preflight status: pending, passed, partial, failed';
COMMENT ON COLUMN worker_registry.preflight_at IS 'Most recent preflight completion timestamp';

View File

@@ -3,6 +3,24 @@
* *
* Endpoints for managing worker tasks, viewing capacity metrics, * Endpoints for managing worker tasks, viewing capacity metrics,
* and generating batch tasks. * and generating batch tasks.
*
* SCHEDULE MANAGEMENT (added 2025-12-12):
* This file now contains the canonical schedule management endpoints.
* The job_schedules table has been deprecated and all schedule management
* is now consolidated into task_schedules:
*
* Schedule endpoints:
* GET /api/tasks/schedules - List all schedules
* POST /api/tasks/schedules - Create new schedule
* GET /api/tasks/schedules/:id - Get schedule by ID
* PUT /api/tasks/schedules/:id - Update schedule
* DELETE /api/tasks/schedules/:id - Delete schedule
* DELETE /api/tasks/schedules - Bulk delete schedules
* POST /api/tasks/schedules/:id/run-now - Trigger schedule immediately
* POST /api/tasks/schedules/:id/toggle - Toggle schedule enabled/disabled
*
* Note: Schedule routes are defined BEFORE /:id to avoid route conflicts
* (Express matches routes in order, and "schedules" would match /:id otherwise)
*/ */
import { Router, Request, Response } from 'express'; import { Router, Request, Response } from 'express';
@@ -131,6 +149,366 @@ router.get('/capacity/:role', async (req: Request, res: Response) => {
} }
}); });
// ============================================================
// SCHEDULE MANAGEMENT ROUTES
// (Must be before /:id to avoid route conflicts)
// ============================================================
/**
* GET /api/tasks/schedules
* List all task schedules
*/
router.get('/schedules', async (req: Request, res: Response) => {
try {
const enabledOnly = req.query.enabled === 'true';
let query = `
SELECT id, name, role, description, enabled, interval_hours,
priority, state_code, platform, last_run_at, next_run_at,
last_task_count, last_error, created_at, updated_at
FROM task_schedules
`;
if (enabledOnly) {
query += ` WHERE enabled = true`;
}
query += ` ORDER BY name`;
const result = await pool.query(query);
res.json({ schedules: result.rows });
} catch (error: unknown) {
console.error('Error listing schedules:', error);
res.status(500).json({ error: 'Failed to list schedules' });
}
});
/**
* DELETE /api/tasks/schedules
* Bulk delete schedules
*
* Body:
* - ids: number[] (required) - array of schedule IDs to delete
* - all: boolean (optional) - if true, delete all schedules (ids ignored)
*/
router.delete('/schedules', async (req: Request, res: Response) => {
try {
const { ids, all } = req.body;
let result;
if (all === true) {
// Delete all schedules
result = await pool.query(`
DELETE FROM task_schedules RETURNING id, name
`);
} else if (Array.isArray(ids) && ids.length > 0) {
// Delete specific schedules by IDs
result = await pool.query(`
DELETE FROM task_schedules WHERE id = ANY($1) RETURNING id, name
`, [ids]);
} else {
return res.status(400).json({
error: 'Either provide ids array or set all=true',
});
}
res.json({
success: true,
deleted_count: result.rowCount,
deleted: result.rows,
message: `Deleted ${result.rowCount} schedule(s)`,
});
} catch (error: unknown) {
console.error('Error bulk deleting schedules:', error);
res.status(500).json({ error: 'Failed to delete schedules' });
}
});
/**
* POST /api/tasks/schedules
* Create a new schedule
*
* Body:
* - name: string (required, unique)
* - role: TaskRole (required)
* - description: string (optional)
* - enabled: boolean (default true)
* - interval_hours: number (required)
* - priority: number (default 0)
* - state_code: string (optional)
* - platform: string (optional)
*/
router.post('/schedules', async (req: Request, res: Response) => {
try {
const {
name,
role,
description,
enabled = true,
interval_hours,
priority = 0,
state_code,
platform,
} = req.body;
if (!name || !role || !interval_hours) {
return res.status(400).json({
error: 'name, role, and interval_hours are required',
});
}
// Calculate next_run_at based on interval
const nextRunAt = new Date(Date.now() + interval_hours * 60 * 60 * 1000);
const result = await pool.query(`
INSERT INTO task_schedules
(name, role, description, enabled, interval_hours, priority, state_code, platform, next_run_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
RETURNING id, name, role, description, enabled, interval_hours,
priority, state_code, platform, last_run_at, next_run_at,
last_task_count, last_error, created_at, updated_at
`, [name, role, description, enabled, interval_hours, priority, state_code, platform, nextRunAt]);
res.status(201).json(result.rows[0]);
} catch (error: any) {
if (error.code === '23505') {
// Unique constraint violation
return res.status(409).json({ error: 'A schedule with this name already exists' });
}
console.error('Error creating schedule:', error);
res.status(500).json({ error: 'Failed to create schedule' });
}
});
/**
* GET /api/tasks/schedules/:id
* Get a specific schedule by ID
*/
router.get('/schedules/:id', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
const result = await pool.query(`
SELECT id, name, role, description, enabled, interval_hours,
priority, state_code, platform, last_run_at, next_run_at,
last_task_count, last_error, created_at, updated_at
FROM task_schedules
WHERE id = $1
`, [scheduleId]);
if (result.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
res.json(result.rows[0]);
} catch (error: unknown) {
console.error('Error getting schedule:', error);
res.status(500).json({ error: 'Failed to get schedule' });
}
});
/**
* PUT /api/tasks/schedules/:id
* Update an existing schedule
*/
router.put('/schedules/:id', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
const {
name,
role,
description,
enabled,
interval_hours,
priority,
state_code,
platform,
} = req.body;
// Build dynamic update query
const updates: string[] = [];
const values: any[] = [];
let paramIndex = 1;
if (name !== undefined) {
updates.push(`name = $${paramIndex++}`);
values.push(name);
}
if (role !== undefined) {
updates.push(`role = $${paramIndex++}`);
values.push(role);
}
if (description !== undefined) {
updates.push(`description = $${paramIndex++}`);
values.push(description);
}
if (enabled !== undefined) {
updates.push(`enabled = $${paramIndex++}`);
values.push(enabled);
}
if (interval_hours !== undefined) {
updates.push(`interval_hours = $${paramIndex++}`);
values.push(interval_hours);
// Recalculate next_run_at if interval changed
const nextRunAt = new Date(Date.now() + interval_hours * 60 * 60 * 1000);
updates.push(`next_run_at = $${paramIndex++}`);
values.push(nextRunAt);
}
if (priority !== undefined) {
updates.push(`priority = $${paramIndex++}`);
values.push(priority);
}
if (state_code !== undefined) {
updates.push(`state_code = $${paramIndex++}`);
values.push(state_code || null);
}
if (platform !== undefined) {
updates.push(`platform = $${paramIndex++}`);
values.push(platform || null);
}
if (updates.length === 0) {
return res.status(400).json({ error: 'No fields to update' });
}
updates.push('updated_at = NOW()');
values.push(scheduleId);
const result = await pool.query(`
UPDATE task_schedules
SET ${updates.join(', ')}
WHERE id = $${paramIndex}
RETURNING id, name, role, description, enabled, interval_hours,
priority, state_code, platform, last_run_at, next_run_at,
last_task_count, last_error, created_at, updated_at
`, values);
if (result.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
res.json(result.rows[0]);
} catch (error: any) {
if (error.code === '23505') {
return res.status(409).json({ error: 'A schedule with this name already exists' });
}
console.error('Error updating schedule:', error);
res.status(500).json({ error: 'Failed to update schedule' });
}
});
/**
* DELETE /api/tasks/schedules/:id
* Delete a schedule
*/
router.delete('/schedules/:id', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
const result = await pool.query(`
DELETE FROM task_schedules WHERE id = $1 RETURNING id, name
`, [scheduleId]);
if (result.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
res.json({
success: true,
message: `Schedule "${result.rows[0].name}" deleted`,
});
} catch (error: unknown) {
console.error('Error deleting schedule:', error);
res.status(500).json({ error: 'Failed to delete schedule' });
}
});
/**
* POST /api/tasks/schedules/:id/run-now
* Manually trigger a scheduled task to run immediately
*/
router.post('/schedules/:id/run-now', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
// Get the schedule
const scheduleResult = await pool.query(`
SELECT id, name, role, state_code, platform, priority
FROM task_schedules WHERE id = $1
`, [scheduleId]);
if (scheduleResult.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
const schedule = scheduleResult.rows[0];
// Create a task based on the schedule
const task = await taskService.createTask({
role: schedule.role,
platform: schedule.platform,
priority: schedule.priority + 10, // Boost priority for manual runs
});
// Update last_run_at on the schedule
await pool.query(`
UPDATE task_schedules
SET last_run_at = NOW(),
next_run_at = NOW() + (interval_hours || ' hours')::interval,
updated_at = NOW()
WHERE id = $1
`, [scheduleId]);
res.json({
success: true,
message: `Schedule "${schedule.name}" triggered`,
task,
});
} catch (error: unknown) {
console.error('Error running schedule:', error);
res.status(500).json({ error: 'Failed to run schedule' });
}
});
/**
* POST /api/tasks/schedules/:id/toggle
* Toggle a schedule's enabled status
*/
router.post('/schedules/:id/toggle', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
const result = await pool.query(`
UPDATE task_schedules
SET enabled = NOT enabled,
updated_at = NOW()
WHERE id = $1
RETURNING id, name, enabled
`, [scheduleId]);
if (result.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
res.json({
success: true,
schedule: result.rows[0],
message: result.rows[0].enabled
? `Schedule "${result.rows[0].name}" enabled`
: `Schedule "${result.rows[0].name}" disabled`,
});
} catch (error: unknown) {
console.error('Error toggling schedule:', error);
res.status(500).json({ error: 'Failed to toggle schedule' });
}
});
// ============================================================
// TASK-SPECIFIC ROUTES (with :id parameter)
// ============================================================
/** /**
* GET /api/tasks/:id * GET /api/tasks/:id
* Get a specific task by ID * Get a specific task by ID

View File

@@ -23,6 +23,8 @@
import { Router, Request, Response } from 'express'; import { Router, Request, Response } from 'express';
import { pool } from '../db/pool'; import { pool } from '../db/pool';
import os from 'os'; import os from 'os';
import { runPuppeteerPreflightWithRetry } from '../services/puppeteer-preflight';
import { CrawlRotator } from '../services/crawl-rotator';
const router = Router(); const router = Router();
@@ -864,4 +866,58 @@ router.get('/pods', async (_req: Request, res: Response) => {
} }
}); });
// ============================================================
// PREFLIGHT SMOKE TEST
// ============================================================
/**
* POST /api/worker-registry/preflight-test
* Run an HTTP (Puppeteer) preflight test and return results
*
* This is a smoke test endpoint to verify the preflight system works.
* Returns IP, fingerprint data, bot detection results, and products fetched.
*/
router.post('/preflight-test', async (_req: Request, res: Response) => {
try {
console.log('[PreflightTest] Starting HTTP preflight smoke test...');
// Create a temporary CrawlRotator for the test
const crawlRotator = new CrawlRotator();
// Run the Puppeteer preflight (with 1 retry)
const startTime = Date.now();
const result = await runPuppeteerPreflightWithRetry(crawlRotator, 1);
const duration = Date.now() - startTime;
console.log(`[PreflightTest] Completed in ${duration}ms - passed: ${result.passed}`);
res.json({
success: true,
test: 'http_preflight',
duration_ms: duration,
result: {
passed: result.passed,
proxy_ip: result.proxyIp,
fingerprint: result.fingerprint,
bot_detection: result.botDetection,
products_returned: result.productsReturned,
browser_user_agent: result.browserUserAgent,
ip_verified: result.ipVerified,
proxy_available: result.proxyAvailable,
proxy_connected: result.proxyConnected,
antidetect_ready: result.antidetectReady,
response_time_ms: result.responseTimeMs,
error: result.error
}
});
} catch (error: any) {
console.error('[PreflightTest] Error:', error.message);
res.status(500).json({
success: false,
test: 'http_preflight',
error: error.message
});
}
});
export default router; export default router;

View File

@@ -4,10 +4,25 @@
* Provider-agnostic worker management and job monitoring. * Provider-agnostic worker management and job monitoring.
* Replaces legacy /api/dutchie-az/admin/schedules and /api/dutchie-az/monitor/* routes. * Replaces legacy /api/dutchie-az/admin/schedules and /api/dutchie-az/monitor/* routes.
* *
* DEPRECATION NOTE (2025-12-12):
* This file still queries job_schedules for backwards compatibility with
* the /api/workers endpoints that display worker status. However, the
* job_schedules table is DEPRECATED - all entries have been disabled.
*
* Schedule management has been consolidated into task_schedules:
* - Use /api/tasks/schedules for schedule CRUD operations
* - Use TasksDashboard.tsx (/admin/tasks) for schedule management UI
* - task_schedules uses interval_hours (simpler than base_interval_minutes + jitter)
*
* The /api/workers endpoints remain useful for:
* - Monitoring active workers and job status
* - K8s scaling controls
* - Job history and logs
*
* Endpoints: * Endpoints:
* GET /api/workers - List all workers/schedules * GET /api/workers - List all workers/schedules
* GET /api/workers/active - List currently active workers * GET /api/workers/active - List currently active workers
* GET /api/workers/schedule - Get all job schedules * GET /api/workers/schedule - Get all job schedules (DEPRECATED - use /api/tasks/schedules)
* GET /api/workers/:workerName - Get specific worker details * GET /api/workers/:workerName - Get specific worker details
* GET /api/workers/:workerName/scope - Get worker's scope (states, etc.) * GET /api/workers/:workerName/scope - Get worker's scope (states, etc.)
* GET /api/workers/:workerName/stats - Get worker statistics * GET /api/workers/:workerName/stats - Get worker statistics

View File

@@ -26,6 +26,34 @@ const TEST_PLATFORM_ID = '6405ef617056e8014d79101b';
const FINGERPRINT_DEMO_URL = 'https://demo.fingerprint.com/'; const FINGERPRINT_DEMO_URL = 'https://demo.fingerprint.com/';
const AMIUNIQUE_URL = 'https://amiunique.org/fingerprint'; const AMIUNIQUE_URL = 'https://amiunique.org/fingerprint';
// IP geolocation API for timezone lookup (free, no key required)
const IP_API_URL = 'http://ip-api.com/json';
/**
* Look up timezone from IP address using ip-api.com
* Returns IANA timezone (e.g., 'America/New_York') or null on failure
*/
async function getTimezoneFromIp(ip: string): Promise<{ timezone: string; city?: string; region?: string } | null> {
try {
const axios = require('axios');
const response = await axios.get(`${IP_API_URL}/${ip}?fields=status,timezone,city,regionName`, {
timeout: 5000,
});
if (response.data?.status === 'success' && response.data?.timezone) {
return {
timezone: response.data.timezone,
city: response.data.city,
region: response.data.regionName,
};
}
return null;
} catch (err: any) {
console.log(`[PuppeteerPreflight] IP geolocation lookup failed: ${err.message}`);
return null;
}
}
export interface PuppeteerPreflightResult extends PreflightResult { export interface PuppeteerPreflightResult extends PreflightResult {
method: 'http'; method: 'http';
/** Number of products returned (proves API access) */ /** Number of products returned (proves API access) */
@@ -42,6 +70,13 @@ export interface PuppeteerPreflightResult extends PreflightResult {
expectedProxyIp?: string; expectedProxyIp?: string;
/** Whether IP verification passed (detected IP matches proxy) */ /** Whether IP verification passed (detected IP matches proxy) */
ipVerified?: boolean; ipVerified?: boolean;
/** Detected timezone from IP geolocation */
detectedTimezone?: string;
/** Detected location from IP geolocation */
detectedLocation?: {
city?: string;
region?: string;
};
} }
/** /**
@@ -136,7 +171,52 @@ export async function runPuppeteerPreflight(
}; };
// ========================================================================= // =========================================================================
// STEP 1: Visit fingerprint.com demo to verify anti-detect and get IP // STEP 1a: Get IP address directly via simple API (more reliable than scraping)
// =========================================================================
console.log(`[PuppeteerPreflight] Getting proxy IP address...`);
try {
const ipApiResponse = await page.evaluate(async () => {
try {
const response = await fetch('https://api.ipify.org?format=json');
const data = await response.json();
return { ip: data.ip, error: null };
} catch (err: any) {
return { ip: null, error: err.message };
}
});
if (ipApiResponse.ip) {
result.proxyIp = ipApiResponse.ip;
result.proxyConnected = true;
console.log(`[PuppeteerPreflight] Detected proxy IP: ${ipApiResponse.ip}`);
// Look up timezone from IP
const geoData = await getTimezoneFromIp(ipApiResponse.ip);
if (geoData) {
result.detectedTimezone = geoData.timezone;
result.detectedLocation = { city: geoData.city, region: geoData.region };
console.log(`[PuppeteerPreflight] IP Geolocation: ${geoData.city}, ${geoData.region} (${geoData.timezone})`);
// Set browser timezone to match proxy location via CDP
try {
const client = await page.target().createCDPSession();
await client.send('Emulation.setTimezoneOverride', { timezoneId: geoData.timezone });
console.log(`[PuppeteerPreflight] Browser timezone set to: ${geoData.timezone}`);
} catch (tzErr: any) {
console.log(`[PuppeteerPreflight] Failed to set browser timezone: ${tzErr.message}`);
}
} else {
console.log(`[PuppeteerPreflight] WARNING: Could not determine timezone from IP - timezone mismatch possible`);
}
} else {
console.log(`[PuppeteerPreflight] IP lookup failed: ${ipApiResponse.error || 'unknown error'}`);
}
} catch (ipErr: any) {
console.log(`[PuppeteerPreflight] IP API error: ${ipErr.message}`);
}
// =========================================================================
// STEP 1b: Visit fingerprint.com demo to verify anti-detect
// ========================================================================= // =========================================================================
console.log(`[PuppeteerPreflight] Testing anti-detect at ${FINGERPRINT_DEMO_URL}...`); console.log(`[PuppeteerPreflight] Testing anti-detect at ${FINGERPRINT_DEMO_URL}...`);
@@ -199,6 +279,8 @@ export async function runPuppeteerPreflight(
// Don't fail - residential proxies often show different egress IPs // Don't fail - residential proxies often show different egress IPs
} }
} }
// Note: Timezone already set earlier via ipify.org IP lookup
} }
if (fingerprintData.visitorId) { if (fingerprintData.visitorId) {

View File

@@ -11,10 +11,17 @@
* - Workers report heartbeats to worker_registry * - Workers report heartbeats to worker_registry
* - Workers are ROLE-AGNOSTIC by default (can handle any task type) * - Workers are ROLE-AGNOSTIC by default (can handle any task type)
* *
* Stealth & Anti-Detection: * Stealth & Anti-Detection (LAZY INITIALIZATION):
* PROXIES ARE REQUIRED - workers will fail to start if no proxies available. * Workers start IMMEDIATELY without waiting for proxies.
* Stealth systems (proxies, fingerprints, preflights) are initialized
* on first task claim, not at worker startup.
* *
* On startup, workers initialize the CrawlRotator which provides: * This allows workers to:
* - Register and send heartbeats immediately
* - Wait in main loop without blocking on proxy availability
* - Initialize proxies/preflights only when tasks are actually available
*
* On first task claim attempt, workers initialize the CrawlRotator which provides:
* - Proxy rotation: Loads proxies from `proxies` table, ALL requests use proxy * - Proxy rotation: Loads proxies from `proxies` table, ALL requests use proxy
* - User-Agent rotation: Cycles through realistic browser fingerprints * - User-Agent rotation: Cycles through realistic browser fingerprints
* - Fingerprint rotation: Changes browser profile on blocks * - Fingerprint rotation: Changes browser profile on blocks
@@ -34,11 +41,16 @@
* *
* Environment: * Environment:
* WORKER_ROLE - Which task role to process (optional, null = any task) * WORKER_ROLE - Which task role to process (optional, null = any task)
* WORKER_ID - Optional custom worker ID (auto-generated if not provided) * POD_NAME - K8s StatefulSet pod name (PRIMARY - use this for persistent identity)
* POD_NAME - Kubernetes pod name (optional) * WORKER_ID - Custom worker ID (fallback if POD_NAME not set)
* POLL_INTERVAL_MS - How often to check for tasks (default: 5000) * POLL_INTERVAL_MS - How often to check for tasks (default: 5000)
* HEARTBEAT_INTERVAL_MS - How often to update heartbeat (default: 30000) * HEARTBEAT_INTERVAL_MS - How often to update heartbeat (default: 30000)
* API_BASE_URL - Backend API URL for registration (default: http://localhost:3010) * API_BASE_URL - Backend API URL for registration (default: http://localhost:3010)
*
* Worker Identity:
* Workers use POD_NAME as their worker_id for persistent identity across restarts.
* In K8s StatefulSet, POD_NAME = "scraper-worker-0" through "scraper-worker-7".
* This ensures workers re-register with the same ID instead of creating new entries.
*/ */
import { Pool } from 'pg'; import { Pool } from 'pg';
@@ -209,6 +221,16 @@ export class TaskWorker {
private preflightCurlResult: CurlPreflightResult | null = null; private preflightCurlResult: CurlPreflightResult | null = null;
private preflightHttpResult: PuppeteerPreflightResult | null = null; private preflightHttpResult: PuppeteerPreflightResult | null = null;
// ==========================================================================
// LAZY INITIALIZATION FLAGS
// ==========================================================================
// Stealth/proxy initialization is deferred until first task claim.
// Workers register immediately and enter main loop without blocking.
// ==========================================================================
private stealthInitialized: boolean = false;
private preflightsCompleted: boolean = false;
private initializingPromise: Promise<void> | null = null;
constructor(role: TaskRole | null = null, workerId?: string) { constructor(role: TaskRole | null = null, workerId?: string) {
this.pool = getPool(); this.pool = getPool();
this.role = role; this.role = role;
@@ -293,9 +315,9 @@ export class TaskWorker {
/** /**
* Initialize stealth systems (proxy rotation, fingerprints) * Initialize stealth systems (proxy rotation, fingerprints)
* Called once on worker startup before processing any tasks. * Called LAZILY on first task claim attempt (NOT at worker startup).
* *
* IMPORTANT: Proxies are REQUIRED. Workers will wait until proxies are available. * IMPORTANT: Proxies are REQUIRED to claim tasks. This method waits until proxies are available.
* Workers listen for PostgreSQL NOTIFY 'proxy_added' to wake up immediately when proxies are added. * Workers listen for PostgreSQL NOTIFY 'proxy_added' to wake up immediately when proxies are added.
*/ */
private async initializeStealth(): Promise<void> { private async initializeStealth(): Promise<void> {
@@ -435,35 +457,98 @@ export class TaskWorker {
/** /**
* Report preflight status to worker_registry * Report preflight status to worker_registry
* Function signature: update_worker_preflight(worker_id, transport, status, ip, response_ms, error, fingerprint)
*/ */
private async reportPreflightStatus(): Promise<void> { private async reportPreflightStatus(): Promise<void> {
try { try {
// Update worker_registry directly via SQL (more reliable than API) // Update worker_registry directly via SQL (more reliable than API)
// CURL preflight - includes IP address
await this.pool.query(` await this.pool.query(`
SELECT update_worker_preflight($1, 'curl', $2, $3, $4) SELECT update_worker_preflight($1, 'curl', $2, $3, $4, $5, $6)
`, [ `, [
this.workerId, this.workerId,
this.preflightCurlPassed ? 'passed' : 'failed', this.preflightCurlPassed ? 'passed' : 'failed',
this.preflightCurlResult?.proxyIp || null,
this.preflightCurlResult?.responseTimeMs || null, this.preflightCurlResult?.responseTimeMs || null,
this.preflightCurlResult?.error || null, this.preflightCurlResult?.error || null,
null, // No fingerprint for curl
]); ]);
// HTTP preflight - includes IP, fingerprint, and timezone data
const httpFingerprint = this.preflightHttpResult ? {
...this.preflightHttpResult.fingerprint,
detectedTimezone: (this.preflightHttpResult as any).detectedTimezone,
detectedLocation: (this.preflightHttpResult as any).detectedLocation,
productsReturned: this.preflightHttpResult.productsReturned,
botDetection: (this.preflightHttpResult as any).botDetection,
} : null;
await this.pool.query(` await this.pool.query(`
SELECT update_worker_preflight($1, 'http', $2, $3, $4) SELECT update_worker_preflight($1, 'http', $2, $3, $4, $5, $6)
`, [ `, [
this.workerId, this.workerId,
this.preflightHttpPassed ? 'passed' : 'failed', this.preflightHttpPassed ? 'passed' : 'failed',
this.preflightHttpResult?.proxyIp || null,
this.preflightHttpResult?.responseTimeMs || null, this.preflightHttpResult?.responseTimeMs || null,
this.preflightHttpResult?.error || null, this.preflightHttpResult?.error || null,
httpFingerprint ? JSON.stringify(httpFingerprint) : null,
]); ]);
console.log(`[TaskWorker] Preflight status reported to worker_registry`); console.log(`[TaskWorker] Preflight status reported to worker_registry`);
if (this.preflightHttpResult?.proxyIp) {
console.log(`[TaskWorker] HTTP IP: ${this.preflightHttpResult.proxyIp}, Timezone: ${(this.preflightHttpResult as any).detectedTimezone || 'unknown'}`);
}
} catch (err: any) { } catch (err: any) {
// Non-fatal - worker can still function // Non-fatal - worker can still function
console.warn(`[TaskWorker] Could not report preflight status: ${err.message}`); console.warn(`[TaskWorker] Could not report preflight status: ${err.message}`);
} }
} }
/**
* Lazy initialization of stealth systems.
* Called BEFORE claiming first task (not at worker startup).
* This allows workers to register and enter main loop immediately.
*
* Returns true if initialization succeeded, false otherwise.
*/
private async ensureStealthInitialized(): Promise<boolean> {
// Already initialized
if (this.stealthInitialized && this.preflightsCompleted) {
return true;
}
// Already initializing (prevent concurrent init attempts)
if (this.initializingPromise) {
await this.initializingPromise;
return this.stealthInitialized && this.preflightsCompleted;
}
console.log(`[TaskWorker] ${this.friendlyName} lazy-initializing stealth systems (first task claim)...`);
this.initializingPromise = (async () => {
try {
// Initialize proxy/fingerprint rotation
await this.initializeStealth();
this.stealthInitialized = true;
// Run dual-transport preflights
await this.runDualPreflights();
this.preflightsCompleted = true;
const preflightMsg = `curl=${this.preflightCurlPassed ? '✓' : '✗'} http=${this.preflightHttpPassed ? '✓' : '✗'}`;
console.log(`[TaskWorker] ${this.friendlyName} stealth ready (${preflightMsg})`);
} catch (err: any) {
console.error(`[TaskWorker] ${this.friendlyName} stealth init failed: ${err.message}`);
this.stealthInitialized = false;
this.preflightsCompleted = false;
}
})();
await this.initializingPromise;
this.initializingPromise = null;
return this.stealthInitialized && this.preflightsCompleted;
}
/** /**
* Register worker with the registry (get friendly name) * Register worker with the registry (get friendly name)
*/ */
@@ -597,25 +682,22 @@ export class TaskWorker {
/** /**
* Start the worker loop * Start the worker loop
*
* Workers start IMMEDIATELY without blocking on proxy/preflight init.
* Stealth systems are lazy-initialized on first task claim.
* This allows workers to register and send heartbeats even when proxies aren't ready.
*/ */
async start(): Promise<void> { async start(): Promise<void> {
this.isRunning = true; this.isRunning = true;
// Initialize stealth systems (proxy rotation, fingerprints) // Register with the API to get a friendly name (non-blocking)
await this.initializeStealth();
// Register with the API to get a friendly name
await this.register(); await this.register();
// Run dual-transport preflights // Start registry heartbeat immediately
await this.runDualPreflights();
// Start registry heartbeat
this.startRegistryHeartbeat(); this.startRegistryHeartbeat();
const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)'; const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)';
const preflightMsg = `curl=${this.preflightCurlPassed ? '✓' : '✗'} http=${this.preflightHttpPassed ? '✓' : '✗'}`; console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (stealth=lazy, max ${this.maxConcurrentTasks} concurrent tasks)`);
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (${preflightMsg}, max ${this.maxConcurrentTasks} concurrent tasks)`);
while (this.isRunning) { while (this.isRunning) {
try { try {
@@ -669,6 +751,20 @@ export class TaskWorker {
// Try to claim more tasks if we have capacity // Try to claim more tasks if we have capacity
if (this.canAcceptMoreTasks()) { if (this.canAcceptMoreTasks()) {
// =================================================================
// LAZY INITIALIZATION - Initialize stealth on first task claim
// Workers start immediately and init proxies only when needed
// =================================================================
if (!this.stealthInitialized) {
const initSuccess = await this.ensureStealthInitialized();
if (!initSuccess) {
// Init failed - wait and retry next loop
console.log(`[TaskWorker] ${this.friendlyName} stealth init failed, waiting before retry...`);
await this.sleep(30000);
return;
}
}
// Pass preflight capabilities to only claim compatible tasks // Pass preflight capabilities to only claim compatible tasks
const task = await taskService.claimTask( const task = await taskService.claimTask(
this.role, this.role,
@@ -904,7 +1000,10 @@ async function main(): Promise<void> {
process.exit(1); process.exit(1);
} }
const workerId = process.env.WORKER_ID; // Use POD_NAME for persistent identity in K8s StatefulSet
// This ensures workers keep the same ID across restarts
// Falls back to WORKER_ID, then generates UUID if neither is set
const workerId = process.env.POD_NAME || process.env.WORKER_ID;
// Pass null for role-agnostic, or the specific role // Pass null for role-agnostic, or the specific role
const worker = new TaskWorker(role || null, workerId); const worker = new TaskWorker(role || null, workerId);

View File

@@ -2666,13 +2666,25 @@ class ApiClient {
// Dashboard methods // Dashboard methods
getMarketDashboard = this.getMarketsDashboard.bind(this); getMarketDashboard = this.getMarketsDashboard.bind(this);
// Schedule methods (no conflicts) // ============================================================
// LEGACY SCHEDULE METHODS (DEPRECATED 2025-12-12)
// These use /api/markets/admin/schedules which queries job_schedules
// Use getTaskSchedules(), updateTaskSchedule(), etc. instead
// (defined below, use /api/tasks/schedules which queries task_schedules)
// ============================================================
/** @deprecated Use getTaskSchedules() - queries task_schedules table */
getSchedules = this.getCrawlSchedules.bind(this); getSchedules = this.getCrawlSchedules.bind(this);
/** @deprecated Use getTaskSchedule() - queries task_schedules table */
getSchedule = this.getDutchieAZSchedule.bind(this); getSchedule = this.getDutchieAZSchedule.bind(this);
/** @deprecated Use createTaskSchedule() - queries task_schedules table */
createSchedule = this.createDutchieAZSchedule.bind(this); createSchedule = this.createDutchieAZSchedule.bind(this);
/** @deprecated Use updateTaskSchedule() - queries task_schedules table */
updateSchedule = this.updateDutchieAZSchedule.bind(this); updateSchedule = this.updateDutchieAZSchedule.bind(this);
/** @deprecated Use deleteTaskSchedule() - queries task_schedules table */
deleteSchedule = this.deleteDutchieAZSchedule.bind(this); deleteSchedule = this.deleteDutchieAZSchedule.bind(this);
/** @deprecated Use runTaskScheduleNow() - queries task_schedules table */
triggerSchedule = this.triggerDutchieAZSchedule.bind(this); triggerSchedule = this.triggerDutchieAZSchedule.bind(this);
/** @deprecated - job_schedules init not needed for task_schedules */
initSchedules = this.initDutchieAZSchedules.bind(this); initSchedules = this.initDutchieAZSchedules.bind(this);
getScheduleLogs = this.getCrawlScheduleLogs.bind(this); getScheduleLogs = this.getCrawlScheduleLogs.bind(this);
getRunLogs = this.getDutchieAZRunLogs.bind(this); getRunLogs = this.getDutchieAZRunLogs.bind(this);
@@ -2976,6 +2988,99 @@ class ApiClient {
{ method: 'POST', body: JSON.stringify({ replicas }) } { method: 'POST', body: JSON.stringify({ replicas }) }
); );
} }
// ==========================================
// Task Schedules API (recurring task definitions)
// ==========================================
async getTaskSchedules(enabledOnly?: boolean) {
const qs = enabledOnly ? '?enabled=true' : '';
return this.request<{ schedules: TaskSchedule[] }>(`/api/tasks/schedules${qs}`);
}
async getTaskSchedule(id: number) {
return this.request<TaskSchedule>(`/api/tasks/schedules/${id}`);
}
async createTaskSchedule(data: {
name: string;
role: string;
description?: string;
enabled?: boolean;
interval_hours: number;
priority?: number;
state_code?: string;
platform?: string;
}) {
return this.request<TaskSchedule>('/api/tasks/schedules', {
method: 'POST',
body: JSON.stringify(data),
});
}
async updateTaskSchedule(id: number, data: Partial<{
name: string;
role: string;
description: string;
enabled: boolean;
interval_hours: number;
priority: number;
state_code: string;
platform: string;
}>) {
return this.request<TaskSchedule>(`/api/tasks/schedules/${id}`, {
method: 'PUT',
body: JSON.stringify(data),
});
}
async deleteTaskSchedule(id: number) {
return this.request<{ success: boolean; message: string }>(`/api/tasks/schedules/${id}`, {
method: 'DELETE',
});
}
async deleteTaskSchedulesBulk(ids?: number[], all?: boolean) {
return this.request<{ success: boolean; deleted_count: number; deleted: { id: number; name: string }[]; message: string }>(
'/api/tasks/schedules',
{
method: 'DELETE',
body: JSON.stringify({ ids, all }),
}
);
}
async runTaskScheduleNow(id: number) {
return this.request<{ success: boolean; message: string; task: any }>(`/api/tasks/schedules/${id}/run-now`, {
method: 'POST',
});
}
async toggleTaskSchedule(id: number) {
return this.request<{ success: boolean; schedule: { id: number; name: string; enabled: boolean }; message: string }>(
`/api/tasks/schedules/${id}/toggle`,
{ method: 'POST' }
);
}
}
// Type for task schedules
export interface TaskSchedule {
id: number;
name: string;
role: string;
description: string | null;
enabled: boolean;
interval_hours: number;
priority: number;
state_code: string | null;
platform: string | null;
last_run_at: string | null;
next_run_at: string | null;
last_task_count: number;
last_error: string | null;
created_at: string;
updated_at: string;
} }
export const api = new ApiClient(API_URL); export const api = new ApiClient(API_URL);

View File

@@ -1,3 +1,18 @@
/**
* @deprecated 2025-12-12
*
* This page used the legacy job_schedules table which has been deprecated.
* All schedule management has been consolidated into task_schedules and
* is now managed via the /admin/tasks page (TasksDashboard.tsx).
*
* The job_schedules table entries have been disabled and marked deprecated.
* This page is no longer in the navigation menu but kept for reference.
*
* Migration details:
* - job_schedules used base_interval_minutes + jitter_minutes
* - task_schedules uses interval_hours (simpler model)
* - All CRUD operations now via /api/tasks/schedules endpoints
*/
import { useEffect, useState } from 'react'; import { useEffect, useState } from 'react';
import { Layout } from '../components/Layout'; import { Layout } from '../components/Layout';
import { api } from '../lib/api'; import { api } from '../lib/api';

View File

@@ -1,5 +1,5 @@
import { useState, useEffect } from 'react'; import { useState, useEffect } from 'react';
import { api } from '../lib/api'; import { api, TaskSchedule } from '../lib/api';
import { Layout } from '../components/Layout'; import { Layout } from '../components/Layout';
import { import {
ListChecks, ListChecks,
@@ -21,6 +21,10 @@ import {
X, X,
Calendar, Calendar,
Trash2, Trash2,
Edit2,
Play,
Pause,
Timer,
} from 'lucide-react'; } from 'lucide-react';
interface Task { interface Task {
@@ -383,8 +387,232 @@ const ROLES = [
'product_discovery', 'product_discovery',
'product_refresh', 'product_refresh',
'analytics_refresh', 'analytics_refresh',
'payload_fetch',
]; ];
// ============================================================
// Schedule Edit Modal
// ============================================================
interface ScheduleEditModalProps {
isOpen: boolean;
schedule: TaskSchedule | null;
onClose: () => void;
onSave: () => void;
}
function ScheduleEditModal({ isOpen, schedule, onClose, onSave }: ScheduleEditModalProps) {
const [name, setName] = useState('');
const [role, setRole] = useState('product_refresh');
const [description, setDescription] = useState('');
const [enabled, setEnabled] = useState(true);
const [intervalHours, setIntervalHours] = useState(4);
const [priority, setPriority] = useState(0);
const [stateCode, setStateCode] = useState('');
const [platform, setPlatform] = useState('dutchie');
const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
const isNew = !schedule;
useEffect(() => {
if (schedule) {
setName(schedule.name);
setRole(schedule.role);
setDescription(schedule.description || '');
setEnabled(schedule.enabled);
setIntervalHours(schedule.interval_hours);
setPriority(schedule.priority);
setStateCode(schedule.state_code || '');
setPlatform(schedule.platform || 'dutchie');
} else {
// Reset for new schedule
setName('');
setRole('product_refresh');
setDescription('');
setEnabled(true);
setIntervalHours(4);
setPriority(0);
setStateCode('');
setPlatform('dutchie');
}
setError(null);
}, [schedule, isOpen]);
const handleSubmit = async () => {
if (!name.trim()) {
setError('Name is required');
return;
}
setLoading(true);
setError(null);
try {
const data = {
name: name.trim(),
role,
description: description.trim() || undefined,
enabled,
interval_hours: intervalHours,
priority,
state_code: stateCode.trim() || undefined,
platform: platform.trim() || undefined,
};
if (isNew) {
await api.createTaskSchedule(data as any);
} else {
await api.updateTaskSchedule(schedule!.id, data);
}
onSave();
onClose();
} catch (err: any) {
setError(err.response?.data?.error || err.message || 'Failed to save schedule');
} finally {
setLoading(false);
}
};
if (!isOpen) return null;
return (
<div className="fixed inset-0 z-50 overflow-y-auto">
<div className="flex min-h-full items-center justify-center p-4">
<div className="fixed inset-0 bg-black/50" onClick={onClose} />
<div className="relative bg-white rounded-xl shadow-xl max-w-lg w-full">
<div className="px-6 py-4 border-b border-gray-200 flex items-center justify-between">
<h2 className="text-lg font-semibold text-gray-900">
{isNew ? 'Create Schedule' : 'Edit Schedule'}
</h2>
<button onClick={onClose} className="p-1 hover:bg-gray-100 rounded">
<X className="w-5 h-5 text-gray-500" />
</button>
</div>
<div className="px-6 py-4 space-y-4">
{error && (
<div className="bg-red-50 border border-red-200 rounded-lg p-3 text-red-700 text-sm">
{error}
</div>
)}
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">Name *</label>
<input
type="text"
value={name}
onChange={(e) => setName(e.target.value)}
placeholder="e.g., product_refresh_all"
className="w-full px-3 py-2 border border-gray-200 rounded-lg"
/>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">Role *</label>
<select
value={role}
onChange={(e) => setRole(e.target.value)}
className="w-full px-3 py-2 border border-gray-200 rounded-lg"
>
{TASK_ROLES.map(r => (
<option key={r.id} value={r.id}>{r.name}</option>
))}
</select>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">Description</label>
<input
type="text"
value={description}
onChange={(e) => setDescription(e.target.value)}
placeholder="Optional description"
className="w-full px-3 py-2 border border-gray-200 rounded-lg"
/>
</div>
<div className="grid grid-cols-2 gap-4">
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">Interval (hours) *</label>
<input
type="number"
min="1"
max="168"
value={intervalHours}
onChange={(e) => setIntervalHours(parseInt(e.target.value) || 4)}
className="w-full px-3 py-2 border border-gray-200 rounded-lg"
/>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">Priority</label>
<input
type="number"
min="0"
max="100"
value={priority}
onChange={(e) => setPriority(parseInt(e.target.value) || 0)}
className="w-full px-3 py-2 border border-gray-200 rounded-lg"
/>
</div>
</div>
<div className="grid grid-cols-2 gap-4">
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">State Code</label>
<input
type="text"
value={stateCode}
onChange={(e) => setStateCode(e.target.value.toUpperCase())}
placeholder="e.g., AZ"
maxLength={2}
className="w-full px-3 py-2 border border-gray-200 rounded-lg"
/>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">Platform</label>
<input
type="text"
value={platform}
onChange={(e) => setPlatform(e.target.value)}
placeholder="e.g., dutchie"
className="w-full px-3 py-2 border border-gray-200 rounded-lg"
/>
</div>
</div>
<div className="flex items-center gap-2">
<input
type="checkbox"
id="enabled"
checked={enabled}
onChange={(e) => setEnabled(e.target.checked)}
className="w-4 h-4 text-emerald-600 rounded"
/>
<label htmlFor="enabled" className="text-sm text-gray-700">Enabled</label>
</div>
</div>
<div className="px-6 py-4 border-t border-gray-200 bg-gray-50 flex justify-end gap-3">
<button onClick={onClose} className="px-4 py-2 text-sm text-gray-700 hover:bg-gray-100 rounded-lg">
Cancel
</button>
<button
onClick={handleSubmit}
disabled={loading}
className="px-4 py-2 text-sm bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 disabled:opacity-50 flex items-center gap-2"
>
{loading && <RefreshCw className="w-4 h-4 animate-spin" />}
{isNew ? 'Create' : 'Save'}
</button>
</div>
</div>
</div>
</div>
);
}
const STATUS_COLORS: Record<string, string> = { const STATUS_COLORS: Record<string, string> = {
pending: 'bg-yellow-100 text-yellow-800', pending: 'bg-yellow-100 text-yellow-800',
claimed: 'bg-blue-100 text-blue-800', claimed: 'bg-blue-100 text-blue-800',
@@ -452,6 +680,13 @@ export default function TasksDashboard() {
const [poolPaused, setPoolPaused] = useState(false); const [poolPaused, setPoolPaused] = useState(false);
const [showCreateModal, setShowCreateModal] = useState(false); const [showCreateModal, setShowCreateModal] = useState(false);
// Schedules state
const [schedules, setSchedules] = useState<TaskSchedule[]>([]);
const [showSchedules, setShowSchedules] = useState(true);
const [selectedSchedules, setSelectedSchedules] = useState<Set<number>>(new Set());
const [editingSchedule, setEditingSchedule] = useState<TaskSchedule | null>(null);
const [showScheduleModal, setShowScheduleModal] = useState(false);
// Pagination // Pagination
const [page, setPage] = useState(0); const [page, setPage] = useState(0);
const tasksPerPage = 25; const tasksPerPage = 25;
@@ -465,7 +700,7 @@ export default function TasksDashboard() {
const fetchData = async () => { const fetchData = async () => {
try { try {
const [tasksRes, countsRes, capacityRes, poolStatus] = await Promise.all([ const [tasksRes, countsRes, capacityRes, poolStatus, schedulesRes] = await Promise.all([
api.getTasks({ api.getTasks({
role: roleFilter || undefined, role: roleFilter || undefined,
status: statusFilter || undefined, status: statusFilter || undefined,
@@ -474,12 +709,14 @@ export default function TasksDashboard() {
api.getTaskCounts(), api.getTaskCounts(),
api.getTaskCapacity(), api.getTaskCapacity(),
api.getTaskPoolStatus(), api.getTaskPoolStatus(),
api.getTaskSchedules(),
]); ]);
setTasks(tasksRes.tasks || []); setTasks(tasksRes.tasks || []);
setCounts(countsRes); setCounts(countsRes);
setCapacity(capacityRes.metrics || []); setCapacity(capacityRes.metrics || []);
setPoolPaused(poolStatus.paused); setPoolPaused(poolStatus.paused);
setSchedules(schedulesRes.schedules || []);
setError(null); setError(null);
} catch (err: any) { } catch (err: any) {
setError(err.message || 'Failed to load tasks'); setError(err.message || 'Failed to load tasks');
@@ -488,6 +725,76 @@ export default function TasksDashboard() {
} }
}; };
const handleDeleteSchedule = async (scheduleId: number) => {
if (!confirm('Delete this schedule?')) return;
try {
await api.deleteTaskSchedule(scheduleId);
setSelectedSchedules(prev => {
const next = new Set(prev);
next.delete(scheduleId);
return next;
});
fetchData();
} catch (err: any) {
console.error('Delete schedule error:', err);
alert(err.response?.data?.error || 'Failed to delete schedule');
}
};
const handleBulkDeleteSchedules = async () => {
if (selectedSchedules.size === 0) return;
if (!confirm(`Delete ${selectedSchedules.size} selected schedule(s)?`)) return;
try {
await api.deleteTaskSchedulesBulk(Array.from(selectedSchedules));
setSelectedSchedules(new Set());
fetchData();
} catch (err: any) {
console.error('Bulk delete error:', err);
alert(err.response?.data?.error || 'Failed to delete schedules');
}
};
const handleToggleSchedule = async (scheduleId: number) => {
try {
await api.toggleTaskSchedule(scheduleId);
fetchData();
} catch (err: any) {
console.error('Toggle schedule error:', err);
alert(err.response?.data?.error || 'Failed to toggle schedule');
}
};
const handleRunScheduleNow = async (scheduleId: number) => {
try {
const result = await api.runTaskScheduleNow(scheduleId);
alert(result.message);
fetchData();
} catch (err: any) {
console.error('Run schedule error:', err);
alert(err.response?.data?.error || 'Failed to run schedule');
}
};
const toggleSelectSchedule = (id: number) => {
setSelectedSchedules(prev => {
const next = new Set(prev);
if (next.has(id)) {
next.delete(id);
} else {
next.add(id);
}
return next;
});
};
const toggleSelectAllSchedules = () => {
if (selectedSchedules.size === schedules.length) {
setSelectedSchedules(new Set());
} else {
setSelectedSchedules(new Set(schedules.map(s => s.id)));
}
};
const handleDeleteTask = async (taskId: number) => { const handleDeleteTask = async (taskId: number) => {
if (!confirm('Delete this task?')) return; if (!confirm('Delete this task?')) return;
try { try {
@@ -583,6 +890,17 @@ export default function TasksDashboard() {
onTaskCreated={fetchData} onTaskCreated={fetchData}
/> />
{/* Schedule Edit Modal */}
<ScheduleEditModal
isOpen={showScheduleModal}
schedule={editingSchedule}
onClose={() => {
setShowScheduleModal(false);
setEditingSchedule(null);
}}
onSave={fetchData}
/>
{/* Status Summary Cards */} {/* Status Summary Cards */}
<div className="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-6 gap-4"> <div className="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-6 gap-4">
{Object.entries(counts || {}).map(([status, count]) => ( {Object.entries(counts || {}).map(([status, count]) => (
@@ -714,6 +1032,196 @@ export default function TasksDashboard() {
)} )}
</div> </div>
{/* Schedules Section */}
<div className="bg-white rounded-lg border border-gray-200 overflow-hidden">
<button
onClick={() => setShowSchedules(!showSchedules)}
className="w-full flex items-center justify-between p-4 hover:bg-gray-50"
>
<div className="flex items-center gap-2">
<Timer className="w-5 h-5 text-emerald-600" />
<span className="font-medium text-gray-900">Schedules ({schedules.length})</span>
</div>
{showSchedules ? (
<ChevronUp className="w-5 h-5 text-gray-400" />
) : (
<ChevronDown className="w-5 h-5 text-gray-400" />
)}
</button>
{showSchedules && (
<div className="border-t border-gray-200">
{/* Schedule Actions */}
<div className="p-4 bg-gray-50 border-b border-gray-200 flex flex-wrap items-center justify-between gap-2">
<div className="flex items-center gap-2">
<button
onClick={() => {
setEditingSchedule(null);
setShowScheduleModal(true);
}}
className="flex items-center gap-1 px-3 py-1.5 text-sm bg-emerald-600 text-white rounded hover:bg-emerald-700"
>
<Plus className="w-4 h-4" />
New Schedule
</button>
{selectedSchedules.size > 0 && (
<button
onClick={handleBulkDeleteSchedules}
className="flex items-center gap-1 px-3 py-1.5 text-sm bg-red-600 text-white rounded hover:bg-red-700"
>
<Trash2 className="w-4 h-4" />
Delete ({selectedSchedules.size})
</button>
)}
</div>
<span className="text-sm text-gray-500">
{schedules.filter(s => s.enabled).length} enabled
</span>
</div>
{schedules.length === 0 ? (
<div className="p-8 text-center text-gray-500">
No schedules configured. Click "New Schedule" to create one.
</div>
) : (
<div className="overflow-x-auto">
<table className="min-w-full divide-y divide-gray-200">
<thead className="bg-gray-50">
<tr>
<th className="px-4 py-3 text-left">
<input
type="checkbox"
checked={selectedSchedules.size === schedules.length && schedules.length > 0}
onChange={toggleSelectAllSchedules}
className="w-4 h-4 text-emerald-600 rounded"
/>
</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">
Name
</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">
Role
</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">
Interval
</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">
Last Run
</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">
Next Run
</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">
Status
</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase w-32">
Actions
</th>
</tr>
</thead>
<tbody className="divide-y divide-gray-200">
{schedules.map((schedule) => (
<tr key={schedule.id} className="hover:bg-gray-50">
<td className="px-4 py-3">
<input
type="checkbox"
checked={selectedSchedules.has(schedule.id)}
onChange={() => toggleSelectSchedule(schedule.id)}
className="w-4 h-4 text-emerald-600 rounded"
/>
</td>
<td className="px-4 py-3">
<div className="text-sm font-medium text-gray-900">{schedule.name}</div>
{schedule.description && (
<div className="text-xs text-gray-500">{schedule.description}</div>
)}
</td>
<td className="px-4 py-3 text-sm text-gray-600">
{schedule.role.replace(/_/g, ' ')}
</td>
<td className="px-4 py-3 text-sm text-gray-600">
Every {schedule.interval_hours}h
</td>
<td className="px-4 py-3 text-sm text-gray-600">
{schedule.last_run_at ? formatTimeAgo(schedule.last_run_at) : '-'}
</td>
<td className="px-4 py-3 text-sm text-gray-600">
{schedule.next_run_at ? formatTimeAgo(schedule.next_run_at) : '-'}
</td>
<td className="px-4 py-3">
<span
className={`inline-flex items-center gap-1 px-2 py-1 rounded-full text-xs font-medium ${
schedule.enabled
? 'bg-green-100 text-green-800'
: 'bg-gray-100 text-gray-800'
}`}
>
{schedule.enabled ? (
<>
<Play className="w-3 h-3" />
Active
</>
) : (
<>
<Pause className="w-3 h-3" />
Paused
</>
)}
</span>
</td>
<td className="px-4 py-3">
<div className="flex items-center gap-1">
<button
onClick={() => handleRunScheduleNow(schedule.id)}
className="p-1.5 text-gray-400 hover:text-emerald-600 hover:bg-emerald-50 rounded transition-colors"
title="Run now"
>
<PlayCircle className="w-4 h-4" />
</button>
<button
onClick={() => handleToggleSchedule(schedule.id)}
className={`p-1.5 rounded transition-colors ${
schedule.enabled
? 'text-gray-400 hover:text-yellow-600 hover:bg-yellow-50'
: 'text-gray-400 hover:text-green-600 hover:bg-green-50'
}`}
title={schedule.enabled ? 'Pause' : 'Enable'}
>
{schedule.enabled ? (
<Pause className="w-4 h-4" />
) : (
<Play className="w-4 h-4" />
)}
</button>
<button
onClick={() => {
setEditingSchedule(schedule);
setShowScheduleModal(true);
}}
className="p-1.5 text-gray-400 hover:text-blue-600 hover:bg-blue-50 rounded transition-colors"
title="Edit"
>
<Edit2 className="w-4 h-4" />
</button>
<button
onClick={() => handleDeleteSchedule(schedule.id)}
className="p-1.5 text-gray-400 hover:text-red-600 hover:bg-red-50 rounded transition-colors"
title="Delete"
>
<Trash2 className="w-4 h-4" />
</button>
</div>
</td>
</tr>
))}
</tbody>
</table>
</div>
)}
</div>
)}
</div>
{/* Filters */} {/* Filters */}
<div className="flex flex-col sm:flex-row gap-4"> <div className="flex flex-col sm:flex-row gap-4">
<div className="relative flex-1"> <div className="relative flex-1">