From 56cc171287eb4ae8d1859b0b12fd7ee47d20e99c Mon Sep 17 00:00:00 2001 From: Kelly Date: Wed, 10 Dec 2025 00:44:59 -0700 Subject: [PATCH] feat: Stealth worker system with mandatory proxy rotation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Worker System - Role-agnostic workers that can handle any task type - Pod-based architecture with StatefulSet (5-15 pods, 5 workers each) - Custom pod names (Aethelgard, Xylos, Kryll, etc.) - Worker registry with friendly names and resource monitoring - Hub-and-spoke visualization on JobQueue page ## Stealth & Anti-Detection (REQUIRED) - Proxies are MANDATORY - workers fail to start without active proxies - CrawlRotator initializes on worker startup - Loads proxies from `proxies` table - Auto-rotates proxy + fingerprint on 403 errors - 12 browser fingerprints (Chrome, Firefox, Safari, Edge) - Locale/timezone matching for geographic consistency ## Task System - Renamed product_resync → product_refresh - Task chaining: store_discovery → entry_point → product_discovery - Priority-based claiming with FOR UPDATE SKIP LOCKED - Heartbeat and stale task recovery ## UI Updates - JobQueue: Pod visualization, resource monitoring on hover - WorkersDashboard: Simplified worker list - Removed unused filters from task list ## Other - IP2Location service for visitor analytics - Findagram consumer features scaffolding - Documentation updates 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- CLAUDE.md | 25 +- backend/.gitignore | 3 + backend/docs/CRAWL_PIPELINE.md | 230 +++ backend/k8s/cronjob-ip2location.yaml | 69 + backend/k8s/deployment.yaml | 10 + backend/migrations/076_visitor_analytics.sql | 71 + backend/migrations/076_worker_registry.sql | 141 ++ .../migrations/077_click_events_location.sql | 35 + backend/node_modules/.package-lock.json | 19 + backend/package-lock.json | 20 + backend/package.json | 1 + backend/scripts/download-ip2location.sh | 65 + backend/src/discovery/discovery-crawler.ts | 118 ++ backend/src/index.ts | 5 + backend/src/routes/categories.ts | 239 ++- backend/src/routes/dispensaries.ts | 37 +- backend/src/routes/events.ts | 17 +- backend/src/routes/products.ts | 20 +- backend/src/routes/public-api.ts | 239 ++- backend/src/routes/tasks.ts | 4 +- backend/src/routes/worker-registry.ts | 652 ++++++++ backend/src/services/crawl-rotator.ts | 33 +- backend/src/services/ip2location.ts | 134 ++ backend/src/services/scraper.ts | 5 +- .../tasks/handlers/entry-point-discovery.ts | 131 +- backend/src/tasks/handlers/index.ts | 2 +- .../src/tasks/handlers/product-discovery.ts | 6 +- .../{product-resync.ts => product-refresh.ts} | 6 +- backend/src/tasks/index.ts | 2 +- backend/src/tasks/start-pod.ts | 93 ++ backend/src/tasks/task-service.ts | 64 +- backend/src/tasks/task-worker.ts | 245 ++- cannaiq/src/lib/api.ts | 7 +- cannaiq/src/pages/Dashboard.tsx | 55 + cannaiq/src/pages/Dispensaries.tsx | 28 +- cannaiq/src/pages/JobQueue.tsx | 1384 ++++++++++------- cannaiq/src/pages/TasksDashboard.tsx | 2 +- cannaiq/src/pages/WorkersDashboard.tsx | 1269 +++++---------- docs/CRAWL_SYSTEM_V2.md | 353 +++++ docs/WORKER_SYSTEM.md | 408 +++++ docs/legacy_mapping.md | 18 +- findadispo/frontend/src/api/client.js | 86 + .../src/pages/findadispo/DispensaryDetail.jsx | 163 +- findagram/FINDAGRAM.md | 114 ++ findagram/frontend/src/App.js | 47 +- findagram/frontend/src/api/client.js | 197 ++- findagram/frontend/src/api/consumer.js | 302 ++++ .../src/components/findagram/AuthModal.jsx | 315 
++++ .../src/components/findagram/Header.jsx | 46 +- .../src/components/findagram/ProductCard.jsx | 127 +- findagram/frontend/src/context/AuthContext.js | 258 +++ .../frontend/src/hooks/useGeolocation.js | 314 ++++ findagram/frontend/src/lib/storage.js | 363 +++++ .../frontend/src/pages/findagram/Alerts.jsx | 247 ++- .../src/pages/findagram/Dashboard.jsx | 237 ++- .../frontend/src/pages/findagram/Deals.jsx | 271 ++-- .../src/pages/findagram/DispensaryDetail.jsx | 654 ++++++++ .../src/pages/findagram/Favorites.jsx | 184 ++- .../frontend/src/pages/findagram/Home.jsx | 194 ++- .../src/pages/findagram/SavedSearches.jsx | 122 +- k8s/scraper-worker.yaml | 161 +- 61 files changed, 8591 insertions(+), 2076 deletions(-) create mode 100644 backend/.gitignore create mode 100644 backend/k8s/cronjob-ip2location.yaml create mode 100644 backend/migrations/076_visitor_analytics.sql create mode 100644 backend/migrations/076_worker_registry.sql create mode 100644 backend/migrations/077_click_events_location.sql create mode 100755 backend/scripts/download-ip2location.sh create mode 100644 backend/src/routes/worker-registry.ts create mode 100644 backend/src/services/ip2location.ts rename backend/src/tasks/handlers/{product-resync.ts => product-refresh.ts} (98%) create mode 100644 backend/src/tasks/start-pod.ts create mode 100644 docs/CRAWL_SYSTEM_V2.md create mode 100644 docs/WORKER_SYSTEM.md create mode 100644 findagram/FINDAGRAM.md create mode 100644 findagram/frontend/src/api/consumer.js create mode 100644 findagram/frontend/src/components/findagram/AuthModal.jsx create mode 100644 findagram/frontend/src/context/AuthContext.js create mode 100644 findagram/frontend/src/hooks/useGeolocation.js create mode 100644 findagram/frontend/src/lib/storage.js create mode 100644 findagram/frontend/src/pages/findagram/DispensaryDetail.jsx diff --git a/CLAUDE.md b/CLAUDE.md index dbd7e261..0ff24bfe 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -213,22 +213,23 @@ CannaiQ has **TWO databases** with distinct purposes: | Table | Purpose | Row Count | |-------|---------|-----------| | `dispensaries` | Store/dispensary records | ~188+ rows | -| `dutchie_products` | Product catalog | ~37,000+ rows | -| `dutchie_product_snapshots` | Price/stock history | ~millions | -| `store_products` | Canonical product schema | ~37,000+ rows | -| `store_product_snapshots` | Canonical snapshot schema | growing | +| `store_products` | Product catalog | ~37,000+ rows | +| `store_product_snapshots` | Price/stock history | ~millions | **LEGACY TABLES (EMPTY - DO NOT USE):** | Table | Status | Action | |-------|--------|--------| | `stores` | EMPTY (0 rows) | Use `dispensaries` instead | -| `products` | EMPTY (0 rows) | Use `dutchie_products` or `store_products` | +| `products` | EMPTY (0 rows) | Use `store_products` instead | +| `dutchie_products` | LEGACY (0 rows) | Use `store_products` instead | +| `dutchie_product_snapshots` | LEGACY (0 rows) | Use `store_product_snapshots` instead | | `categories` | EMPTY (0 rows) | Categories stored in product records | **Code must NEVER:** - Query the `stores` table (use `dispensaries`) -- Query the `products` table (use `dutchie_products` or `store_products`) +- Query the `products` table (use `store_products`) +- Query the `dutchie_products` table (use `store_products`) - Query the `categories` table (categories are in product records) **CRITICAL RULES:** @@ -343,23 +344,23 @@ npx tsx src/scripts/etl/042_legacy_import.ts - SCHEMA ONLY - no data inserts from legacy tables **ETL Script 042** 
(`backend/src/scripts/etl/042_legacy_import.ts`): -- Copies data from `dutchie_products` → `store_products` -- Copies data from `dutchie_product_snapshots` → `store_product_snapshots` +- Copies data from legacy `dutchie_legacy.dutchie_products` → `store_products` +- Copies data from legacy `dutchie_legacy.dutchie_product_snapshots` → `store_product_snapshots` - Extracts brands from product data into `brands` table - Links dispensaries to chains and states - INSERT-ONLY and IDEMPOTENT (uses ON CONFLICT DO NOTHING) - Run manually: `cd backend && npx tsx src/scripts/etl/042_legacy_import.ts` **Tables touched by ETL:** -| Source Table | Target Table | -|--------------|--------------| +| Source Table (dutchie_legacy) | Target Table (dutchie_menus) | +|-------------------------------|------------------------------| | `dutchie_products` | `store_products` | | `dutchie_product_snapshots` | `store_product_snapshots` | | (brand names extracted) | `brands` | | (state codes mapped) | `dispensaries.state_id` | | (chain names matched) | `dispensaries.chain_id` | -**Legacy tables remain intact** - `dutchie_products` and `dutchie_product_snapshots` are not modified. +**Note:** The legacy `dutchie_products` and `dutchie_product_snapshots` tables in `dutchie_legacy` are read-only sources. All new crawl data goes directly to `store_products` and `store_product_snapshots`. **Migration 045** (`backend/migrations/045_add_image_columns.sql`): - Adds `thumbnail_url` to `store_products` and `store_product_snapshots` @@ -881,7 +882,7 @@ export default defineConfig({ 18) **Dashboard Architecture** - **Frontend**: Rebuild the frontend with `VITE_API_URL` pointing to the correct backend and redeploy. - - **Backend**: `/api/dashboard/stats` MUST use the canonical DB pool. Use the correct tables: `dutchie_products`, `dispensaries`, and views like `v_dashboard_stats`, `v_latest_snapshots`. + - **Backend**: `/api/dashboard/stats` MUST use the canonical DB pool. Use the correct tables: `store_products`, `dispensaries`, and views like `v_dashboard_stats`, `v_latest_snapshots`. 19) **Deployment (Gitea + Kubernetes)** - **Registry**: Gitea at `code.cannabrands.app/creationshop/dispensary-scraper` diff --git a/backend/.gitignore b/backend/.gitignore new file mode 100644 index 00000000..ad1d7842 --- /dev/null +++ b/backend/.gitignore @@ -0,0 +1,3 @@ + +# IP2Location database (downloaded separately) +data/ip2location/ diff --git a/backend/docs/CRAWL_PIPELINE.md b/backend/docs/CRAWL_PIPELINE.md index 239c2161..1c92d419 100644 --- a/backend/docs/CRAWL_PIPELINE.md +++ b/backend/docs/CRAWL_PIPELINE.md @@ -275,6 +275,22 @@ Store metadata: --- +## Worker Roles + +Workers pull tasks from the `worker_tasks` queue based on their assigned role. 
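+
+As a rough sketch of how that claim works (hypothetical code — the real `claim_task()` SQL function, described under Priority & On-Demand Tasks below, also enforces one active task per store; the `status`/`claimed_by`/`claimed_at` column names are assumptions for illustration):
+
+```typescript
+import { Pool } from 'pg';
+
+// Claim the highest-priority pending task, skipping rows other workers hold locks on.
+export async function claimNextTask(pool: Pool, workerId: string, role?: string) {
+  const { rows } = await pool.query(
+    `UPDATE worker_tasks
+        SET status = 'claimed', claimed_by = $1, claimed_at = NOW()
+      WHERE id = (
+            SELECT id FROM worker_tasks
+             WHERE status = 'pending'
+               AND ($2::text IS NULL OR role = $2)
+             ORDER BY priority DESC, created_at ASC
+             LIMIT 1
+             FOR UPDATE SKIP LOCKED)
+      RETURNING *`,
+    [workerId, role ?? null]
+  );
+  return rows[0] ?? null;
+}
+```
+
+The available roles: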
+ +| Role | Name | Description | Handler | +|------|------|-------------|---------| +| `product_resync` | Product Resync | Re-crawl dispensary products for price/stock changes | `handleProductResync` | +| `product_discovery` | Product Discovery | Initial product discovery for new dispensaries | `handleProductDiscovery` | +| `store_discovery` | Store Discovery | Discover new dispensary locations | `handleStoreDiscovery` | +| `entry_point_discovery` | Entry Point Discovery | Resolve platform IDs from menu URLs | `handleEntryPointDiscovery` | +| `analytics_refresh` | Analytics Refresh | Refresh materialized views and analytics | `handleAnalyticsRefresh` | + +**API Endpoint:** `GET /api/worker-registry/roles` + +--- + ## Scheduling Crawls are scheduled via `worker_tasks` table: @@ -282,8 +298,219 @@ Crawls are scheduled via `worker_tasks` table: | Role | Frequency | Description | |------|-----------|-------------| | `product_resync` | Every 4 hours | Regular product refresh | +| `product_discovery` | On-demand | First crawl for new stores | | `entry_point_discovery` | On-demand | New store setup | | `store_discovery` | Daily | Find new stores | +| `analytics_refresh` | Daily | Refresh analytics materialized views | + +--- + +## Priority & On-Demand Tasks + +Tasks are claimed by workers in order of **priority DESC, created_at ASC**. + +### Priority Levels + +| Priority | Use Case | Example | +|----------|----------|---------| +| 0 | Scheduled/batch tasks | Daily product_resync generation | +| 10 | On-demand/chained tasks | entry_point → product_discovery | +| Higher | Urgent/manual triggers | Admin-triggered immediate crawl | + +### Task Chaining + +When a task completes, the system automatically creates follow-up tasks: + +``` +store_discovery (completed) + └─► entry_point_discovery (priority: 10) for each new store + +entry_point_discovery (completed, success) + └─► product_discovery (priority: 10) for that store + +product_discovery (completed) + └─► [no chain] Store enters regular resync schedule +``` + +### On-Demand Task Creation + +Use the task service to create high-priority tasks: + +```typescript +// Create immediate product resync for a store +await taskService.createTask({ + role: 'product_resync', + dispensary_id: 123, + platform: 'dutchie', + priority: 20, // Higher than batch tasks +}); + +// Convenience methods with default high priority (10) +await taskService.createEntryPointTask(dispensaryId, 'dutchie'); +await taskService.createProductDiscoveryTask(dispensaryId, 'dutchie'); +await taskService.createStoreDiscoveryTask('dutchie', 'AZ'); +``` + +### Claim Function + +The `claim_task()` SQL function atomically claims tasks: +- Respects priority ordering (higher = first) +- Uses `FOR UPDATE SKIP LOCKED` for concurrency +- Prevents multiple active tasks per store + +--- + +## Image Storage + +Images are downloaded from Dutchie's AWS S3 and stored locally with on-demand resizing. 
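+
+A rough sketch of the resize-on-demand idea (hypothetical code, not the actual `src/routes/image-proxy.ts`; the supported query parameters are listed under Image Proxy API below):
+
+```typescript
+import express from 'express';
+import path from 'path';
+import sharp from 'sharp';
+
+const app = express();
+
+// Serve /img/* by reading the stored original and resizing per query params.
+// A real handler would also sanitize the path and cache results.
+app.get('/img/*', async (req, res) => {
+  try {
+    const relPath = req.path.replace(/^\/img\//, ''); // e.g. products/az/.../image-ae38b1f9.webp
+    const filePath = path.join('/storage/images', relPath);
+
+    const width = req.query.w ? Math.min(Number(req.query.w), 4000) : null;
+    const height = req.query.h ? Math.min(Number(req.query.h), 4000) : null;
+    const quality = req.query.q ? Number(req.query.q) : 80;
+
+    let img = sharp(filePath);
+    if (width || height) {
+      img = img.resize(width, height, { fit: 'inside', withoutEnlargement: true });
+    }
+    const out = await img.webp({ quality }).toBuffer();
+
+    res.type('image/webp').send(out);
+  } catch {
+    res.status(404).end();
+  }
+});
+```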
+
+### Storage Path
+```
+/storage/images/products/<state>/<store>/<brand>/<product-id>/image-<hash>.webp
+/storage/images/brands/<brand>/logo-<hash>.webp
+```
+
+**Example:**
+```
+/storage/images/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp
+```
+
+### Image Proxy API
+Served via `/img/*` with on-demand resizing using **sharp**:
+
+```
+GET /img/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp?w=200
+```
+
+| Param | Description |
+|-------|-------------|
+| `w` | Width in pixels (max 4000) |
+| `h` | Height in pixels (max 4000) |
+| `q` | Quality 1-100 (default 80) |
+| `fit` | cover, contain, fill, inside, outside |
+| `blur` | Blur sigma (0.3-1000) |
+| `gray` | Grayscale (1 = enabled) |
+| `format` | webp, jpeg, png, avif (default webp) |
+
+### Key Files
+| File | Purpose |
+|------|---------|
+| `src/utils/image-storage.ts` | Download & save images to local filesystem |
+| `src/routes/image-proxy.ts` | On-demand resize/transform at `/img/*` |
+
+### Download Rules
+
+| Scenario | Image Action |
+|----------|--------------|
+| **New product (first crawl)** | Download if `primaryImageUrl` exists |
+| **Existing product (refresh)** | Download only if `local_image_path` is NULL (backfill) |
+| **Product already has local image** | Skip download entirely |
+
+**Logic:**
+- Images are downloaded **once** and never re-downloaded on subsequent crawls
+- `skipIfExists: true` - filesystem check prevents re-download even if queued
+- First crawl: all products get images
+- Refresh crawl: only new products or products missing local images
+
+### Storage Rules
+- **NO MinIO** - local filesystem only (`STORAGE_DRIVER=local`)
+- Store full resolution, resize on-demand via `/img` proxy
+- Convert to webp for consistency using **sharp**
+- Preserve original Dutchie URL as fallback in `image_url` column
+- Local path stored in `local_image_path` column
+
+---
+
+## Stealth & Anti-Detection
+
+**PROXIES ARE REQUIRED** - Workers will fail to start if no active proxies are available in the database. All HTTP requests to Dutchie go through a proxy.
+
+Workers automatically initialize anti-detection systems on startup.
+
+### Components
+
+| Component | Purpose | Source |
+|-----------|---------|--------|
+| **CrawlRotator** | Coordinates proxy + UA rotation | `src/services/crawl-rotator.ts` |
+| **ProxyRotator** | Round-robin proxy selection, health tracking | `src/services/crawl-rotator.ts` |
+| **UserAgentRotator** | Cycles through realistic browser fingerprints | `src/services/crawl-rotator.ts` |
+| **Dutchie Client** | Curl-based HTTP with auto-retry on 403 | `src/platforms/dutchie/client.ts` |
+
+### Initialization Flow
+
+```
+Worker Start
+  │
+  ├─► initializeStealth()
+  │     │
+  │     ├─► CrawlRotator.initialize()
+  │     │     └─► Load proxies from `proxies` table
+  │     │
+  │     └─► setCrawlRotator(rotator)
+  │           └─► Wire to Dutchie client
+  │
+  └─► Process tasks...
+```
+
+### Stealth Session (per task)
+
+Each crawl task starts a stealth session:
+
+```typescript
+// In product-refresh.ts, entry-point-discovery.ts
+const session = startSession(dispensary.state || 'AZ', 'America/Phoenix');
+```
+
+This creates a new identity with:
+- **Random fingerprint:** Chrome/Firefox/Safari/Edge on Win/Mac/Linux
+- **Accept-Language:** Matches timezone (e.g., `America/Phoenix` → `en-US,en;q=0.9`)
+- **sec-ch-ua headers:** Proper Client Hints for the browser profile
+
+### On 403 Block
+
+When Dutchie returns 403, the client automatically:
+
+1.
Records failure on current proxy (increments `failure_count`) +2. If proxy has 5+ failures, deactivates it +3. Rotates to next healthy proxy +4. Rotates fingerprint +5. Retries the request + +### Proxy Table Schema + +```sql +CREATE TABLE proxies ( + id SERIAL PRIMARY KEY, + host VARCHAR(255) NOT NULL, + port INTEGER NOT NULL, + username VARCHAR(100), + password VARCHAR(100), + protocol VARCHAR(10) DEFAULT 'http', -- http, https, socks5 + is_active BOOLEAN DEFAULT true, + last_used_at TIMESTAMPTZ, + failure_count INTEGER DEFAULT 0, + success_count INTEGER DEFAULT 0, + avg_response_time_ms INTEGER, + last_failure_at TIMESTAMPTZ, + last_error TEXT +); +``` + +### Configuration + +Proxies are mandatory. There is no environment variable to disable them. Workers will refuse to start without active proxies in the database. + +### Fingerprints Available + +The client includes 6 browser fingerprints: +- Chrome 131 on Windows +- Chrome 131 on macOS +- Chrome 120 on Windows +- Firefox 133 on Windows +- Safari 17.2 on macOS +- Edge 131 on Windows + +Each includes proper `sec-ch-ua`, `sec-ch-ua-platform`, and `sec-ch-ua-mobile` headers. --- @@ -293,6 +520,7 @@ Crawls are scheduled via `worker_tasks` table: - **Normalization errors:** Logged as warnings, continue with valid products - **Image download errors:** Non-fatal, logged, continue - **Database errors:** Task fails, will be retried +- **403 blocks:** Auto-rotate proxy + fingerprint, retry (up to 3 retries) --- @@ -305,4 +533,6 @@ Crawls are scheduled via `worker_tasks` table: | `src/platforms/dutchie/index.ts` | GraphQL client, session management | | `src/hydration/normalizers/dutchie.ts` | Payload normalization | | `src/hydration/canonical-upsert.ts` | Database upsert logic | +| `src/utils/image-storage.ts` | Image download and local storage | +| `src/routes/image-proxy.ts` | On-demand image resizing | | `migrations/075_consecutive_misses.sql` | OOS tracking column | diff --git a/backend/k8s/cronjob-ip2location.yaml b/backend/k8s/cronjob-ip2location.yaml new file mode 100644 index 00000000..af5788de --- /dev/null +++ b/backend/k8s/cronjob-ip2location.yaml @@ -0,0 +1,69 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: ip2location-update + namespace: default +spec: + # Run on the 1st of every month at 3am UTC + schedule: "0 3 1 * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + template: + spec: + containers: + - name: ip2location-updater + image: curlimages/curl:latest + command: + - /bin/sh + - -c + - | + set -e + echo "Downloading IP2Location LITE DB5..." + + # Download to temp + cd /tmp + curl -L -o ip2location.zip "https://www.ip2location.com/download/?token=${IP2LOCATION_TOKEN}&file=DB5LITEBIN" + + # Extract + unzip -o ip2location.zip + + # Find and copy the BIN file + BIN_FILE=$(ls *.BIN 2>/dev/null | head -1) + if [ -z "$BIN_FILE" ]; then + echo "ERROR: No BIN file found" + exit 1 + fi + + # Copy to shared volume + cp "$BIN_FILE" /data/IP2LOCATION-LITE-DB5.BIN + + echo "Done! 
Database updated: /data/IP2LOCATION-LITE-DB5.BIN" + env: + - name: IP2LOCATION_TOKEN + valueFrom: + secretKeyRef: + name: dutchie-backend-secret + key: IP2LOCATION_TOKEN + volumeMounts: + - name: ip2location-data + mountPath: /data + restartPolicy: OnFailure + volumes: + - name: ip2location-data + persistentVolumeClaim: + claimName: ip2location-pvc +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ip2location-pvc + namespace: default +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi diff --git a/backend/k8s/deployment.yaml b/backend/k8s/deployment.yaml index 580cd550..e48fc2c1 100644 --- a/backend/k8s/deployment.yaml +++ b/backend/k8s/deployment.yaml @@ -26,6 +26,12 @@ spec: name: dutchie-backend-config - secretRef: name: dutchie-backend-secret + env: + - name: IP2LOCATION_DB_PATH + value: /data/ip2location/IP2LOCATION-LITE-DB5.BIN + volumeMounts: + - name: ip2location-data + mountPath: /data/ip2location resources: requests: memory: "256Mi" @@ -45,3 +51,7 @@ spec: port: 3010 initialDelaySeconds: 5 periodSeconds: 5 + volumes: + - name: ip2location-data + persistentVolumeClaim: + claimName: ip2location-pvc diff --git a/backend/migrations/076_visitor_analytics.sql b/backend/migrations/076_visitor_analytics.sql new file mode 100644 index 00000000..a74cf19f --- /dev/null +++ b/backend/migrations/076_visitor_analytics.sql @@ -0,0 +1,71 @@ +-- Visitor location analytics for Findagram +-- Tracks visitor locations to understand popular areas + +CREATE TABLE IF NOT EXISTS visitor_locations ( + id SERIAL PRIMARY KEY, + + -- Location data (from IP lookup) + ip_hash VARCHAR(64), -- Hashed IP for privacy (SHA256) + city VARCHAR(100), + state VARCHAR(100), + state_code VARCHAR(10), + country VARCHAR(100), + country_code VARCHAR(10), + latitude DECIMAL(10, 7), + longitude DECIMAL(10, 7), + + -- Visit metadata + domain VARCHAR(50) NOT NULL, -- 'findagram.co', 'findadispo.com', etc. + page_path VARCHAR(255), -- '/products', '/dispensaries/123', etc. 
+ referrer VARCHAR(500), + user_agent VARCHAR(500), + + -- Session tracking + session_id VARCHAR(64), -- For grouping page views in a session + + -- Timestamps + created_at TIMESTAMPTZ DEFAULT NOW() +); + +-- Indexes for analytics queries +CREATE INDEX IF NOT EXISTS idx_visitor_locations_domain ON visitor_locations(domain); +CREATE INDEX IF NOT EXISTS idx_visitor_locations_city_state ON visitor_locations(city, state_code); +CREATE INDEX IF NOT EXISTS idx_visitor_locations_created_at ON visitor_locations(created_at); +CREATE INDEX IF NOT EXISTS idx_visitor_locations_session ON visitor_locations(session_id); + +-- Aggregated daily stats (materialized for performance) +CREATE TABLE IF NOT EXISTS visitor_location_stats ( + id SERIAL PRIMARY KEY, + date DATE NOT NULL, + domain VARCHAR(50) NOT NULL, + city VARCHAR(100), + state VARCHAR(100), + state_code VARCHAR(10), + country_code VARCHAR(10), + + -- Metrics + visit_count INTEGER DEFAULT 0, + unique_sessions INTEGER DEFAULT 0, + + UNIQUE(date, domain, city, state_code, country_code) +); + +CREATE INDEX IF NOT EXISTS idx_visitor_stats_date ON visitor_location_stats(date); +CREATE INDEX IF NOT EXISTS idx_visitor_stats_domain ON visitor_location_stats(domain); +CREATE INDEX IF NOT EXISTS idx_visitor_stats_state ON visitor_location_stats(state_code); + +-- View for easy querying of top locations +CREATE OR REPLACE VIEW v_top_visitor_locations AS +SELECT + domain, + city, + state, + state_code, + country_code, + COUNT(*) as total_visits, + COUNT(DISTINCT session_id) as unique_sessions, + MAX(created_at) as last_visit +FROM visitor_locations +WHERE created_at > NOW() - INTERVAL '30 days' +GROUP BY domain, city, state, state_code, country_code +ORDER BY total_visits DESC; diff --git a/backend/migrations/076_worker_registry.sql b/backend/migrations/076_worker_registry.sql new file mode 100644 index 00000000..acab6885 --- /dev/null +++ b/backend/migrations/076_worker_registry.sql @@ -0,0 +1,141 @@ +-- Migration 076: Worker Registry for Dynamic Workers +-- Workers register on startup, receive a friendly name, and report heartbeats + +-- Name pool for workers (expandable, no hardcoding) +CREATE TABLE IF NOT EXISTS worker_name_pool ( + id SERIAL PRIMARY KEY, + name VARCHAR(50) UNIQUE NOT NULL, + in_use BOOLEAN DEFAULT FALSE, + assigned_to VARCHAR(100), -- worker_id + assigned_at TIMESTAMPTZ, + created_at TIMESTAMPTZ DEFAULT NOW() +); + +-- Seed with initial names (can add more via API) +INSERT INTO worker_name_pool (name) VALUES + ('Alice'), ('Bella'), ('Clara'), ('Diana'), ('Elena'), + ('Fiona'), ('Grace'), ('Hazel'), ('Iris'), ('Julia'), + ('Katie'), ('Luna'), ('Mia'), ('Nora'), ('Olive'), + ('Pearl'), ('Quinn'), ('Rosa'), ('Sara'), ('Tara'), + ('Uma'), ('Vera'), ('Wendy'), ('Xena'), ('Yuki'), ('Zara'), + ('Amber'), ('Blake'), ('Coral'), ('Dawn'), ('Echo'), + ('Fleur'), ('Gem'), ('Haven'), ('Ivy'), ('Jade'), + ('Kira'), ('Lotus'), ('Maple'), ('Nova'), ('Onyx'), + ('Pixel'), ('Quest'), ('Raven'), ('Sage'), ('Terra'), + ('Unity'), ('Violet'), ('Willow'), ('Xylo'), ('Yara'), ('Zen') +ON CONFLICT (name) DO NOTHING; + +-- Worker registry - tracks active workers +CREATE TABLE IF NOT EXISTS worker_registry ( + id SERIAL PRIMARY KEY, + worker_id VARCHAR(100) UNIQUE NOT NULL, -- e.g., "pod-abc123" or uuid + friendly_name VARCHAR(50), -- assigned from pool + role VARCHAR(50) NOT NULL, -- task role + pod_name VARCHAR(100), -- k8s pod name + hostname VARCHAR(100), -- machine hostname + ip_address VARCHAR(50), -- worker IP + status VARCHAR(20) DEFAULT 'starting', -- 
starting, active, idle, offline, terminated + started_at TIMESTAMPTZ DEFAULT NOW(), + last_heartbeat_at TIMESTAMPTZ DEFAULT NOW(), + last_task_at TIMESTAMPTZ, + tasks_completed INTEGER DEFAULT 0, + tasks_failed INTEGER DEFAULT 0, + current_task_id INTEGER, + metadata JSONB DEFAULT '{}', + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW() +); + +-- Indexes for worker registry +CREATE INDEX IF NOT EXISTS idx_worker_registry_status ON worker_registry(status); +CREATE INDEX IF NOT EXISTS idx_worker_registry_role ON worker_registry(role); +CREATE INDEX IF NOT EXISTS idx_worker_registry_heartbeat ON worker_registry(last_heartbeat_at); + +-- Function to assign a name to a new worker +CREATE OR REPLACE FUNCTION assign_worker_name(p_worker_id VARCHAR(100)) +RETURNS VARCHAR(50) AS $$ +DECLARE + v_name VARCHAR(50); +BEGIN + -- Try to get an unused name + UPDATE worker_name_pool + SET in_use = TRUE, assigned_to = p_worker_id, assigned_at = NOW() + WHERE id = ( + SELECT id FROM worker_name_pool + WHERE in_use = FALSE + ORDER BY RANDOM() + LIMIT 1 + FOR UPDATE SKIP LOCKED + ) + RETURNING name INTO v_name; + + -- If no names available, generate one + IF v_name IS NULL THEN + v_name := 'Worker-' || SUBSTRING(p_worker_id FROM 1 FOR 8); + END IF; + + RETURN v_name; +END; +$$ LANGUAGE plpgsql; + +-- Function to release a worker's name back to the pool +CREATE OR REPLACE FUNCTION release_worker_name(p_worker_id VARCHAR(100)) +RETURNS VOID AS $$ +BEGIN + UPDATE worker_name_pool + SET in_use = FALSE, assigned_to = NULL, assigned_at = NULL + WHERE assigned_to = p_worker_id; +END; +$$ LANGUAGE plpgsql; + +-- Function to mark stale workers as offline +CREATE OR REPLACE FUNCTION mark_stale_workers(stale_threshold_minutes INTEGER DEFAULT 5) +RETURNS INTEGER AS $$ +DECLARE + v_count INTEGER; +BEGIN + UPDATE worker_registry + SET status = 'offline', updated_at = NOW() + WHERE status IN ('active', 'idle', 'starting') + AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL + RETURNING COUNT(*) INTO v_count; + + -- Release names from offline workers + PERFORM release_worker_name(worker_id) + FROM worker_registry + WHERE status = 'offline' + AND last_heartbeat_at < NOW() - INTERVAL '30 minutes'; + + RETURN COALESCE(v_count, 0); +END; +$$ LANGUAGE plpgsql; + +-- View for dashboard +CREATE OR REPLACE VIEW v_active_workers AS +SELECT + wr.id, + wr.worker_id, + wr.friendly_name, + wr.role, + wr.status, + wr.pod_name, + wr.hostname, + wr.started_at, + wr.last_heartbeat_at, + wr.last_task_at, + wr.tasks_completed, + wr.tasks_failed, + wr.current_task_id, + EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat, + CASE + WHEN wr.status = 'offline' THEN 'offline' + WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale' + WHEN wr.current_task_id IS NOT NULL THEN 'busy' + ELSE 'ready' + END as health_status +FROM worker_registry wr +WHERE wr.status != 'terminated' +ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC; + +COMMENT ON TABLE worker_registry IS 'Tracks all workers that have registered with the system'; +COMMENT ON TABLE worker_name_pool IS 'Pool of friendly names for workers - expandable via API'; diff --git a/backend/migrations/077_click_events_location.sql b/backend/migrations/077_click_events_location.sql new file mode 100644 index 00000000..f6639fb6 --- /dev/null +++ b/backend/migrations/077_click_events_location.sql @@ -0,0 +1,35 @@ +-- Migration: Add visitor location and dispensary name to click events +-- 
Captures where visitors are clicking from and which dispensary + +-- Add visitor location columns +ALTER TABLE product_click_events +ADD COLUMN IF NOT EXISTS visitor_city VARCHAR(100); + +ALTER TABLE product_click_events +ADD COLUMN IF NOT EXISTS visitor_state VARCHAR(10); + +ALTER TABLE product_click_events +ADD COLUMN IF NOT EXISTS visitor_lat DECIMAL(10, 7); + +ALTER TABLE product_click_events +ADD COLUMN IF NOT EXISTS visitor_lng DECIMAL(10, 7); + +-- Add dispensary name for easier reporting +ALTER TABLE product_click_events +ADD COLUMN IF NOT EXISTS dispensary_name VARCHAR(255); + +-- Create index for location-based analytics +CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_state +ON product_click_events(visitor_state) +WHERE visitor_state IS NOT NULL; + +CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_city +ON product_click_events(visitor_city) +WHERE visitor_city IS NOT NULL; + +-- Add comments +COMMENT ON COLUMN product_click_events.visitor_city IS 'City where the visitor is located (from IP geolocation)'; +COMMENT ON COLUMN product_click_events.visitor_state IS 'State where the visitor is located (from IP geolocation)'; +COMMENT ON COLUMN product_click_events.visitor_lat IS 'Visitor latitude (from IP geolocation)'; +COMMENT ON COLUMN product_click_events.visitor_lng IS 'Visitor longitude (from IP geolocation)'; +COMMENT ON COLUMN product_click_events.dispensary_name IS 'Name of the dispensary (denormalized for easier reporting)'; diff --git a/backend/node_modules/.package-lock.json b/backend/node_modules/.package-lock.json index 1b115bb0..8d3c398d 100644 --- a/backend/node_modules/.package-lock.json +++ b/backend/node_modules/.package-lock.json @@ -1026,6 +1026,17 @@ "url": "https://github.com/sponsors/fb55" } }, + "node_modules/csv-parser": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz", + "integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==", + "bin": { + "csv-parser": "bin/csv-parser" + }, + "engines": { + "node": ">= 10" + } + }, "node_modules/data-uri-to-buffer": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", @@ -2235,6 +2246,14 @@ "node": ">= 12" } }, + "node_modules/ip2location-nodejs": { + "version": "9.7.0", + "resolved": "https://registry.npmjs.org/ip2location-nodejs/-/ip2location-nodejs-9.7.0.tgz", + "integrity": "sha512-eQ4T5TXm1cx0+pQcRycPiuaiRuoDEMd9O89Be7Ugk555qi9UY9enXSznkkqr3kQRyUaXx7zj5dORC5LGTPOttA==", + "dependencies": { + "csv-parser": "^3.0.0" + } + }, "node_modules/ipaddr.js": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.2.0.tgz", diff --git a/backend/package-lock.json b/backend/package-lock.json index 1d03ed60..826ddc7e 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -21,6 +21,7 @@ "helmet": "^7.1.0", "https-proxy-agent": "^7.0.2", "ioredis": "^5.8.2", + "ip2location-nodejs": "^9.7.0", "ipaddr.js": "^2.2.0", "jsonwebtoken": "^9.0.2", "minio": "^7.1.3", @@ -1531,6 +1532,17 @@ "url": "https://github.com/sponsors/fb55" } }, + "node_modules/csv-parser": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz", + "integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==", + "bin": { + "csv-parser": "bin/csv-parser" + }, + "engines": { + "node": ">= 10" + } + }, "node_modules/data-uri-to-buffer": { 
"version": "6.0.2", "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", @@ -2754,6 +2766,14 @@ "node": ">= 12" } }, + "node_modules/ip2location-nodejs": { + "version": "9.7.0", + "resolved": "https://registry.npmjs.org/ip2location-nodejs/-/ip2location-nodejs-9.7.0.tgz", + "integrity": "sha512-eQ4T5TXm1cx0+pQcRycPiuaiRuoDEMd9O89Be7Ugk555qi9UY9enXSznkkqr3kQRyUaXx7zj5dORC5LGTPOttA==", + "dependencies": { + "csv-parser": "^3.0.0" + } + }, "node_modules/ipaddr.js": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.2.0.tgz", diff --git a/backend/package.json b/backend/package.json index 147b90e7..082651b5 100755 --- a/backend/package.json +++ b/backend/package.json @@ -35,6 +35,7 @@ "helmet": "^7.1.0", "https-proxy-agent": "^7.0.2", "ioredis": "^5.8.2", + "ip2location-nodejs": "^9.7.0", "ipaddr.js": "^2.2.0", "jsonwebtoken": "^9.0.2", "minio": "^7.1.3", diff --git a/backend/scripts/download-ip2location.sh b/backend/scripts/download-ip2location.sh new file mode 100755 index 00000000..b7c64965 --- /dev/null +++ b/backend/scripts/download-ip2location.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# Download IP2Location LITE DB3 (City-level) database +# Free for commercial use with attribution +# https://lite.ip2location.com/database/db3-ip-country-region-city + +set -e + +DATA_DIR="${1:-./data/ip2location}" +DB_FILE="IP2LOCATION-LITE-DB3.BIN" + +mkdir -p "$DATA_DIR" +cd "$DATA_DIR" + +echo "Downloading IP2Location LITE DB3 database..." + +# IP2Location LITE DB3 - includes city, region, country, lat/lng +# You need to register at https://lite.ip2location.com/ to get a download token +# Then set IP2LOCATION_TOKEN environment variable + +if [ -z "$IP2LOCATION_TOKEN" ]; then + echo "" + echo "ERROR: IP2LOCATION_TOKEN not set" + echo "" + echo "To download the database:" + echo "1. Register free at https://lite.ip2location.com/" + echo "2. Get your download token from the dashboard" + echo "3. Run: IP2LOCATION_TOKEN=your_token ./scripts/download-ip2location.sh" + echo "" + exit 1 +fi + +# Download DB3.LITE (IPv4 + City) +DOWNLOAD_URL="https://www.ip2location.com/download/?token=${IP2LOCATION_TOKEN}&file=DB3LITEBIN" + +echo "Downloading from IP2Location..." +curl -L -o ip2location.zip "$DOWNLOAD_URL" + +echo "Extracting..." +unzip -o ip2location.zip + +# Rename to standard name +if [ -f "IP2LOCATION-LITE-DB3.BIN" ]; then + echo "Database ready: $DATA_DIR/IP2LOCATION-LITE-DB3.BIN" +elif [ -f "IP-COUNTRY-REGION-CITY.BIN" ]; then + mv "IP-COUNTRY-REGION-CITY.BIN" "$DB_FILE" + echo "Database ready: $DATA_DIR/$DB_FILE" +else + # Find whatever BIN file was extracted + BIN_FILE=$(ls *.BIN 2>/dev/null | head -1) + if [ -n "$BIN_FILE" ]; then + mv "$BIN_FILE" "$DB_FILE" + echo "Database ready: $DATA_DIR/$DB_FILE" + else + echo "ERROR: No BIN file found in archive" + ls -la + exit 1 + fi +fi + +# Cleanup +rm -f ip2location.zip *.txt LICENSE* README* + +echo "" +echo "Done! Database saved to: $DATA_DIR/$DB_FILE" +echo "Update monthly by re-running this script." 
diff --git a/backend/src/discovery/discovery-crawler.ts b/backend/src/discovery/discovery-crawler.ts index 117bc759..24e191fb 100644 --- a/backend/src/discovery/discovery-crawler.ts +++ b/backend/src/discovery/discovery-crawler.ts @@ -191,6 +191,23 @@ export async function runFullDiscovery( } } + // Step 5: Detect dropped stores (in DB but not in discovery results) + if (!dryRun) { + console.log('\n[Discovery] Step 5: Detecting dropped stores...'); + const droppedResult = await detectDroppedStores(pool, stateCode); + if (droppedResult.droppedCount > 0) { + console.log(`[Discovery] Found ${droppedResult.droppedCount} dropped stores:`); + droppedResult.droppedStores.slice(0, 10).forEach(s => { + console.log(` - ${s.name} (${s.city}, ${s.state}) - last seen: ${s.lastSeenAt}`); + }); + if (droppedResult.droppedCount > 10) { + console.log(` ... and ${droppedResult.droppedCount - 10} more`); + } + } else { + console.log(`[Discovery] No dropped stores detected`); + } + } + return { cities: cityResult, locations: locationResults, @@ -200,6 +217,107 @@ export async function runFullDiscovery( }; } +// ============================================================ +// DROPPED STORE DETECTION +// ============================================================ + +export interface DroppedStoreResult { + droppedCount: number; + droppedStores: Array<{ + id: number; + name: string; + city: string; + state: string; + platformDispensaryId: string; + lastSeenAt: string; + }>; +} + +/** + * Detect stores that exist in dispensaries but were not found in discovery. + * Marks them as status='dropped' for manual review. + * + * A store is considered "dropped" if: + * 1. It has a platform_dispensary_id (was verified via Dutchie) + * 2. It was NOT seen in the latest discovery crawl (last_seen_at in discovery < 24h ago) + * 3. It's currently marked as 'open' status + */ +export async function detectDroppedStores( + pool: Pool, + stateCode?: string +): Promise { + // Find dispensaries that: + // 1. Have platform_dispensary_id (verified Dutchie stores) + // 2. Are currently 'open' status + // 3. 
Have a linked discovery record that wasn't seen in the last discovery run + // (last_seen_at in dutchie_discovery_locations is older than 24 hours) + const params: any[] = []; + let stateFilter = ''; + + if (stateCode) { + stateFilter = ` AND d.state = $1`; + params.push(stateCode); + } + + const query = ` + WITH recently_seen AS ( + SELECT DISTINCT platform_location_id + FROM dutchie_discovery_locations + WHERE last_seen_at > NOW() - INTERVAL '24 hours' + AND active = true + ) + SELECT + d.id, + d.name, + d.city, + d.state, + d.platform_dispensary_id, + d.updated_at as last_seen_at + FROM dispensaries d + WHERE d.platform_dispensary_id IS NOT NULL + AND d.platform = 'dutchie' + AND (d.status = 'open' OR d.status IS NULL) + AND d.crawl_enabled = true + AND d.platform_dispensary_id NOT IN (SELECT platform_location_id FROM recently_seen) + ${stateFilter} + ORDER BY d.name + `; + + const result = await pool.query(query, params); + const droppedStores = result.rows; + + // Mark these stores as 'dropped' status + if (droppedStores.length > 0) { + const ids = droppedStores.map(s => s.id); + await pool.query(` + UPDATE dispensaries + SET status = 'dropped', updated_at = NOW() + WHERE id = ANY($1::int[]) + `, [ids]); + + // Log to promotion log for audit + for (const store of droppedStores) { + await pool.query(` + INSERT INTO dutchie_promotion_log + (dispensary_id, action, state_code, store_name, triggered_by) + VALUES ($1, 'dropped', $2, $3, 'discovery_detection') + `, [store.id, store.state, store.name]); + } + } + + return { + droppedCount: droppedStores.length, + droppedStores: droppedStores.map(s => ({ + id: s.id, + name: s.name, + city: s.city, + state: s.state, + platformDispensaryId: s.platform_dispensary_id, + lastSeenAt: s.last_seen_at, + })), + }; +} + // ============================================================ // SINGLE CITY DISCOVERY // ============================================================ diff --git a/backend/src/index.ts b/backend/src/index.ts index 5ac41c47..6a8d1d3e 100755 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -140,6 +140,7 @@ import clickAnalyticsRoutes from './routes/click-analytics'; import seoRoutes from './routes/seo'; import priceAnalyticsRoutes from './routes/price-analytics'; import tasksRoutes from './routes/tasks'; +import workerRegistryRoutes from './routes/worker-registry'; // Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com) // These domains can access the API without authentication @@ -216,6 +217,10 @@ console.log('[Workers] Routes registered at /api/workers, /api/monitor, and /api app.use('/api/tasks', tasksRoutes); console.log('[Tasks] Routes registered at /api/tasks'); +// Worker registry - dynamic worker registration, heartbeats, and name management +app.use('/api/worker-registry', workerRegistryRoutes); +console.log('[WorkerRegistry] Routes registered at /api/worker-registry'); + // Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation try { const analyticsV2Router = createAnalyticsV2Router(getPool()); diff --git a/backend/src/routes/categories.ts b/backend/src/routes/categories.ts index 69f1cb37..7ad9d455 100644 --- a/backend/src/routes/categories.ts +++ b/backend/src/routes/categories.ts @@ -5,33 +5,37 @@ import { pool } from '../db/pool'; const router = Router(); router.use(authMiddleware); -// Get categories (flat list) +// Get categories (flat list) - derived from actual product data router.get('/', async (req, res) => { try { - const { store_id } = req.query; - + const { 
store_id, in_stock_only } = req.query; + let query = ` SELECT - c.*, - COUNT(DISTINCT p.id) as product_count, - pc.name as parent_name - FROM categories c - LEFT JOIN store_products p ON c.name = p.category_raw - LEFT JOIN categories pc ON c.parent_id = pc.id + category_raw as name, + category_raw as slug, + COUNT(*) as product_count, + COUNT(*) FILTER (WHERE is_in_stock = true) as in_stock_count + FROM store_products + WHERE category_raw IS NOT NULL `; - + const params: any[] = []; - + if (store_id) { - query += ' WHERE c.store_id = $1'; params.push(store_id); + query += ` AND dispensary_id = $${params.length}`; } - + + if (in_stock_only === 'true') { + query += ` AND is_in_stock = true`; + } + query += ` - GROUP BY c.id, pc.name - ORDER BY c.display_order, c.name + GROUP BY category_raw + ORDER BY category_raw `; - + const result = await pool.query(query, params); res.json({ categories: result.rows }); } catch (error) { @@ -40,50 +44,86 @@ router.get('/', async (req, res) => { } }); -// Get category tree (hierarchical) +// Get category tree (hierarchical) - category -> subcategory structure from product data router.get('/tree', async (req, res) => { try { - const { store_id } = req.query; - - if (!store_id) { - return res.status(400).json({ error: 'store_id is required' }); - } - - // Get all categories for the store - const result = await pool.query(` - SELECT - c.*, - COUNT(DISTINCT p.id) as product_count - FROM categories c - LEFT JOIN store_products p ON c.name = p.category_raw AND p.is_in_stock = true AND p.dispensary_id = $1 - WHERE c.store_id = $1 - GROUP BY c.id - ORDER BY c.display_order, c.name - `, [store_id]); - - // Build tree structure - const categories = result.rows; - const categoryMap = new Map(); - const tree: any[] = []; - - // First pass: create map - categories.forEach((cat: { id: number; parent_id?: number }) => { - categoryMap.set(cat.id, { ...cat, children: [] }); - }); + const { store_id, in_stock_only } = req.query; - // Second pass: build tree - categories.forEach((cat: { id: number; parent_id?: number }) => { - const node = categoryMap.get(cat.id); - if (cat.parent_id) { - const parent = categoryMap.get(cat.parent_id); - if (parent) { - parent.children.push(node); - } - } else { - tree.push(node); + // Get category + subcategory combinations with counts + let query = ` + SELECT + category_raw as category, + subcategory_raw as subcategory, + COUNT(*) as product_count, + COUNT(*) FILTER (WHERE is_in_stock = true) as in_stock_count + FROM store_products + WHERE category_raw IS NOT NULL + `; + + const params: any[] = []; + + if (store_id) { + params.push(store_id); + query += ` AND dispensary_id = $${params.length}`; + } + + if (in_stock_only === 'true') { + query += ` AND is_in_stock = true`; + } + + query += ` + GROUP BY category_raw, subcategory_raw + ORDER BY category_raw, subcategory_raw + `; + + const result = await pool.query(query, params); + + // Build tree structure: category -> subcategories + const categoryMap = new Map; + }>(); + + for (const row of result.rows) { + const category = row.category; + const subcategory = row.subcategory; + const count = parseInt(row.product_count); + const inStockCount = parseInt(row.in_stock_count); + + if (!categoryMap.has(category)) { + categoryMap.set(category, { + name: category, + slug: category.toLowerCase().replace(/\s+/g, '-'), + product_count: 0, + in_stock_count: 0, + subcategories: [] + }); } - }); - + + const cat = categoryMap.get(category)!; + cat.product_count += count; + cat.in_stock_count += 
inStockCount; + + if (subcategory) { + cat.subcategories.push({ + name: subcategory, + slug: subcategory.toLowerCase().replace(/\s+/g, '-'), + product_count: count, + in_stock_count: inStockCount + }); + } + } + + const tree = Array.from(categoryMap.values()); + res.json({ tree }); } catch (error) { console.error('Error fetching category tree:', error); @@ -91,4 +131,91 @@ router.get('/tree', async (req, res) => { } }); +// Get all unique subcategories for a category +router.get('/:category/subcategories', async (req, res) => { + try { + const { category } = req.params; + const { store_id, in_stock_only } = req.query; + + let query = ` + SELECT + subcategory_raw as name, + subcategory_raw as slug, + COUNT(*) as product_count, + COUNT(*) FILTER (WHERE is_in_stock = true) as in_stock_count + FROM store_products + WHERE category_raw = $1 + AND subcategory_raw IS NOT NULL + `; + + const params: any[] = [category]; + + if (store_id) { + params.push(store_id); + query += ` AND dispensary_id = $${params.length}`; + } + + if (in_stock_only === 'true') { + query += ` AND is_in_stock = true`; + } + + query += ` + GROUP BY subcategory_raw + ORDER BY subcategory_raw + `; + + const result = await pool.query(query, params); + res.json({ + category, + subcategories: result.rows + }); + } catch (error) { + console.error('Error fetching subcategories:', error); + res.status(500).json({ error: 'Failed to fetch subcategories' }); + } +}); + +// Get global category summary (across all stores) +router.get('/summary', async (req, res) => { + try { + const { state } = req.query; + + let query = ` + SELECT + sp.category_raw as category, + COUNT(DISTINCT sp.id) as product_count, + COUNT(DISTINCT sp.dispensary_id) as store_count, + COUNT(*) FILTER (WHERE sp.is_in_stock = true) as in_stock_count + FROM store_products sp + `; + + const params: any[] = []; + + if (state) { + query += ` + JOIN dispensaries d ON sp.dispensary_id = d.id + WHERE sp.category_raw IS NOT NULL + AND d.state = $1 + `; + params.push(state); + } else { + query += ` WHERE sp.category_raw IS NOT NULL`; + } + + query += ` + GROUP BY sp.category_raw + ORDER BY product_count DESC + `; + + const result = await pool.query(query, params); + res.json({ + categories: result.rows, + total_categories: result.rows.length + }); + } catch (error) { + console.error('Error fetching category summary:', error); + res.status(500).json({ error: 'Failed to fetch category summary' }); + } +}); + export default router; diff --git a/backend/src/routes/dispensaries.ts b/backend/src/routes/dispensaries.ts index 7cc5b58d..2bffb832 100644 --- a/backend/src/routes/dispensaries.ts +++ b/backend/src/routes/dispensaries.ts @@ -11,7 +11,7 @@ const VALID_MENU_TYPES = ['dutchie', 'treez', 'jane', 'weedmaps', 'leafly', 'mea // Get all dispensaries (with pagination) router.get('/', async (req, res) => { try { - const { menu_type, city, state, crawl_enabled, dutchie_verified, limit, offset, search } = req.query; + const { menu_type, city, state, crawl_enabled, dutchie_verified, status, limit, offset, search } = req.query; const pageLimit = Math.min(parseInt(limit as string) || 50, 500); const pageOffset = parseInt(offset as string) || 0; @@ -100,6 +100,12 @@ router.get('/', async (req, res) => { } } + // Filter by status (e.g., 'dropped', 'open', 'closed') + if (status) { + conditions.push(`status = $${params.length + 1}`); + params.push(status); + } + // Search filter (name, dba_name, city, company_name) if (search) { conditions.push(`(name ILIKE $${params.length + 1} OR dba_name 
ILIKE $${params.length + 1} OR city ILIKE $${params.length + 1})`); @@ -161,6 +167,7 @@ router.get('/stats/crawl-status', async (req, res) => { COUNT(*) FILTER (WHERE crawl_enabled = false OR crawl_enabled IS NULL) as disabled_count, COUNT(*) FILTER (WHERE dutchie_verified = true) as verified_count, COUNT(*) FILTER (WHERE dutchie_verified = false OR dutchie_verified IS NULL) as unverified_count, + COUNT(*) FILTER (WHERE status = 'dropped') as dropped_count, COUNT(*) as total_count FROM dispensaries `; @@ -190,6 +197,34 @@ router.get('/stats/crawl-status', async (req, res) => { } }); +// Get dropped stores count (for dashboard alert) +router.get('/stats/dropped', async (req, res) => { + try { + const result = await pool.query(` + SELECT + COUNT(*) as dropped_count, + json_agg(json_build_object( + 'id', id, + 'name', name, + 'city', city, + 'state', state, + 'dropped_at', updated_at + ) ORDER BY updated_at DESC) FILTER (WHERE status = 'dropped') as dropped_stores + FROM dispensaries + WHERE status = 'dropped' + `); + + const row = result.rows[0]; + res.json({ + dropped_count: parseInt(row.dropped_count) || 0, + dropped_stores: row.dropped_stores || [] + }); + } catch (error) { + console.error('Error fetching dropped stores:', error); + res.status(500).json({ error: 'Failed to fetch dropped stores' }); + } +}); + // Get single dispensary by slug or ID router.get('/:slugOrId', async (req, res) => { try { diff --git a/backend/src/routes/events.ts b/backend/src/routes/events.ts index 43473c02..bc5993b1 100644 --- a/backend/src/routes/events.ts +++ b/backend/src/routes/events.ts @@ -22,11 +22,17 @@ interface ProductClickEventPayload { store_id?: string; brand_id?: string; campaign_id?: string; + dispensary_name?: string; action: 'view' | 'open_store' | 'open_product' | 'compare' | 'other'; source: string; page_type?: string; // Page where event occurred (e.g., StoreDetailPage, BrandsIntelligence) url_path?: string; // URL path for debugging occurred_at?: string; + // Visitor location (from frontend IP geolocation) + visitor_city?: string; + visitor_state?: string; + visitor_lat?: number; + visitor_lng?: number; } /** @@ -77,13 +83,14 @@ router.post('/product-click', optionalAuthMiddleware, async (req: Request, res: // Insert the event with enhanced fields await pool.query( `INSERT INTO product_click_events - (product_id, store_id, brand_id, campaign_id, action, source, user_id, ip_address, user_agent, occurred_at, event_type, page_type, url_path, device_type) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)`, + (product_id, store_id, brand_id, campaign_id, dispensary_name, action, source, user_id, ip_address, user_agent, occurred_at, event_type, page_type, url_path, device_type, visitor_city, visitor_state, visitor_lat, visitor_lng) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)`, [ payload.product_id, payload.store_id || null, payload.brand_id || null, payload.campaign_id || null, + payload.dispensary_name || null, payload.action, payload.source, userId, @@ -93,7 +100,11 @@ router.post('/product-click', optionalAuthMiddleware, async (req: Request, res: 'product_click', // event_type payload.page_type || null, payload.url_path || null, - deviceType + deviceType, + payload.visitor_city || null, + payload.visitor_state || null, + payload.visitor_lat || null, + payload.visitor_lng || null ] ); diff --git a/backend/src/routes/products.ts b/backend/src/routes/products.ts index 771be3cf..e9fe19cb 100755 --- 
a/backend/src/routes/products.ts +++ b/backend/src/routes/products.ts @@ -1,11 +1,29 @@ import { Router } from 'express'; import { authMiddleware } from '../auth/middleware'; import { pool } from '../db/pool'; -import { getImageUrl } from '../utils/minio'; const router = Router(); router.use(authMiddleware); +/** + * Convert local image path to proxy URL + * /images/products/... -> /img/products/... + */ +function getImageUrl(localPath: string): string { + if (!localPath) return ''; + // If already a full URL, return as-is + if (localPath.startsWith('http')) return localPath; + // Convert /images/ path to /img/ proxy path + if (localPath.startsWith('/images/')) { + return '/img' + localPath.substring(7); + } + // Handle paths without leading slash + if (localPath.startsWith('images/')) { + return '/img/' + localPath.substring(7); + } + return '/img/' + localPath; +} + // Freshness threshold: data older than this is considered stale const STALE_THRESHOLD_HOURS = 4; diff --git a/backend/src/routes/public-api.ts b/backend/src/routes/public-api.ts index ff22722c..7179fa8b 100644 --- a/backend/src/routes/public-api.ts +++ b/backend/src/routes/public-api.ts @@ -463,7 +463,7 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => { // Filter by on special if (on_special === 'true' || on_special === '1') { - whereClause += ` AND s.is_on_special = TRUE`; + whereClause += ` AND s.special = TRUE`; } // Search by name or brand @@ -547,7 +547,7 @@ router.get('/products', async (req: PublicApiRequest, res: Response) => { const { rows: countRows } = await pool.query(` SELECT COUNT(*) as total FROM store_products p LEFT JOIN LATERAL ( - SELECT rec_min_price_cents / 100.0 as price_rec, med_min_price_cents / 100.0 as price_med, special as is_on_special FROM v_product_snapshots + SELECT rec_min_price_cents / 100.0 as price_rec, med_min_price_cents / 100.0 as price_med, special FROM v_product_snapshots WHERE store_product_id = p.id ORDER BY crawled_at DESC LIMIT 1 @@ -1125,6 +1125,7 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => { SELECT d.id, d.name, + d.slug, d.address1, d.address2, d.city, @@ -1179,6 +1180,7 @@ router.get('/dispensaries', async (req: PublicApiRequest, res: Response) => { const transformedDispensaries = dispensaries.map((d) => ({ id: d.id, name: d.name, + slug: d.slug || null, address1: d.address1, address2: d.address2, city: d.city, @@ -1876,7 +1878,7 @@ router.get('/stats', async (req: PublicApiRequest, res: Response) => { SELECT (SELECT COUNT(*) FROM store_products) as product_count, (SELECT COUNT(DISTINCT brand_name_raw) FROM store_products WHERE brand_name_raw IS NOT NULL) as brand_count, - (SELECT COUNT(*) FROM dispensaries WHERE crawl_enabled = true AND product_count > 0) as dispensary_count + (SELECT COUNT(DISTINCT dispensary_id) FROM store_products) as dispensary_count `); const s = stats[0] || {}; @@ -1996,4 +1998,235 @@ router.get('/menu', async (req: PublicApiRequest, res: Response) => { } }); +// ============================================================ +// VISITOR TRACKING & GEOLOCATION +// ============================================================ + +import crypto from 'crypto'; +import { GeoLocation, lookupIP } from '../services/ip2location'; + +/** + * Get location from IP using local IP2Location database + */ +function getLocationFromIP(ip: string): GeoLocation | null { + return lookupIP(ip); +} + +/** + * Hash IP for privacy (we don't store raw IPs) + */ +function hashIP(ip: string): string { + return 
crypto.createHash('sha256').update(ip).digest('hex').substring(0, 16); +} + +/** + * POST /api/v1/visitor/track + * Track visitor location for analytics + * + * Body: + * - domain: string (required) - 'findagram.co', 'findadispo.com', etc. + * - page_path: string (optional) - current page path + * - session_id: string (optional) - client-generated session ID + * - referrer: string (optional) - document.referrer + * + * Returns: + * - location: { city, state, lat, lng } for client use + */ +router.post('/visitor/track', async (req: Request, res: Response) => { + try { + const { domain, page_path, session_id, referrer } = req.body; + + if (!domain) { + return res.status(400).json({ error: 'domain is required' }); + } + + // Get client IP + const clientIp = (req.headers['x-forwarded-for'] as string)?.split(',')[0].trim() || + req.headers['x-real-ip'] as string || + req.ip || + req.socket.remoteAddress || + ''; + + // Get location from IP (local database lookup) + const location = getLocationFromIP(clientIp); + + // Store visit (with hashed IP for privacy) + await pool.query(` + INSERT INTO visitor_locations ( + ip_hash, city, state, state_code, country, country_code, + latitude, longitude, domain, page_path, referrer, user_agent, session_id + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) + `, [ + hashIP(clientIp), + location?.city || null, + location?.state || null, + location?.stateCode || null, + location?.country || null, + location?.countryCode || null, + location?.lat || null, + location?.lng || null, + domain, + page_path || null, + referrer || null, + req.headers['user-agent'] || null, + session_id || null + ]); + + // Return location to client (for nearby dispensary feature) + res.json({ + success: true, + location: location ? { + city: location.city, + state: location.state, + stateCode: location.stateCode, + lat: location.lat, + lng: location.lng + } : null + }); + } catch (error: any) { + console.error('Visitor tracking error:', error); + // Don't fail the request - tracking is non-critical + res.json({ + success: false, + location: null + }); + } +}); + +/** + * GET /api/v1/visitor/location + * Get visitor location without tracking (just IP lookup) + */ +router.get('/visitor/location', (req: Request, res: Response) => { + try { + const clientIp = (req.headers['x-forwarded-for'] as string)?.split(',')[0].trim() || + req.headers['x-real-ip'] as string || + req.ip || + req.socket.remoteAddress || + ''; + + const location = getLocationFromIP(clientIp); + + res.json({ + success: true, + location: location ? 
{ + city: location.city, + state: location.state, + stateCode: location.stateCode, + lat: location.lat, + lng: location.lng + } : null + }); + } catch (error: any) { + console.error('Location lookup error:', error); + res.json({ + success: false, + location: null + }); + } +}); + +/** + * GET /api/v1/analytics/visitors + * Get visitor analytics (admin only - requires auth) + * + * Query params: + * - domain: filter by domain + * - days: number of days to look back (default: 30) + * - limit: max results (default: 50) + */ +router.get('/analytics/visitors', async (req: PublicApiRequest, res: Response) => { + try { + const scope = req.scope; + + // Only allow internal keys + if (!scope || scope.type !== 'internal') { + return res.status(403).json({ error: 'Access denied - internal key required' }); + } + + const { domain, days = '30', limit = '50' } = req.query; + const daysNum = Math.min(parseInt(days as string, 10) || 30, 90); + const limitNum = Math.min(parseInt(limit as string, 10) || 50, 200); + + let whereClause = 'WHERE created_at > NOW() - $1::interval'; + const params: any[] = [`${daysNum} days`]; + let paramIndex = 2; + + if (domain) { + whereClause += ` AND domain = $${paramIndex}`; + params.push(domain); + paramIndex++; + } + + // Get top locations + const { rows: topLocations } = await pool.query(` + SELECT + city, + state, + state_code, + country_code, + COUNT(*) as visit_count, + COUNT(DISTINCT session_id) as unique_sessions, + MAX(created_at) as last_visit + FROM visitor_locations + ${whereClause} + GROUP BY city, state, state_code, country_code + ORDER BY visit_count DESC + LIMIT $${paramIndex} + `, [...params, limitNum]); + + // Get daily totals + const { rows: dailyStats } = await pool.query(` + SELECT + DATE(created_at) as date, + COUNT(*) as visits, + COUNT(DISTINCT session_id) as unique_sessions + FROM visitor_locations + ${whereClause} + GROUP BY DATE(created_at) + ORDER BY date DESC + LIMIT 30 + `, params); + + // Get totals + const { rows: totals } = await pool.query(` + SELECT + COUNT(*) as total_visits, + COUNT(DISTINCT session_id) as total_sessions, + COUNT(DISTINCT city || state_code) as unique_locations + FROM visitor_locations + ${whereClause} + `, params); + + res.json({ + success: true, + period: { + days: daysNum, + domain: domain || 'all' + }, + totals: totals[0], + top_locations: topLocations.map(l => ({ + city: l.city, + state: l.state, + state_code: l.state_code, + country_code: l.country_code, + visits: parseInt(l.visit_count, 10), + unique_sessions: parseInt(l.unique_sessions, 10), + last_visit: l.last_visit + })), + daily_stats: dailyStats.map(d => ({ + date: d.date, + visits: parseInt(d.visits, 10), + unique_sessions: parseInt(d.unique_sessions, 10) + })) + }); + } catch (error: any) { + console.error('Visitor analytics error:', error); + res.status(500).json({ + error: 'Failed to fetch visitor analytics', + message: error.message + }); + } +}); + export default router; diff --git a/backend/src/routes/tasks.ts b/backend/src/routes/tasks.ts index 259f4096..6c68e3cc 100644 --- a/backend/src/routes/tasks.ts +++ b/backend/src/routes/tasks.ts @@ -444,7 +444,7 @@ router.post('/migration/cancel-pending-crawl-jobs', async (_req: Request, res: R /** * POST /api/tasks/migration/create-resync-tasks - * Create product_resync tasks for all crawl-enabled dispensaries + * Create product_refresh tasks for all crawl-enabled dispensaries */ router.post('/migration/create-resync-tasks', async (req: Request, res: Response) => { try { @@ -474,7 +474,7 @@ 
router.post('/migration/create-resync-tasks', async (req: Request, res: Response const hasActive = await taskService.hasActiveTask(disp.id); if (!hasActive) { await taskService.createTask({ - role: 'product_resync', + role: 'product_refresh', dispensary_id: disp.id, platform: 'dutchie', priority, diff --git a/backend/src/routes/worker-registry.ts b/backend/src/routes/worker-registry.ts new file mode 100644 index 00000000..edb79d1d --- /dev/null +++ b/backend/src/routes/worker-registry.ts @@ -0,0 +1,652 @@ +/** + * Worker Registry API Routes + * + * Dynamic worker management - workers register on startup, get assigned names, + * and report heartbeats. Everything is API-driven, no hardcoding. + * + * Endpoints: + * POST /api/worker-registry/register - Worker reports for duty + * POST /api/worker-registry/heartbeat - Worker heartbeat + * POST /api/worker-registry/deregister - Worker signing off + * GET /api/worker-registry/workers - List all workers (for dashboard) + * GET /api/worker-registry/workers/:id - Get specific worker + * POST /api/worker-registry/cleanup - Mark stale workers offline + * + * GET /api/worker-registry/names - List all names in pool + * POST /api/worker-registry/names - Add names to pool + * DELETE /api/worker-registry/names/:name - Remove name from pool + * + * GET /api/worker-registry/roles - List available task roles + * POST /api/worker-registry/roles - Add a new role (future) + */ + +import { Router, Request, Response } from 'express'; +import { pool } from '../db/pool'; +import os from 'os'; + +const router = Router(); + +// ============================================================ +// WORKER REGISTRATION +// ============================================================ + +/** + * POST /api/worker-registry/register + * Worker reports for duty - gets assigned a friendly name + * + * Body: + * - role: string (optional) - task role, or null for role-agnostic workers + * - worker_id: string (optional) - custom ID, auto-generated if not provided + * - pod_name: string (optional) - k8s pod name + * - hostname: string (optional) - machine hostname + * - metadata: object (optional) - additional worker info + * + * Returns: + * - worker_id: assigned worker ID + * - friendly_name: assigned name from pool + * - role: confirmed role (or null if agnostic) + * - message: welcome message + */ +router.post('/register', async (req: Request, res: Response) => { + try { + const { + role = null, // Role is now optional - null means agnostic + worker_id, + pod_name, + hostname, + ip_address, + metadata = {} + } = req.body; + + // Generate worker_id if not provided + const finalWorkerId = worker_id || `worker-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; + const finalHostname = hostname || os.hostname(); + const clientIp = ip_address || req.ip || req.socket.remoteAddress; + + // Check if worker already registered + const existing = await pool.query( + 'SELECT id, friendly_name, status FROM worker_registry WHERE worker_id = $1', + [finalWorkerId] + ); + + if (existing.rows.length > 0) { + // Re-activate existing worker + const { rows } = await pool.query(` + UPDATE worker_registry + SET status = 'active', + role = $1, + pod_name = $2, + hostname = $3, + ip_address = $4, + last_heartbeat_at = NOW(), + started_at = NOW(), + metadata = $5, + updated_at = NOW() + WHERE worker_id = $6 + RETURNING id, worker_id, friendly_name, role + `, [role, pod_name, finalHostname, clientIp, metadata, finalWorkerId]); + + const worker = rows[0]; + const roleMsg = role ? 
`for ${role}` : 'as role-agnostic'; + console.log(`[WorkerRegistry] Worker "${worker.friendly_name}" (${finalWorkerId}) re-registered ${roleMsg}`); + + return res.json({ + success: true, + worker_id: worker.worker_id, + friendly_name: worker.friendly_name, + role: worker.role, + message: role + ? `Welcome back, ${worker.friendly_name}! You are assigned to ${role}.` + : `Welcome back, ${worker.friendly_name}! You are ready to take any task.` + }); + } + + // Assign a friendly name + const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]); + const friendlyName = nameResult.rows[0].name; + + // Register the worker + const { rows } = await pool.query(` + INSERT INTO worker_registry ( + worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata + ) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7) + RETURNING id, worker_id, friendly_name, role + `, [finalWorkerId, friendlyName, role, pod_name, finalHostname, clientIp, metadata]); + + const worker = rows[0]; + const roleMsg = role ? `for ${role}` : 'as role-agnostic'; + console.log(`[WorkerRegistry] New worker "${friendlyName}" (${finalWorkerId}) reporting for duty ${roleMsg}`); + + res.json({ + success: true, + worker_id: worker.worker_id, + friendly_name: worker.friendly_name, + role: worker.role, + message: role + ? `Hello ${friendlyName}! You are now registered for ${role}. Ready for work!` + : `Hello ${friendlyName}! You are ready to take any task from the pool.` + }); + } catch (error: any) { + console.error('[WorkerRegistry] Registration error:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * POST /api/worker-registry/heartbeat + * Worker sends heartbeat to stay alive + * + * Body: + * - worker_id: string (required) + * - current_task_id: number (optional) - task currently being processed + * - status: string (optional) - 'active', 'idle' + */ +router.post('/heartbeat', async (req: Request, res: Response) => { + try { + const { worker_id, current_task_id, status = 'active', resources } = req.body; + + if (!worker_id) { + return res.status(400).json({ success: false, error: 'worker_id is required' }); + } + + // Store resources in metadata jsonb column + const { rows } = await pool.query(` + UPDATE worker_registry + SET last_heartbeat_at = NOW(), + current_task_id = $1, + status = $2, + metadata = COALESCE(metadata, '{}'::jsonb) || COALESCE($4::jsonb, '{}'::jsonb), + updated_at = NOW() + WHERE worker_id = $3 + RETURNING id, friendly_name, status + `, [current_task_id || null, status, worker_id, resources ? JSON.stringify(resources) : null]); + + if (rows.length === 0) { + return res.status(404).json({ success: false, error: 'Worker not found - please register first' }); + } + + res.json({ + success: true, + worker: rows[0] + }); + } catch (error: any) { + console.error('[WorkerRegistry] Heartbeat error:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * POST /api/worker-registry/task-completed + * Worker reports task completion + * + * Body: + * - worker_id: string (required) + * - success: boolean (required) + */ +router.post('/task-completed', async (req: Request, res: Response) => { + try { + const { worker_id, success } = req.body; + + if (!worker_id) { + return res.status(400).json({ success: false, error: 'worker_id is required' }); + } + + const incrementField = success ? 
'tasks_completed' : 'tasks_failed'; + + const { rows } = await pool.query(` + UPDATE worker_registry + SET ${incrementField} = ${incrementField} + 1, + last_task_at = NOW(), + current_task_id = NULL, + status = 'idle', + updated_at = NOW() + WHERE worker_id = $1 + RETURNING id, friendly_name, tasks_completed, tasks_failed + `, [worker_id]); + + if (rows.length === 0) { + return res.status(404).json({ success: false, error: 'Worker not found' }); + } + + res.json({ success: true, worker: rows[0] }); + } catch (error: any) { + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * POST /api/worker-registry/deregister + * Worker signing off (graceful shutdown) + * + * Body: + * - worker_id: string (required) + */ +router.post('/deregister', async (req: Request, res: Response) => { + try { + const { worker_id } = req.body; + + if (!worker_id) { + return res.status(400).json({ success: false, error: 'worker_id is required' }); + } + + // Release the name back to the pool + await pool.query('SELECT release_worker_name($1)', [worker_id]); + + // Mark as terminated + const { rows } = await pool.query(` + UPDATE worker_registry + SET status = 'terminated', + current_task_id = NULL, + updated_at = NOW() + WHERE worker_id = $1 + RETURNING id, friendly_name + `, [worker_id]); + + if (rows.length === 0) { + return res.status(404).json({ success: false, error: 'Worker not found' }); + } + + console.log(`[WorkerRegistry] Worker "${rows[0].friendly_name}" (${worker_id}) signed off`); + + res.json({ + success: true, + message: `Goodbye ${rows[0].friendly_name}! Thanks for your work.` + }); + } catch (error: any) { + console.error('[WorkerRegistry] Deregister error:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +// ============================================================ +// WORKER LISTING (for Dashboard) +// ============================================================ + +/** + * GET /api/worker-registry/workers + * List all workers (for dashboard) + * + * Query params: + * - status: filter by status (active, idle, offline, all) + * - role: filter by role + * - include_terminated: include terminated workers (default: false) + */ +router.get('/workers', async (req: Request, res: Response) => { + try { + const { status, role, include_terminated = 'false' } = req.query; + + let whereClause = include_terminated === 'true' ? 
'WHERE 1=1' : "WHERE status != 'terminated'"; + const params: any[] = []; + let paramIndex = 1; + + if (status && status !== 'all') { + whereClause += ` AND status = $${paramIndex}`; + params.push(status); + paramIndex++; + } + + if (role) { + whereClause += ` AND role = $${paramIndex}`; + params.push(role); + paramIndex++; + } + + const { rows } = await pool.query(` + SELECT + id, + worker_id, + friendly_name, + role, + pod_name, + hostname, + ip_address, + status, + started_at, + last_heartbeat_at, + last_task_at, + tasks_completed, + tasks_failed, + current_task_id, + metadata, + EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat, + CASE + WHEN status = 'offline' OR status = 'terminated' THEN status + WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale' + WHEN current_task_id IS NOT NULL THEN 'busy' + ELSE 'ready' + END as health_status, + created_at + FROM worker_registry + ${whereClause} + ORDER BY + CASE status + WHEN 'active' THEN 1 + WHEN 'idle' THEN 2 + WHEN 'offline' THEN 3 + ELSE 4 + END, + last_heartbeat_at DESC + `, params); + + // Get summary counts + const { rows: summary } = await pool.query(` + SELECT + COUNT(*) FILTER (WHERE status = 'active') as active_count, + COUNT(*) FILTER (WHERE status = 'idle') as idle_count, + COUNT(*) FILTER (WHERE status = 'offline') as offline_count, + COUNT(*) FILTER (WHERE status != 'terminated') as total_count, + COUNT(DISTINCT role) FILTER (WHERE status IN ('active', 'idle')) as active_roles + FROM worker_registry + `); + + res.json({ + success: true, + workers: rows, + summary: summary[0] + }); + } catch (error: any) { + console.error('[WorkerRegistry] List workers error:', error); + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * GET /api/worker-registry/workers/:workerId + * Get specific worker details + */ +router.get('/workers/:workerId', async (req: Request, res: Response) => { + try { + const { workerId } = req.params; + + const { rows } = await pool.query(` + SELECT * FROM worker_registry WHERE worker_id = $1 + `, [workerId]); + + if (rows.length === 0) { + return res.status(404).json({ success: false, error: 'Worker not found' }); + } + + res.json({ success: true, worker: rows[0] }); + } catch (error: any) { + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * DELETE /api/worker-registry/workers/:workerId + * Remove a worker (admin action) + */ +router.delete('/workers/:workerId', async (req: Request, res: Response) => { + try { + const { workerId } = req.params; + + // Release name + await pool.query('SELECT release_worker_name($1)', [workerId]); + + // Delete worker + const { rows } = await pool.query(` + DELETE FROM worker_registry WHERE worker_id = $1 RETURNING friendly_name + `, [workerId]); + + if (rows.length === 0) { + return res.status(404).json({ success: false, error: 'Worker not found' }); + } + + res.json({ success: true, message: `Worker ${rows[0].friendly_name} removed` }); + } catch (error: any) { + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * POST /api/worker-registry/cleanup + * Mark stale workers as offline + * + * Body: + * - stale_threshold_minutes: number (default: 5) + */ +router.post('/cleanup', async (req: Request, res: Response) => { + try { + const { stale_threshold_minutes = 5 } = req.body; + + const { rows } = await pool.query( + 'SELECT mark_stale_workers($1) as count', + [stale_threshold_minutes] + ); + + res.json({ + success: true, + stale_workers_marked: 
rows[0].count, + message: `Marked ${rows[0].count} stale workers as offline` + }); + } catch (error: any) { + res.status(500).json({ success: false, error: error.message }); + } +}); + +// ============================================================ +// NAME POOL MANAGEMENT +// ============================================================ + +/** + * GET /api/worker-registry/names + * List all names in the pool + */ +router.get('/names', async (_req: Request, res: Response) => { + try { + const { rows } = await pool.query(` + SELECT + id, + name, + in_use, + assigned_to, + assigned_at + FROM worker_name_pool + ORDER BY in_use DESC, name ASC + `); + + const { rows: summary } = await pool.query(` + SELECT + COUNT(*) as total, + COUNT(*) FILTER (WHERE in_use = true) as in_use, + COUNT(*) FILTER (WHERE in_use = false) as available + FROM worker_name_pool + `); + + res.json({ + success: true, + names: rows, + summary: summary[0] + }); + } catch (error: any) { + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * POST /api/worker-registry/names + * Add names to the pool + * + * Body: + * - names: string[] (required) - array of names to add + */ +router.post('/names', async (req: Request, res: Response) => { + try { + const { names } = req.body; + + if (!names || !Array.isArray(names) || names.length === 0) { + return res.status(400).json({ success: false, error: 'names array is required' }); + } + + const values = names.map(n => `('${n.replace(/'/g, "''")}')`).join(', '); + + const { rowCount } = await pool.query(` + INSERT INTO worker_name_pool (name) + VALUES ${values} + ON CONFLICT (name) DO NOTHING + `); + + res.json({ + success: true, + added: rowCount, + message: `Added ${rowCount} new names to the pool` + }); + } catch (error: any) { + res.status(500).json({ success: false, error: error.message }); + } +}); + +/** + * DELETE /api/worker-registry/names/:name + * Remove a name from the pool (only if not in use) + */ +router.delete('/names/:name', async (req: Request, res: Response) => { + try { + const { name } = req.params; + + const { rows } = await pool.query(` + DELETE FROM worker_name_pool + WHERE name = $1 AND in_use = false + RETURNING name + `, [name]); + + if (rows.length === 0) { + return res.status(400).json({ + success: false, + error: 'Name not found or currently in use' + }); + } + + res.json({ success: true, message: `Name "${name}" removed from pool` }); + } catch (error: any) { + res.status(500).json({ success: false, error: error.message }); + } +}); + +// ============================================================ +// ROLE MANAGEMENT +// ============================================================ + +/** + * GET /api/worker-registry/roles + * List available task roles + */ +router.get('/roles', async (_req: Request, res: Response) => { + // These are the roles the task handlers support + const roles = [ + { + id: 'product_refresh', + name: 'Product Refresh', + description: 'Re-crawl dispensary products for price/stock changes', + handler: 'handleProductRefresh' + }, + { + id: 'product_discovery', + name: 'Product Discovery', + description: 'Initial product discovery for new dispensaries', + handler: 'handleProductDiscovery' + }, + { + id: 'store_discovery', + name: 'Store Discovery', + description: 'Discover new dispensary locations', + handler: 'handleStoreDiscovery' + }, + { + id: 'entry_point_discovery', + name: 'Entry Point Discovery', + description: 'Resolve platform IDs from menu URLs', + handler: 'handleEntryPointDiscovery' + }, + { + 
id: 'analytics_refresh', + name: 'Analytics Refresh', + description: 'Refresh materialized views and analytics', + handler: 'handleAnalyticsRefresh' + } + ]; + + // Get active worker counts per role + try { + const { rows } = await pool.query(` + SELECT role, COUNT(*) as worker_count + FROM worker_registry + WHERE status IN ('active', 'idle') + GROUP BY role + `); + + const countMap = new Map(rows.map(r => [r.role, parseInt(r.worker_count)])); + + const rolesWithCounts = roles.map(r => ({ + ...r, + active_workers: countMap.get(r.id) || 0 + })); + + res.json({ success: true, roles: rolesWithCounts }); + } catch { + // If table doesn't exist yet, just return roles without counts + res.json({ success: true, roles: roles.map(r => ({ ...r, active_workers: 0 })) }); + } +}); + +/** + * GET /api/worker-registry/capacity + * Get capacity planning info + */ +router.get('/capacity', async (_req: Request, res: Response) => { + try { + // Get worker counts by role + const { rows: workerCounts } = await pool.query(` + SELECT role, COUNT(*) as count + FROM worker_registry + WHERE status IN ('active', 'idle') + GROUP BY role + `); + + // Get pending task counts by role (if worker_tasks exists) + let taskCounts: any[] = []; + try { + const result = await pool.query(` + SELECT role, COUNT(*) as pending_count + FROM worker_tasks + WHERE status = 'pending' + GROUP BY role + `); + taskCounts = result.rows; + } catch { + // worker_tasks might not exist yet + } + + // Get crawl-enabled store count + const storeCountResult = await pool.query(` + SELECT COUNT(*) as count + FROM dispensaries + WHERE crawl_enabled = true AND platform_dispensary_id IS NOT NULL + `); + const totalStores = parseInt(storeCountResult.rows[0].count); + + const workerMap = new Map(workerCounts.map(r => [r.role, parseInt(r.count)])); + const taskMap = new Map(taskCounts.map(r => [r.role, parseInt(r.pending_count)])); + + const roles = ['product_refresh', 'product_discovery', 'store_discovery', 'entry_point_discovery', 'analytics_refresh']; + + const capacity = roles.map(role => ({ + role, + active_workers: workerMap.get(role) || 0, + pending_tasks: taskMap.get(role) || 0, + // Rough estimate: 20 seconds per task, 4-hour cycle + tasks_per_worker_per_cycle: 720, + workers_needed_for_all_stores: Math.ceil(totalStores / 720) + })); + + res.json({ + success: true, + total_stores: totalStores, + capacity + }); + } catch (error: any) { + res.status(500).json({ success: false, error: error.message }); + } +}); + +export default router; diff --git a/backend/src/services/crawl-rotator.ts b/backend/src/services/crawl-rotator.ts index 09e1a5b4..4b9708cf 100644 --- a/backend/src/services/crawl-rotator.ts +++ b/backend/src/services/crawl-rotator.ts @@ -109,14 +109,14 @@ export class ProxyRotator { username, password, protocol, - is_active as "isActive", - last_used_at as "lastUsedAt", + active as "isActive", + last_tested_at as "lastUsedAt", failure_count as "failureCount", - success_count as "successCount", - avg_response_time_ms as "avgResponseTimeMs" + 0 as "successCount", + response_time_ms as "avgResponseTimeMs" FROM proxies - WHERE is_active = true - ORDER BY failure_count ASC, last_used_at ASC NULLS FIRST + WHERE active = true + ORDER BY failure_count ASC, last_tested_at ASC NULLS FIRST `); this.proxies = result.rows; @@ -192,11 +192,11 @@ export class ProxyRotator { UPDATE proxies SET failure_count = failure_count + 1, - last_failure_at = NOW(), - last_error = $2, - is_active = CASE WHEN failure_count >= 4 THEN false ELSE is_active END + 
updated_at = NOW(), + test_result = $2, + active = CASE WHEN failure_count >= 4 THEN false ELSE active END WHERE id = $1 - `, [proxyId, error || null]); + `, [proxyId, error || 'failed']); } catch (err) { console.error(`[ProxyRotator] Failed to update proxy ${proxyId}:`, err); } @@ -226,12 +226,13 @@ export class ProxyRotator { await this.pool.query(` UPDATE proxies SET - success_count = success_count + 1, - last_used_at = NOW(), - avg_response_time_ms = CASE - WHEN avg_response_time_ms IS NULL THEN $2 - ELSE (avg_response_time_ms * 0.8) + ($2 * 0.2) - END + last_tested_at = NOW(), + test_result = 'success', + response_time_ms = CASE + WHEN response_time_ms IS NULL THEN $2 + ELSE (response_time_ms * 0.8 + $2 * 0.2)::integer + END, + updated_at = NOW() WHERE id = $1 `, [proxyId, responseTimeMs || null]); } catch (err) { diff --git a/backend/src/services/ip2location.ts b/backend/src/services/ip2location.ts new file mode 100644 index 00000000..25fc296e --- /dev/null +++ b/backend/src/services/ip2location.ts @@ -0,0 +1,134 @@ +/** + * IP2Location Service + * + * Uses local IP2Location LITE DB3 database for IP geolocation. + * No external API calls, no rate limits. + * + * Database: IP2Location LITE DB3 (free, monthly updates) + * Fields: country, region, city, latitude, longitude + */ + +import path from 'path'; +import fs from 'fs'; + +// @ts-ignore - no types for ip2location-nodejs +const { IP2Location } = require('ip2location-nodejs'); + +const DB_PATH = process.env.IP2LOCATION_DB_PATH || + path.join(__dirname, '../../data/ip2location/IP2LOCATION-LITE-DB5.BIN'); + +let ip2location: any = null; +let dbLoaded = false; + +/** + * Initialize IP2Location database + */ +export function initIP2Location(): boolean { + if (dbLoaded) return true; + + try { + if (!fs.existsSync(DB_PATH)) { + console.warn(`IP2Location database not found at: ${DB_PATH}`); + console.warn('Run: ./scripts/download-ip2location.sh to download'); + return false; + } + + ip2location = new IP2Location(); + ip2location.open(DB_PATH); + dbLoaded = true; + console.log('IP2Location database loaded successfully'); + return true; + } catch (err) { + console.error('Failed to load IP2Location database:', err); + return false; + } +} + +/** + * Close IP2Location database + */ +export function closeIP2Location(): void { + if (ip2location) { + ip2location.close(); + ip2location = null; + dbLoaded = false; + } +} + +export interface GeoLocation { + city: string | null; + state: string | null; + stateCode: string | null; + country: string | null; + countryCode: string | null; + lat: number | null; + lng: number | null; +} + +/** + * Lookup IP address location + * + * @param ip - IPv4 or IPv6 address + * @returns Location data or null if not found + */ +export function lookupIP(ip: string): GeoLocation | null { + // Skip private/localhost IPs + if (!ip || ip === '127.0.0.1' || ip === '::1' || + ip.startsWith('192.168.') || ip.startsWith('10.') || + ip.startsWith('172.16.') || ip.startsWith('172.17.') || + ip.startsWith('::ffff:127.') || ip.startsWith('::ffff:192.168.') || + ip.startsWith('::ffff:10.')) { + return null; + } + + // Strip IPv6 prefix if present + const cleanIP = ip.replace(/^::ffff:/, ''); + + // Initialize on first use if not already loaded + if (!dbLoaded) { + if (!initIP2Location()) { + return null; + } + } + + try { + const result = ip2location.getAll(cleanIP); + + if (!result || result.ip === '?' 
|| result.countryShort === '-') { + return null; + } + + // DB3 LITE doesn't include lat/lng - would need DB5+ for that + const lat = typeof result.latitude === 'number' && result.latitude !== 0 ? result.latitude : null; + const lng = typeof result.longitude === 'number' && result.longitude !== 0 ? result.longitude : null; + + return { + city: result.city !== '-' ? result.city : null, + state: result.region !== '-' ? result.region : null, + stateCode: null, // DB3 doesn't include state codes + country: result.countryLong !== '-' ? result.countryLong : null, + countryCode: result.countryShort !== '-' ? result.countryShort : null, + lat, + lng, + }; + } catch (err) { + console.error('IP2Location lookup error:', err); + return null; + } +} + +/** + * Check if IP2Location database is available + */ +export function isIP2LocationAvailable(): boolean { + if (dbLoaded) return true; + return fs.existsSync(DB_PATH); +} + +// Export singleton-style interface +export default { + init: initIP2Location, + close: closeIP2Location, + lookup: lookupIP, + isAvailable: isIP2LocationAvailable, +}; diff --git a/backend/src/services/scraper.ts b/backend/src/services/scraper.ts index b7f93327..3a5504d9 100755 --- a/backend/src/services/scraper.ts +++ b/backend/src/services/scraper.ts @@ -3,7 +3,7 @@ import StealthPlugin from 'puppeteer-extra-plugin-stealth'; import { Browser, Page } from 'puppeteer'; import { SocksProxyAgent } from 'socks-proxy-agent'; import { pool } from '../db/pool'; -import { uploadImageFromUrl, getImageUrl } from '../utils/minio'; +import { downloadProductImageLegacy } from '../utils/image-storage'; import { logger } from './logger'; import { registerScraper, updateScraperStats, completeScraper } from '../routes/scraper-monitor'; import { incrementProxyFailure, getActiveProxy, isBotDetectionError, putProxyInTimeout } from './proxy'; @@ -767,7 +767,8 @@ export async function saveProducts(storeId: number, categoryId: number, products if (product.imageUrl && !localImagePath) { try { - localImagePath = await uploadImageFromUrl(product.imageUrl, productId); + const result = await downloadProductImageLegacy(product.imageUrl, 0, productId); + localImagePath = result.urls?.original || null; await client.query(` UPDATE products SET local_image_path = $1 diff --git a/backend/src/tasks/handlers/entry-point-discovery.ts b/backend/src/tasks/handlers/entry-point-discovery.ts index ff3202b0..82a7f79d 100644 --- a/backend/src/tasks/handlers/entry-point-discovery.ts +++ b/backend/src/tasks/handlers/entry-point-discovery.ts @@ -1,13 +1,21 @@ /** * Entry Point Discovery Handler * - * Detects menu type and resolves platform IDs for a discovered store. + * Resolves platform IDs for a discovered store using Dutchie GraphQL. * This is the step between store_discovery and product_discovery. * - * TODO: Integrate with platform ID resolution when available + * Flow: + * 1. Load dispensary info from database + * 2. Extract slug from menu_url + * 3. Start stealth session (fingerprint + optional proxy) + * 4. Query Dutchie GraphQL to resolve slug → platform_dispensary_id + * 5. Update dispensary record with resolved ID + * 6. 
Queue product_discovery task if successful */ import { TaskContext, TaskResult } from '../task-worker'; +import { startSession, endSession } from '../../platforms/dutchie'; +import { resolveDispensaryIdWithDetails } from '../../platforms/dutchie/queries'; export async function handleEntryPointDiscovery(ctx: TaskContext): Promise { const { pool, task } = ctx; @@ -18,9 +26,11 @@ export async function handleEntryPointDiscovery(ctx: TaskContext): Promise ${platformId}`); + + await ctx.heartbeat(); + + // ============================================================ + // STEP 5: Update dispensary with resolved ID + // ============================================================ + await pool.query(` + UPDATE dispensaries + SET + platform_dispensary_id = $2, + menu_type = 'dutchie', + crawl_enabled = true, + updated_at = NOW() + WHERE id = $1 + `, [dispensaryId, platformId]); + + console.log(`[EntryPointDiscovery] Updated dispensary ${dispensaryId} with platform ID`); + + // ============================================================ + // STEP 6: Queue product_discovery task + // ============================================================ + await pool.query(` + INSERT INTO worker_tasks (role, dispensary_id, priority, scheduled_for) + VALUES ('product_discovery', $1, 5, NOW()) + ON CONFLICT DO NOTHING + `, [dispensaryId]); + + console.log(`[EntryPointDiscovery] Queued product_discovery task for dispensary ${dispensaryId}`); + + return { + success: true, + platformId, + slug, + queuedProductDiscovery: true, + }; + + } finally { + // Always end session + endSession(); + } - return { - success: true, - message: 'Slug extracted, awaiting platform ID resolution', - slug, - }; } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : 'Unknown error'; console.error(`[EntryPointDiscovery] Error for dispensary ${dispensaryId}:`, errorMessage); diff --git a/backend/src/tasks/handlers/index.ts b/backend/src/tasks/handlers/index.ts index 41a2357e..c5763d8e 100644 --- a/backend/src/tasks/handlers/index.ts +++ b/backend/src/tasks/handlers/index.ts @@ -4,7 +4,7 @@ * Exports all task handlers for the task worker. 
*/ -export { handleProductResync } from './product-resync'; +export { handleProductRefresh } from './product-refresh'; export { handleProductDiscovery } from './product-discovery'; export { handleStoreDiscovery } from './store-discovery'; export { handleEntryPointDiscovery } from './entry-point-discovery'; diff --git a/backend/src/tasks/handlers/product-discovery.ts b/backend/src/tasks/handlers/product-discovery.ts index d90e6724..3e4b647e 100644 --- a/backend/src/tasks/handlers/product-discovery.ts +++ b/backend/src/tasks/handlers/product-discovery.ts @@ -6,11 +6,11 @@ */ import { TaskContext, TaskResult } from '../task-worker'; -import { handleProductResync } from './product-resync'; +import { handleProductRefresh } from './product-refresh'; export async function handleProductDiscovery(ctx: TaskContext): Promise { - // Product discovery is essentially the same as resync for the first time + // Product discovery is essentially the same as refresh for the first time // The main difference is in when this task is triggered (new store vs scheduled) console.log(`[ProductDiscovery] Starting initial product fetch for dispensary ${ctx.task.dispensary_id}`); - return handleProductResync(ctx); + return handleProductRefresh(ctx); } diff --git a/backend/src/tasks/handlers/product-resync.ts b/backend/src/tasks/handlers/product-refresh.ts similarity index 98% rename from backend/src/tasks/handlers/product-resync.ts rename to backend/src/tasks/handlers/product-refresh.ts index eef6517b..8a36568a 100644 --- a/backend/src/tasks/handlers/product-resync.ts +++ b/backend/src/tasks/handlers/product-refresh.ts @@ -1,5 +1,5 @@ /** - * Product Resync Handler + * Product Refresh Handler * * Re-crawls a store to capture price/stock changes using the GraphQL pipeline. * @@ -31,12 +31,12 @@ import { const normalizer = new DutchieNormalizer(); -export async function handleProductResync(ctx: TaskContext): Promise { +export async function handleProductRefresh(ctx: TaskContext): Promise { const { pool, task } = ctx; const dispensaryId = task.dispensary_id; if (!dispensaryId) { - return { success: false, error: 'No dispensary_id specified for product_resync task' }; + return { success: false, error: 'No dispensary_id specified for product_refresh task' }; } try { diff --git a/backend/src/tasks/index.ts b/backend/src/tasks/index.ts index fa110490..894123ff 100644 --- a/backend/src/tasks/index.ts +++ b/backend/src/tasks/index.ts @@ -17,7 +17,7 @@ export { export { TaskWorker, TaskContext, TaskResult } from './task-worker'; export { - handleProductResync, + handleProductRefresh, handleProductDiscovery, handleStoreDiscovery, handleEntryPointDiscovery, diff --git a/backend/src/tasks/start-pod.ts b/backend/src/tasks/start-pod.ts new file mode 100644 index 00000000..8276bf1a --- /dev/null +++ b/backend/src/tasks/start-pod.ts @@ -0,0 +1,93 @@ +#!/usr/bin/env npx tsx +/** + * Start Pod - Simulates a Kubernetes pod locally + * + * Starts 5 workers with a pod name from the predefined list. 
+ * + * Usage: + * npx tsx src/tasks/start-pod.ts + * npx tsx src/tasks/start-pod.ts 0 # Starts pod "Aethelgard" with 5 workers + * npx tsx src/tasks/start-pod.ts 1 # Starts pod "Xylos" with 5 workers + */ + +import { spawn } from 'child_process'; +import path from 'path'; + +const POD_NAMES = [ + 'Aethelgard', + 'Xylos', + 'Kryll', + 'Coriolis', + 'Dimidium', + 'Veridia', + 'Zetani', + 'Talos IV', + 'Onyx', + 'Celestia', + 'Gormand', + 'Betha', + 'Ragnar', + 'Syphon', + 'Axiom', + 'Nadir', + 'Terra Nova', + 'Acheron', + 'Nexus', + 'Vespera', + 'Helios Prime', + 'Oasis', + 'Mordina', + 'Cygnus', + 'Umbra', +]; + +const WORKERS_PER_POD = 5; + +async function main() { + const podIndex = parseInt(process.argv[2] ?? '0', 10); + + if (podIndex < 0 || podIndex >= POD_NAMES.length) { + console.error(`Invalid pod index: ${podIndex}. Must be 0-${POD_NAMES.length - 1}`); + process.exit(1); + } + + const podName = POD_NAMES[podIndex]; + console.log(`[Pod] Starting pod "${podName}" with ${WORKERS_PER_POD} workers...`); + + const workerScript = path.join(__dirname, 'task-worker.ts'); + const workers: ReturnType[] = []; + + for (let i = 1; i <= WORKERS_PER_POD; i++) { + const workerId = `${podName}-worker-${i}`; + + const worker = spawn('npx', ['tsx', workerScript], { + env: { + ...process.env, + WORKER_ID: workerId, + POD_NAME: podName, + }, + stdio: 'inherit', + }); + + workers.push(worker); + console.log(`[Pod] Started worker ${i}/${WORKERS_PER_POD}: ${workerId}`); + } + + // Handle shutdown + const shutdown = () => { + console.log(`\n[Pod] Shutting down pod "${podName}"...`); + workers.forEach(w => w.kill('SIGTERM')); + setTimeout(() => process.exit(0), 2000); + }; + + process.on('SIGTERM', shutdown); + process.on('SIGINT', shutdown); + + // Keep the process alive + await new Promise(() => {}); +} + +main().catch(err => { + console.error('[Pod] Fatal error:', err); + process.exit(1); +}); diff --git a/backend/src/tasks/task-service.ts b/backend/src/tasks/task-service.ts index 83de7eac..979e3401 100644 --- a/backend/src/tasks/task-service.ts +++ b/backend/src/tasks/task-service.ts @@ -14,7 +14,7 @@ export type TaskRole = | 'store_discovery' | 'entry_point_discovery' | 'product_discovery' - | 'product_resync' + | 'product_refresh' | 'analytics_refresh'; export type TaskStatus = @@ -29,6 +29,8 @@ export interface WorkerTask { id: number; role: TaskRole; dispensary_id: number | null; + dispensary_name?: string; // JOINed from dispensaries + dispensary_slug?: string; // JOINed from dispensaries platform: string | null; status: TaskStatus; priority: number; @@ -128,13 +130,42 @@ class TaskService { /** * Claim a task atomically for a worker - * Uses the SQL function for proper locking + * If role is null, claims ANY available task (role-agnostic worker) */ - async claimTask(role: TaskRole, workerId: string): Promise { - const result = await pool.query( - `SELECT * FROM claim_task($1, $2)`, - [role, workerId] - ); + async claimTask(role: TaskRole | null, workerId: string): Promise { + if (role) { + // Role-specific claiming - use the SQL function + const result = await pool.query( + `SELECT * FROM claim_task($1, $2)`, + [role, workerId] + ); + return (result.rows[0] as WorkerTask) || null; + } + + // Role-agnostic claiming - claim ANY pending task + const result = await pool.query(` + UPDATE worker_tasks + SET + status = 'claimed', + worker_id = $1, + claimed_at = NOW() + WHERE id = ( + SELECT id FROM worker_tasks + WHERE status = 'pending' + AND (scheduled_for IS NULL OR scheduled_for <= NOW()) + -- 
Exclude stores that already have an active task + AND (dispensary_id IS NULL OR dispensary_id NOT IN ( + SELECT dispensary_id FROM worker_tasks + WHERE status IN ('claimed', 'running') + AND dispensary_id IS NOT NULL + )) + ORDER BY priority DESC, created_at ASC + LIMIT 1 + FOR UPDATE SKIP LOCKED + ) + RETURNING * + `, [workerId]); + return (result.rows[0] as WorkerTask) || null; } @@ -206,27 +237,27 @@ class TaskService { let paramIndex = 1; if (filter.role) { - conditions.push(`role = $${paramIndex++}`); + conditions.push(`t.role = $${paramIndex++}`); params.push(filter.role); } if (filter.status) { if (Array.isArray(filter.status)) { - conditions.push(`status = ANY($${paramIndex++})`); + conditions.push(`t.status = ANY($${paramIndex++})`); params.push(filter.status); } else { - conditions.push(`status = $${paramIndex++}`); + conditions.push(`t.status = $${paramIndex++}`); params.push(filter.status); } } if (filter.dispensary_id) { - conditions.push(`dispensary_id = $${paramIndex++}`); + conditions.push(`t.dispensary_id = $${paramIndex++}`); params.push(filter.dispensary_id); } if (filter.worker_id) { - conditions.push(`worker_id = $${paramIndex++}`); + conditions.push(`t.worker_id = $${paramIndex++}`); params.push(filter.worker_id); } @@ -235,9 +266,14 @@ class TaskService { const offset = filter.offset ?? 0; const result = await pool.query( - `SELECT * FROM worker_tasks + `SELECT + t.*, + d.name as dispensary_name, + d.slug as dispensary_slug + FROM worker_tasks t + LEFT JOIN dispensaries d ON d.id = t.dispensary_id ${whereClause} - ORDER BY created_at DESC + ORDER BY t.created_at DESC LIMIT ${limit} OFFSET ${offset}`, params ); diff --git a/backend/src/tasks/task-worker.ts b/backend/src/tasks/task-worker.ts index b90d4b76..df33597f 100644 --- a/backend/src/tasks/task-worker.ts +++ b/backend/src/tasks/task-worker.ts @@ -1,26 +1,58 @@ /** * Task Worker * - * A unified worker that processes tasks from the worker_tasks queue. - * Replaces the fragmented job systems (job_schedules, dispensary_crawl_jobs, etc.) + * A unified worker that pulls tasks from the worker_tasks queue. + * Workers register on startup, get a friendly name, and pull tasks. + * + * Architecture: + * - Tasks are generated on schedule (by scheduler or API) + * - Workers PULL tasks from the pool (not assigned to them) + * - Tasks are claimed in order of priority (DESC) then creation time (ASC) + * - Workers report heartbeats to worker_registry + * - Workers are ROLE-AGNOSTIC by default (can handle any task type) + * + * Stealth & Anti-Detection: + * PROXIES ARE REQUIRED - workers will fail to start if no proxies available. + * + * On startup, workers initialize the CrawlRotator which provides: + * - Proxy rotation: Loads proxies from `proxies` table, ALL requests use proxy + * - User-Agent rotation: Cycles through realistic browser fingerprints + * - Fingerprint rotation: Changes browser profile on blocks + * - Locale/timezone: Matches Accept-Language to target state + * + * The CrawlRotator is wired to the Dutchie client via setCrawlRotator(). + * Task handlers call startSession() which picks a random fingerprint. + * On 403 errors, the client automatically: + * 1. Records failure on current proxy + * 2. Rotates to next proxy + * 3. Rotates fingerprint + * 4. 
Retries the request * * Usage: - * WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts + * npx tsx src/tasks/task-worker.ts # Role-agnostic (any task) + * WORKER_ROLE=product_refresh npx tsx src/tasks/task-worker.ts # Role-specific * * Environment: - * WORKER_ROLE - Which task role to process (required) - * WORKER_ID - Optional custom worker ID + * WORKER_ROLE - Which task role to process (optional, null = any task) + * WORKER_ID - Optional custom worker ID (auto-generated if not provided) + * POD_NAME - Kubernetes pod name (optional) * POLL_INTERVAL_MS - How often to check for tasks (default: 5000) * HEARTBEAT_INTERVAL_MS - How often to update heartbeat (default: 30000) + * API_BASE_URL - Backend API URL for registration (default: http://localhost:3010) */ import { Pool } from 'pg'; import { v4 as uuidv4 } from 'uuid'; import { taskService, TaskRole, WorkerTask } from './task-service'; import { getPool } from '../db/pool'; +import os from 'os'; + +// Stealth/rotation support +import { CrawlRotator } from '../services/crawl-rotator'; +import { setCrawlRotator } from '../platforms/dutchie'; // Task handlers by role -import { handleProductResync } from './handlers/product-resync'; +import { handleProductRefresh } from './handlers/product-refresh'; import { handleProductDiscovery } from './handlers/product-discovery'; import { handleStoreDiscovery } from './handlers/store-discovery'; import { handleEntryPointDiscovery } from './handlers/entry-point-discovery'; @@ -28,6 +60,7 @@ import { handleAnalyticsRefresh } from './handlers/analytics-refresh'; const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '5000'); const HEARTBEAT_INTERVAL_MS = parseInt(process.env.HEARTBEAT_INTERVAL_MS || '30000'); +const API_BASE_URL = process.env.API_BASE_URL || 'http://localhost:3010'; export interface TaskContext { pool: Pool; @@ -48,7 +81,7 @@ export interface TaskResult { type TaskHandler = (ctx: TaskContext) => Promise; const TASK_HANDLERS: Record = { - product_resync: handleProductResync, + product_refresh: handleProductRefresh, product_discovery: handleProductDiscovery, store_discovery: handleStoreDiscovery, entry_point_discovery: handleEntryPointDiscovery, @@ -58,15 +91,160 @@ const TASK_HANDLERS: Record = { export class TaskWorker { private pool: Pool; private workerId: string; - private role: TaskRole; + private role: TaskRole | null; // null = role-agnostic (any task) + private friendlyName: string = ''; private isRunning: boolean = false; private heartbeatInterval: NodeJS.Timeout | null = null; + private registryHeartbeatInterval: NodeJS.Timeout | null = null; private currentTask: WorkerTask | null = null; + private crawlRotator: CrawlRotator; - constructor(role: TaskRole, workerId?: string) { + constructor(role: TaskRole | null = null, workerId?: string) { this.pool = getPool(); this.role = role; - this.workerId = workerId || `worker-${role}-${uuidv4().slice(0, 8)}`; + this.workerId = workerId || `worker-${uuidv4().slice(0, 8)}`; + this.crawlRotator = new CrawlRotator(this.pool); + } + + /** + * Initialize stealth systems (proxy rotation, fingerprints) + * Called once on worker startup before processing any tasks. + * + * IMPORTANT: Proxies are REQUIRED. Workers will fail to start if no proxies available. + */ + private async initializeStealth(): Promise { + // Load proxies from database + await this.crawlRotator.initialize(); + + const stats = this.crawlRotator.proxy.getStats(); + if (stats.activeProxies === 0) { + throw new Error('No active proxies available. 
Workers MUST use proxies for all requests. Add proxies to the database before starting workers.'); + } + + console.log(`[TaskWorker] Loaded ${stats.activeProxies} proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`); + + // Wire rotator to Dutchie client - proxies will be used for ALL requests + setCrawlRotator(this.crawlRotator); + + console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, proxy REQUIRED for all requests`); + } + + /** + * Register worker with the registry (get friendly name) + */ + private async register(): Promise { + try { + const response = await fetch(`${API_BASE_URL}/api/worker-registry/register`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + role: this.role, + worker_id: this.workerId, + pod_name: process.env.POD_NAME || process.env.HOSTNAME, + hostname: os.hostname(), + metadata: { + pid: process.pid, + node_version: process.version, + started_at: new Date().toISOString() + } + }) + }); + + const data = await response.json(); + if (data.success) { + this.friendlyName = data.friendly_name; + console.log(`[TaskWorker] ${data.message}`); + } else { + console.warn(`[TaskWorker] Registration warning: ${data.error}`); + this.friendlyName = this.workerId.slice(0, 12); + } + } catch (error: any) { + // Registration is optional - worker can still function without it + console.warn(`[TaskWorker] Could not register with API (will continue): ${error.message}`); + this.friendlyName = this.workerId.slice(0, 12); + } + } + + /** + * Deregister worker from the registry + */ + private async deregister(): Promise { + try { + await fetch(`${API_BASE_URL}/api/worker-registry/deregister`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ worker_id: this.workerId }) + }); + console.log(`[TaskWorker] ${this.friendlyName} signed off`); + } catch { + // Ignore deregistration errors + } + } + + /** + * Send heartbeat to registry with resource usage + */ + private async sendRegistryHeartbeat(): Promise { + try { + const memUsage = process.memoryUsage(); + const cpuUsage = process.cpuUsage(); + + await fetch(`${API_BASE_URL}/api/worker-registry/heartbeat`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + worker_id: this.workerId, + current_task_id: this.currentTask?.id || null, + status: this.currentTask ? 
'active' : 'idle', + resources: { + memory_mb: Math.round(memUsage.heapUsed / 1024 / 1024), + memory_total_mb: Math.round(memUsage.heapTotal / 1024 / 1024), + memory_rss_mb: Math.round(memUsage.rss / 1024 / 1024), + cpu_user_ms: Math.round(cpuUsage.user / 1000), + cpu_system_ms: Math.round(cpuUsage.system / 1000), + } + }) + }); + } catch { + // Ignore heartbeat errors + } + } + + /** + * Report task completion to registry + */ + private async reportTaskCompletion(success: boolean): Promise { + try { + await fetch(`${API_BASE_URL}/api/worker-registry/task-completed`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + worker_id: this.workerId, + success + }) + }); + } catch { + // Ignore errors + } + } + + /** + * Start registry heartbeat interval + */ + private startRegistryHeartbeat(): void { + this.registryHeartbeatInterval = setInterval(async () => { + await this.sendRegistryHeartbeat(); + }, HEARTBEAT_INTERVAL_MS); + } + + /** + * Stop registry heartbeat interval + */ + private stopRegistryHeartbeat(): void { + if (this.registryHeartbeatInterval) { + clearInterval(this.registryHeartbeatInterval); + this.registryHeartbeatInterval = null; + } } /** @@ -74,7 +252,18 @@ export class TaskWorker { */ async start(): Promise { this.isRunning = true; - console.log(`[TaskWorker] Starting worker ${this.workerId} for role: ${this.role}`); + + // Initialize stealth systems (proxy rotation, fingerprints) + await this.initializeStealth(); + + // Register with the API to get a friendly name + await this.register(); + + // Start registry heartbeat + this.startRegistryHeartbeat(); + + const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)'; + console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg}`); while (this.isRunning) { try { @@ -91,10 +280,12 @@ export class TaskWorker { /** * Stop the worker */ - stop(): void { + async stop(): Promise { this.isRunning = false; this.stopHeartbeat(); - console.log(`[TaskWorker] Stopping worker ${this.workerId}...`); + this.stopRegistryHeartbeat(); + await this.deregister(); + console.log(`[TaskWorker] ${this.friendlyName} stopped`); } /** @@ -142,7 +333,8 @@ export class TaskWorker { if (result.success) { // Mark as completed await taskService.completeTask(task.id, result); - console.log(`[TaskWorker] Task ${task.id} completed successfully`); + await this.reportTaskCompletion(true); + console.log(`[TaskWorker] ${this.friendlyName} completed task ${task.id}`); // Chain next task if applicable const chainedTask = await taskService.chainNextTask({ @@ -156,12 +348,14 @@ export class TaskWorker { } else { // Mark as failed await taskService.failTask(task.id, result.error || 'Unknown error'); - console.log(`[TaskWorker] Task ${task.id} failed: ${result.error}`); + await this.reportTaskCompletion(false); + console.log(`[TaskWorker] ${this.friendlyName} failed task ${task.id}: ${result.error}`); } } catch (error: any) { // Mark as failed await taskService.failTask(task.id, error.message); - console.error(`[TaskWorker] Task ${task.id} threw error:`, error.message); + await this.reportTaskCompletion(false); + console.error(`[TaskWorker] ${this.friendlyName} task ${task.id} error:`, error.message); } finally { this.stopHeartbeat(); this.currentTask = null; @@ -201,7 +395,7 @@ export class TaskWorker { /** * Get worker info */ - getInfo(): { workerId: string; role: TaskRole; isRunning: boolean; currentTaskId: number | null } { + getInfo(): { workerId: string; role: TaskRole | null; isRunning: 
boolean; currentTaskId: number | null } { return { workerId: this.workerId, role: this.role, @@ -216,30 +410,27 @@ export class TaskWorker { // ============================================================ async function main(): Promise { - const role = process.env.WORKER_ROLE as TaskRole; - - if (!role) { - console.error('Error: WORKER_ROLE environment variable is required'); - console.error('Valid roles: store_discovery, entry_point_discovery, product_discovery, product_resync, analytics_refresh'); - process.exit(1); - } + const role = process.env.WORKER_ROLE as TaskRole | undefined; const validRoles: TaskRole[] = [ 'store_discovery', 'entry_point_discovery', 'product_discovery', - 'product_resync', + 'product_refresh', 'analytics_refresh', ]; - if (!validRoles.includes(role)) { + // If role specified, validate it + if (role && !validRoles.includes(role)) { console.error(`Error: Invalid WORKER_ROLE: ${role}`); console.error(`Valid roles: ${validRoles.join(', ')}`); + console.error('Or omit WORKER_ROLE for role-agnostic worker (any task)'); process.exit(1); } const workerId = process.env.WORKER_ID; - const worker = new TaskWorker(role, workerId); + // Pass null for role-agnostic, or the specific role + const worker = new TaskWorker(role || null, workerId); // Handle graceful shutdown process.on('SIGTERM', () => { diff --git a/cannaiq/src/lib/api.ts b/cannaiq/src/lib/api.ts index 2f8342a3..b96f7fd9 100755 --- a/cannaiq/src/lib/api.ts +++ b/cannaiq/src/lib/api.ts @@ -113,7 +113,7 @@ class ApiClient { }); } - async getDispensaries(params?: { limit?: number; offset?: number; search?: string; city?: string; state?: string; crawl_enabled?: string }) { + async getDispensaries(params?: { limit?: number; offset?: number; search?: string; city?: string; state?: string; crawl_enabled?: string; status?: string }) { const searchParams = new URLSearchParams(); if (params?.limit) searchParams.append('limit', params.limit.toString()); if (params?.offset) searchParams.append('offset', params.offset.toString()); @@ -121,10 +121,15 @@ class ApiClient { if (params?.city) searchParams.append('city', params.city); if (params?.state) searchParams.append('state', params.state); if (params?.crawl_enabled) searchParams.append('crawl_enabled', params.crawl_enabled); + if (params?.status) searchParams.append('status', params.status); const queryString = searchParams.toString() ? 
`?${searchParams.toString()}` : ''; return this.request<{ dispensaries: any[]; total: number; limit: number; offset: number; hasMore: boolean }>(`/api/dispensaries${queryString}`); } + async getDroppedStores() { + return this.request<{ dropped_count: number; dropped_stores: any[] }>('/api/dispensaries/stats/dropped'); + } + async getDispensary(slug: string) { return this.request(`/api/dispensaries/${slug}`); } diff --git a/cannaiq/src/pages/Dashboard.tsx b/cannaiq/src/pages/Dashboard.tsx index ff747b85..5a4802e8 100755 --- a/cannaiq/src/pages/Dashboard.tsx +++ b/cannaiq/src/pages/Dashboard.tsx @@ -46,12 +46,33 @@ export function Dashboard() { const [pendingChangesCount, setPendingChangesCount] = useState(0); const [showNotification, setShowNotification] = useState(false); const [taskCounts, setTaskCounts] = useState | null>(null); + const [droppedStoresCount, setDroppedStoresCount] = useState(0); + const [showDroppedAlert, setShowDroppedAlert] = useState(false); useEffect(() => { loadData(); checkNotificationStatus(); + checkDroppedStores(); }, []); + const checkDroppedStores = async () => { + try { + const data = await api.getDroppedStores(); + setDroppedStoresCount(data.dropped_count); + // Check if notification was dismissed for this count + const dismissedCount = localStorage.getItem('dismissedDroppedStoresCount'); + const isDismissed = dismissedCount && parseInt(dismissedCount) >= data.dropped_count; + setShowDroppedAlert(data.dropped_count > 0 && !isDismissed); + } catch (error) { + console.error('Failed to check dropped stores:', error); + } + }; + + const handleDismissDroppedAlert = () => { + localStorage.setItem('dismissedDroppedStoresCount', droppedStoresCount.toString()); + setShowDroppedAlert(false); + }; + const checkNotificationStatus = async () => { try { // Fetch real pending changes count from API @@ -214,6 +235,40 @@ export function Dashboard() { )} + {/* Dropped Stores Alert */} + {showDroppedAlert && ( +
+        <div className="bg-amber-50 border border-amber-200 rounded-lg p-4 mb-6">
+          <div className="flex items-start justify-between gap-4">
+            <div>
+              <div className="font-medium text-amber-900">
+                {droppedStoresCount} dropped store{droppedStoresCount !== 1 ? 's' : ''} need{droppedStoresCount === 1 ? 's' : ''} review
+              </div>
+              <div className="text-sm text-amber-700 mt-1">
+                These stores were not found in the latest Dutchie discovery and may have stopped using the platform
+              </div>
+            </div>
+            <button
+              onClick={handleDismissDroppedAlert}
+              className="text-sm font-medium text-amber-700 hover:text-amber-900"
+            >
+              Dismiss
+            </button>
+          </div>
+        </div>
+      )}
{/* Header */}
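Since the diff only shows the server side of the new visitor-tracking endpoint, a minimal client-side sketch may help readers of this patch. This is illustrative only: the trackVisit helper, the sessionStorage key, and the assumption that no extra API-key header is required for /visitor/track are mine, not code from this change set.

// Hypothetical client helper for POST /api/v1/visitor/track (illustration, not part of this patch).
interface TrackedLocation {
  city: string | null;
  state: string | null;
  stateCode: string | null;
  lat: number | null;
  lng: number | null;
}

async function trackVisit(pagePath: string): Promise<TrackedLocation | null> {
  // Reuse one client-generated session id so unique_sessions counts stay meaningful.
  let sessionId = sessionStorage.getItem('visitor_session_id');
  if (!sessionId) {
    sessionId = crypto.randomUUID();
    sessionStorage.setItem('visitor_session_id', sessionId);
  }

  const res = await fetch('/api/v1/visitor/track', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      domain: window.location.hostname, // e.g. 'findagram.co'
      page_path: pagePath,
      session_id: sessionId,
      referrer: document.referrer || null,
    }),
  });

  const data = await res.json();
  // Tracking is non-critical by design: lookup failures come back as success: false with location: null.
  return data.success ? data.location : null;
}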
diff --git a/cannaiq/src/pages/Dispensaries.tsx b/cannaiq/src/pages/Dispensaries.tsx index d2fef3d7..2b788c6e 100644 --- a/cannaiq/src/pages/Dispensaries.tsx +++ b/cannaiq/src/pages/Dispensaries.tsx @@ -13,6 +13,7 @@ export function Dispensaries() { const [searchTerm, setSearchTerm] = useState(''); const [debouncedSearch, setDebouncedSearch] = useState(''); const [filterState, setFilterState] = useState(''); + const [filterStatus, setFilterStatus] = useState(''); const [editingDispensary, setEditingDispensary] = useState(null); const [editForm, setEditForm] = useState({}); const [total, setTotal] = useState(0); @@ -51,6 +52,7 @@ export function Dispensaries() { offset, search: debouncedSearch || undefined, state: filterState || undefined, + status: filterStatus || undefined, crawl_enabled: 'all' }); setDispensaries(data.dispensaries); @@ -61,7 +63,7 @@ export function Dispensaries() { } finally { setLoading(false); } - }, [offset, debouncedSearch, filterState]); + }, [offset, debouncedSearch, filterState, filterStatus]); useEffect(() => { loadDispensaries(); @@ -110,6 +112,11 @@ export function Dispensaries() { setOffset(0); // Reset to first page }; + const handleStatusFilter = (status: string) => { + setFilterStatus(status); + setOffset(0); // Reset to first page + }; + return (
@@ -123,7 +130,7 @@ export function Dispensaries() { {/* Filters */}
-
+
+
+ + +
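A small usage sketch (not part of the patch) of the status filter and dropped-stores endpoint wired up in the api client and Dispensaries page above. The 'dropped' status value is an assumption here; the accepted values are defined server-side in the dispensaries route.

// Sketch: fetching dispensaries filtered by status, plus the dropped-stores summary.
async function loadDroppedView() {
  // Paged list filtered by status (same client method the Dispensaries page calls).
  const page = await api.getDispensaries({ limit: 50, offset: 0, status: 'dropped' });
  console.log(`Found ${page.total} dispensaries with status=dropped`);

  // Aggregate view consumed by the Dashboard alert.
  const summary = await api.getDroppedStores();
  console.log(`${summary.dropped_count} stores missing from the latest discovery`);
}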
diff --git a/cannaiq/src/pages/JobQueue.tsx b/cannaiq/src/pages/JobQueue.tsx index 3faaa036..3dbdace9 100644 --- a/cannaiq/src/pages/JobQueue.tsx +++ b/cannaiq/src/pages/JobQueue.tsx @@ -2,81 +2,430 @@ import { useState, useEffect, useCallback } from 'react'; import { Layout } from '../components/Layout'; import { api } from '../lib/api'; import { - ListOrdered, - Play, - Pause, RefreshCw, XCircle, - RotateCcw, - ArrowUp, - ArrowDown, Clock, CheckCircle, - AlertTriangle, Activity, - Filter, - ChevronUp, - ChevronDown, + ChevronLeft, + ChevronRight, + Users, + Inbox, + Zap, + Timer, + Plus, + X, + Search, + Calendar, } from 'lucide-react'; -interface Job { +// Worker from registry +interface WorkerResources { + memory_mb?: number; + memory_total_mb?: number; + memory_rss_mb?: number; + cpu_user_ms?: number; + cpu_system_ms?: number; +} + +interface Worker { id: number; - dispensary_id: number; - dispensary_name: string | null; - city: string | null; - state: string | null; - job_type: string; - trigger_type: string; - priority: number; + worker_id: string; + friendly_name: string; + role: string; status: string; - scheduled_at: string | null; + pod_name: string | null; + hostname: string | null; + started_at: string; + last_heartbeat_at: string; + last_task_at: string | null; + tasks_completed: number; + tasks_failed: number; + current_task_id: number | null; + health_status: string; + seconds_since_heartbeat: number; + metadata?: WorkerResources; +} + +// Task from worker_tasks +interface Task { + id: number; + role: string; + dispensary_id: number | null; + dispensary_name?: string; + dispensary_slug?: string; + status: string; + priority: number; + claimed_by: string | null; + claimed_at: string | null; started_at: string | null; completed_at: string | null; - duration_ms: number | null; - products_found: number | null; - error_message: string | null; - retry_count: number; - max_retries: number; - worker_id: string | null; + error: string | null; + result: any; created_at: string; + updated_at: string; } -interface QueueStats { +interface TaskCounts { pending: number; running: number; - completed_24h: number; - failed_24h: number; - cancelled: number; - avg_duration_ms: number | null; - max_priority: number; - oldest_pending: string | null; - estimated_wait_ms: number; - queue_paused: boolean; + completed: number; + failed: number; + total: number; } -interface ScheduledWorker { +interface Store { id: number; - worker_name: string; - run_role: string; - job_name: string; - description: string; - enabled: boolean; - next_run_at: string | null; - last_run_at: string | null; - last_status: string | null; - base_interval_minutes: number; + name: string; + state_code: string; + crawl_enabled: boolean; } -interface JobsByType { - job_type: string; - count: string; +interface CreateTaskModalProps { + isOpen: boolean; + onClose: () => void; + onTaskCreated: () => void; } -function formatDuration(ms: number | null | undefined): string { - if (!ms) return '-'; - if (ms < 1000) return `${ms}ms`; - if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`; - return `${Math.round(ms / 60000)}m`; +const ROLES = [ + { id: 'product_refresh', name: 'Product Resync', description: 'Re-crawl products for price/stock changes' }, + { id: 'product_discovery', name: 'Product Discovery', description: 'Initial crawl for new dispensaries' }, + { id: 'store_discovery', name: 'Store Discovery', description: 'Discover new dispensary locations' }, + { id: 'entry_point_discovery', name: 'Entry Point Discovery', description: 
'Resolve platform IDs from menu URLs' }, + { id: 'analytics_refresh', name: 'Analytics Refresh', description: 'Refresh materialized views' }, +]; + +function CreateTaskModal({ isOpen, onClose, onTaskCreated }: CreateTaskModalProps) { + const [role, setRole] = useState('product_refresh'); + const [priority, setPriority] = useState(10); + const [scheduleType, setScheduleType] = useState<'now' | 'scheduled'>('now'); + const [scheduledFor, setScheduledFor] = useState(''); + const [stores, setStores] = useState([]); + const [storeSearch, setStoreSearch] = useState(''); + const [selectedStores, setSelectedStores] = useState([]); + const [loading, setLoading] = useState(false); + const [storesLoading, setStoresLoading] = useState(false); + const [error, setError] = useState(null); + + // Fetch stores when modal opens + useEffect(() => { + if (isOpen) { + fetchStores(); + } + }, [isOpen]); + + const fetchStores = async () => { + setStoresLoading(true); + try { + const res = await api.get('/api/stores?limit=500'); + setStores(res.data.stores || res.data || []); + } catch (err) { + console.error('Failed to fetch stores:', err); + } finally { + setStoresLoading(false); + } + }; + + const filteredStores = stores.filter(s => + s.name.toLowerCase().includes(storeSearch.toLowerCase()) || + s.state_code?.toLowerCase().includes(storeSearch.toLowerCase()) + ); + + const toggleStore = (store: Store) => { + if (selectedStores.find(s => s.id === store.id)) { + setSelectedStores(selectedStores.filter(s => s.id !== store.id)); + } else { + setSelectedStores([...selectedStores, store]); + } + }; + + const selectAll = () => { + setSelectedStores(filteredStores); + }; + + const clearAll = () => { + setSelectedStores([]); + }; + + const handleSubmit = async () => { + setLoading(true); + setError(null); + + try { + const scheduledDate = scheduleType === 'scheduled' && scheduledFor + ? new Date(scheduledFor).toISOString() + : undefined; + + // For store_discovery and analytics_refresh, no store is needed + if (role === 'store_discovery' || role === 'analytics_refresh') { + await api.post('/api/tasks', { + role, + priority, + scheduled_for: scheduledDate, + platform: 'dutchie', + }); + } else if (selectedStores.length === 0) { + setError('Please select at least one store'); + setLoading(false); + return; + } else { + // Create tasks for each selected store + for (const store of selectedStores) { + await api.post('/api/tasks', { + role, + dispensary_id: store.id, + priority, + scheduled_for: scheduledDate, + platform: 'dutchie', + }); + } + } + + onTaskCreated(); + onClose(); + // Reset form + setSelectedStores([]); + setPriority(10); + setScheduleType('now'); + setScheduledFor(''); + } catch (err: any) { + setError(err.response?.data?.error || err.message || 'Failed to create task'); + } finally { + setLoading(false); + } + }; + + if (!isOpen) return null; + + const needsStore = role !== 'store_discovery' && role !== 'analytics_refresh'; + + return ( +
+
+ {/* Backdrop */} +
+ + {/* Modal */} +
+ {/* Header */} +
+

Create New Task

+ +
+ + {/* Body */} +
+ {error && ( +
+ {error} +
+ )} + + {/* Role Selection */} +
+ +
+ {ROLES.map(r => ( + + ))} +
+
+ + {/* Store Selection (for roles that need it) */} + {needsStore && ( +
+ +
+ {/* Search */} +
+
+ + setStoreSearch(e.target.value)} + placeholder="Search stores..." + className="w-full pl-9 pr-3 py-2 text-sm border border-gray-200 rounded" + /> +
+
+ + | + +
+
+ + {/* Store List */} +
+ {storesLoading ? ( +
+ + Loading stores... +
+ ) : filteredStores.length === 0 ? ( +
No stores found
+ ) : ( + filteredStores.map(store => ( + + )) + )} +
+
+
+ )} + + {/* Priority */} +
+ + setPriority(parseInt(e.target.value))} + className="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer" + /> +
+ 0 (Low - Batch) + 10 (Normal) + 50 (High) + 100 (Urgent) +
+
+ + {/* Schedule */} +
+ +
+ + +
+ + {scheduleType === 'scheduled' && ( +
+
+ + setScheduledFor(e.target.value)} + className="w-full pl-9 pr-3 py-2 text-sm border border-gray-200 rounded" + /> +
+
+ )} +
+
+ + {/* Footer */} +
+
+ {needsStore ? ( + selectedStores.length > 0 ? ( + `Will create ${selectedStores.length} task${selectedStores.length > 1 ? 's' : ''}` + ) : ( + 'Select stores to create tasks' + ) + ) : ( + 'Will create 1 task' + )} +
+
+ + +
+
+
+
+
+ ); } function formatRelativeTime(dateStr: string | null): string { @@ -84,236 +433,311 @@ function formatRelativeTime(dateStr: string | null): string { const date = new Date(dateStr); const now = new Date(); const diffMs = now.getTime() - date.getTime(); + const diffSecs = Math.round(diffMs / 1000); const diffMins = Math.round(diffMs / 60000); - if (diffMins < 1) return 'just now'; + if (diffSecs < 60) return `${diffSecs}s ago`; if (diffMins < 60) return `${diffMins}m ago`; if (diffMins < 1440) return `${Math.round(diffMins / 60)}h ago`; return `${Math.round(diffMins / 1440)}d ago`; } -function formatFutureTime(dateStr: string | null): string { - if (!dateStr) return 'not scheduled'; - const date = new Date(dateStr); - const now = new Date(); - const diffMs = date.getTime() - now.getTime(); - const diffMins = Math.round(diffMs / 60000); +function formatDuration(startStr: string | null, endStr: string | null): string { + if (!startStr) return '-'; + const start = new Date(startStr); + const end = endStr ? new Date(endStr) : new Date(); + const diffMs = end.getTime() - start.getTime(); - if (diffMins < 0) return 'overdue'; - if (diffMins < 1) return 'now'; - if (diffMins < 60) return `in ${diffMins}m`; - if (diffMins < 1440) return `in ${Math.round(diffMins / 60)}h`; - return `in ${Math.round(diffMins / 1440)}d`; + if (diffMs < 1000) return `${diffMs}ms`; + if (diffMs < 60000) return `${(diffMs / 1000).toFixed(1)}s`; + const mins = Math.floor(diffMs / 60000); + const secs = Math.floor((diffMs % 60000) / 1000); + if (mins < 60) return `${mins}m ${secs}s`; + const hrs = Math.floor(mins / 60); + return `${hrs}h ${mins % 60}m`; } -function formatIntervalHuman(minutes: number): string { - if (minutes < 60) return `every ${minutes}m`; - if (minutes < 1440) return `every ${Math.round(minutes / 60)}h`; - return `every ${Math.round(minutes / 1440)}d`; +// Live timer component for running tasks +function LiveTimer({ startedAt, isRunning }: { startedAt: string | null; isRunning: boolean }) { + const [, setTick] = useState(0); + + useEffect(() => { + if (!isRunning || !startedAt) return; + const interval = setInterval(() => setTick(t => t + 1), 1000); + return () => clearInterval(interval); + }, [isRunning, startedAt]); + + if (!startedAt) return -; + + const duration = formatDuration(startedAt, null); + + if (isRunning) { + return ( + + + {duration} + + ); + } + + return {duration}; } -interface StatusBadgeProps { - status: string; - errorMessage?: string | null; - productsFound?: number | null; - durationMs?: number | null; - triggerType?: string | null; -} - -function StatusBadge({ status, errorMessage, productsFound, durationMs, triggerType }: StatusBadgeProps) { - const config: Record = { - pending: { - bg: 'bg-yellow-100', - text: 'text-yellow-700', - icon: Clock, - tooltip: triggerType === 'scheduled' - ? 'Waiting in queue (scheduled crawl)' - : triggerType === 'manual' - ? 'Waiting in queue (manually triggered)' - : 'Waiting in queue for a worker to pick up' - }, - running: { - bg: 'bg-blue-100', - text: 'text-blue-700', - icon: Activity, - tooltip: 'Currently being processed by a worker' - }, - completed: { - bg: 'bg-green-100', - text: 'text-green-700', - icon: CheckCircle, - tooltip: productsFound !== null && productsFound !== undefined - ? `✓ Found ${productsFound} products in ${formatDuration(durationMs)}` - : durationMs - ? 
`✓ Completed in ${formatDuration(durationMs)}` - : '✓ Successfully completed' - }, - failed: { - bg: 'bg-red-100', - text: 'text-red-700', - icon: XCircle, - tooltip: errorMessage - ? `✗ Error: ${errorMessage.slice(0, 100)}${errorMessage.length > 100 ? '...' : ''}` - : '✗ Job failed - check details for error' - }, - cancelled: { - bg: 'bg-gray-100', - text: 'text-gray-700', - icon: XCircle, - tooltip: 'Job was cancelled by user' - }, +function WorkerStatusBadge({ status, healthStatus }: { status: string; healthStatus: string }) { + const getColors = () => { + if (healthStatus === 'offline' || status === 'offline') return 'bg-gray-100 text-gray-600'; + if (healthStatus === 'stale') return 'bg-yellow-100 text-yellow-700'; + if (healthStatus === 'busy' || status === 'active') return 'bg-blue-100 text-blue-700'; + if (healthStatus === 'ready' || status === 'idle') return 'bg-green-100 text-green-700'; + return 'bg-gray-100 text-gray-600'; }; - const cfg = config[status] || { bg: 'bg-gray-100', text: 'text-gray-700', icon: Clock, tooltip: status }; + return ( + + {healthStatus || status} + + ); +} + +function TaskStatusBadge({ status }: { status: string }) { + const config: Record = { + pending: { bg: 'bg-yellow-100', text: 'text-yellow-700', icon: Clock }, + running: { bg: 'bg-blue-100', text: 'text-blue-700', icon: Activity }, + completed: { bg: 'bg-green-100', text: 'text-green-700', icon: CheckCircle }, + failed: { bg: 'bg-red-100', text: 'text-red-700', icon: XCircle }, + }; + + const cfg = config[status] || { bg: 'bg-gray-100', text: 'text-gray-700', icon: Clock }; const Icon = cfg.icon; return ( - + {status} ); } -function PriorityBadge({ priority }: { priority: number }) { - let bg = 'bg-gray-100'; - let text = 'text-gray-700'; - - if (priority >= 80) { - bg = 'bg-red-100'; - text = 'text-red-700'; - } else if (priority >= 50) { - bg = 'bg-orange-100'; - text = 'text-orange-700'; - } else if (priority >= 20) { - bg = 'bg-yellow-100'; - text = 'text-yellow-700'; - } +function RoleBadge({ role }: { role: string }) { + const colors: Record = { + product_refresh: 'bg-emerald-100 text-emerald-700', + product_discovery: 'bg-blue-100 text-blue-700', + store_discovery: 'bg-purple-100 text-purple-700', + entry_point_discovery: 'bg-orange-100 text-orange-700', + analytics_refresh: 'bg-pink-100 text-pink-700', + }; return ( - - {priority} + + {role.replace(/_/g, ' ')} ); } +function PriorityBadge({ priority }: { priority: number }) { + let bg = 'bg-gray-100 text-gray-700'; + if (priority >= 80) bg = 'bg-red-100 text-red-700'; + else if (priority >= 50) bg = 'bg-orange-100 text-orange-700'; + else if (priority >= 20) bg = 'bg-yellow-100 text-yellow-700'; + + return ( + + P{priority} + + ); +} + +// Pod visualization - shows pod as hub with worker nodes radiating out +function PodVisualization({ podName, workers }: { podName: string; workers: Worker[] }) { + const busyCount = workers.filter(w => w.current_task_id !== null).length; + const allBusy = busyCount === workers.length; + const allIdle = busyCount === 0; + + // Aggregate resource stats for the pod + const totalMemoryMb = workers.reduce((sum, w) => sum + (w.metadata?.memory_rss_mb || 0), 0); + const totalCpuUserMs = workers.reduce((sum, w) => sum + (w.metadata?.cpu_user_ms || 0), 0); + const totalCpuSystemMs = workers.reduce((sum, w) => sum + (w.metadata?.cpu_system_ms || 0), 0); + const totalCompleted = workers.reduce((sum, w) => sum + w.tasks_completed, 0); + const totalFailed = workers.reduce((sum, w) => sum + w.tasks_failed, 0); + + // 
Format CPU time + const formatCpuTime = (ms: number) => { + if (ms < 1000) return `${ms}ms`; + if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`; + return `${(ms / 60000).toFixed(1)}m`; + }; + + // Pod color based on worker status + const podColor = allBusy ? 'bg-blue-500' : allIdle ? 'bg-emerald-500' : 'bg-yellow-500'; + const podBorder = allBusy ? 'border-blue-400' : allIdle ? 'border-emerald-400' : 'border-yellow-400'; + const podGlow = allBusy ? 'shadow-blue-200' : allIdle ? 'shadow-emerald-200' : 'shadow-yellow-200'; + + // Build pod tooltip + const podTooltip = [ + `Pod: ${podName}`, + `Workers: ${busyCount}/${workers.length} busy`, + `Memory: ${totalMemoryMb} MB (RSS)`, + `CPU: ${formatCpuTime(totalCpuUserMs)} user, ${formatCpuTime(totalCpuSystemMs)} system`, + `Tasks: ${totalCompleted} completed, ${totalFailed} failed`, + ].join('\n'); + + return ( +
+ {/* Pod hub */} +
+ {/* Center pod circle */} +
+ {podName} +
+ + {/* Worker nodes radiating out */} + {workers.map((worker, index) => { + const angle = (index * 360) / workers.length - 90; // Start from top + const radians = (angle * Math.PI) / 180; + const radius = 55; // Distance from center + const x = Math.cos(radians) * radius; + const y = Math.sin(radians) * radius; + + const isBusy = worker.current_task_id !== null; + const workerColor = isBusy ? 'bg-blue-500' : 'bg-emerald-500'; + const workerBorder = isBusy ? 'border-blue-300' : 'border-emerald-300'; + + // Line from center to worker + const lineLength = radius - 10; + const lineX = Math.cos(radians) * (lineLength / 2 + 10); + const lineY = Math.sin(radians) * (lineLength / 2 + 10); + + return ( +
+ {/* Connection line */} +
+ {/* Worker node */} +
+ {index + 1} +
+
+ ); + })} +
+ + {/* Pod stats */} +
+

+ {busyCount}/{workers.length} busy +

+
+
+ ); +} + +// Group workers by pod +function groupWorkersByPod(workers: Worker[]): Map { + const pods = new Map(); + for (const worker of workers) { + const podName = worker.pod_name || 'Unknown'; + if (!pods.has(podName)) { + pods.set(podName, []); + } + pods.get(podName)!.push(worker); + } + return pods; +} + export function JobQueue() { - const [jobs, setJobs] = useState([]); - const [stats, setStats] = useState(null); - const [byType, setByType] = useState([]); - const [scheduledWorkers, setScheduledWorkers] = useState([]); + const [workers, setWorkers] = useState([]); + const [tasks, setTasks] = useState([]); + const [counts, setCounts] = useState(null); const [loading, setLoading] = useState(true); const [error, setError] = useState(null); - const [statusFilter, setStatusFilter] = useState('pending'); - const [expandedJob, setExpandedJob] = useState(null); - const [updating, setUpdating] = useState(null); + const [showCreateModal, setShowCreateModal] = useState(false); - const fetchData = useCallback(async () => { + // Pagination + const [taskPage, setTaskPage] = useState(0); + const tasksPerPage = 25; + + // Cleanup stale workers (called once on page load) + const cleanupStaleWorkers = useCallback(async () => { try { - const [jobsRes, statsRes, workersRes] = await Promise.all([ - api.get(`/api/job-queue?status=${statusFilter}&limit=100`), - api.get('/api/job-queue/stats'), - api.get('/api/workers'), + await api.post('/api/worker-registry/cleanup', { stale_threshold_minutes: 2 }); + } catch (err: any) { + console.error('Failed to cleanup stale workers:', err); + } + }, []); + + // Fetch workers + const fetchWorkers = useCallback(async () => { + try { + const workersRes = await api.get('/api/worker-registry/workers'); + setWorkers(workersRes.data.workers || []); + } catch (err: any) { + console.error('Failed to fetch workers:', err); + } + }, []); + + // Fetch tasks and counts (auto-refresh every 15s) + const fetchTasks = useCallback(async () => { + try { + const taskUrl = `/api/tasks?limit=${tasksPerPage}&offset=${taskPage * tasksPerPage}`; + + const [tasksRes, countsRes] = await Promise.all([ + api.get(taskUrl), + api.get('/api/tasks/counts'), ]); - setJobs(jobsRes.data.jobs || []); - setStats(statsRes.data.stats); - setByType(statsRes.data.by_type || []); - - // Map workers and sort by next_run_at - const workers = (workersRes.data.workers || []) - .filter((w: any) => w.enabled) - .map((w: any) => ({ - id: w.id, - worker_name: w.worker_name, - run_role: w.run_role || w.worker_role, - job_name: w.job_name, - description: w.description, - enabled: w.enabled, - next_run_at: w.next_run_at, - last_run_at: w.last_run_at, - last_status: w.last_status, - base_interval_minutes: w.base_interval_minutes, - })) - .sort((a: ScheduledWorker, b: ScheduledWorker) => { - if (!a.next_run_at) return 1; - if (!b.next_run_at) return -1; - return new Date(a.next_run_at).getTime() - new Date(b.next_run_at).getTime(); - }); - setScheduledWorkers(workers); + setTasks(tasksRes.data.tasks || []); + setCounts(countsRes.data); setError(null); } catch (err: any) { - setError(err.message || 'Failed to fetch queue data'); + console.error('Fetch error:', err); + setError(err.message || 'Failed to fetch data'); } finally { setLoading(false); } - }, [statusFilter]); + }, [taskPage]); + // Initial load - cleanup stale workers first, then fetch useEffect(() => { - fetchData(); - const interval = setInterval(fetchData, 5000); + cleanupStaleWorkers().then(() => { + fetchWorkers(); + fetchTasks(); + }); + }, 
[cleanupStaleWorkers, fetchWorkers, fetchTasks]); + + // Auto-refresh tasks every 15 seconds + useEffect(() => { + const interval = setInterval(fetchTasks, 15000); return () => clearInterval(interval); - }, [fetchData]); + }, [fetchTasks]); - const handlePriorityChange = async (jobId: number, newPriority: number) => { - setUpdating(jobId); - try { - await api.put(`/api/job-queue/${jobId}/priority`, { priority: newPriority }); - fetchData(); - } catch (err: any) { - console.error('Failed to update priority:', err); - } finally { - setUpdating(null); - } - }; + // Refresh workers every 60 seconds + useEffect(() => { + const interval = setInterval(fetchWorkers, 60000); + return () => clearInterval(interval); + }, [fetchWorkers]); - const handleCancel = async (jobId: number) => { - if (!confirm('Cancel this job?')) return; - setUpdating(jobId); - try { - await api.post(`/api/job-queue/${jobId}/cancel`); - fetchData(); - } catch (err: any) { - console.error('Failed to cancel job:', err); - } finally { - setUpdating(null); - } - }; - - const handleRetry = async (jobId: number) => { - setUpdating(jobId); - try { - await api.post(`/api/job-queue/${jobId}/retry`); - fetchData(); - } catch (err: any) { - console.error('Failed to retry job:', err); - } finally { - setUpdating(null); - } - }; - - const handlePauseResume = async () => { - try { - if (stats?.queue_paused) { - await api.post('/api/job-queue/resume'); - } else { - await api.post('/api/job-queue/pause'); - } - fetchData(); - } catch (err: any) { - console.error('Failed to pause/resume:', err); - } - }; - - const handleBumpPriority = async (jobId: number, delta: number) => { - const job = jobs.find(j => j.id === jobId); - if (!job) return; - const newPriority = Math.max(0, Math.min(100, job.priority + delta)); - await handlePriorityChange(jobId, newPriority); - }; + // Get active workers (for display) + const activeWorkers = workers.filter(w => w.status !== 'offline' && w.status !== 'terminated'); + const busyWorkers = workers.filter(w => w.current_task_id !== null); if (loading) { return ( @@ -331,42 +755,27 @@ export function JobQueue() { {/* Header */}
-

Job Queue

+

Task Queue

- Manage crawler job priorities and queue + Workers claim tasks from the pool in priority order (auto-refreshes every 15s)

-
- - -
+
+ {/* Create Task Modal */} + setShowCreateModal(false)} + onTaskCreated={fetchTasks} + /> + {error && (

{error}

@@ -374,16 +783,27 @@ export function JobQueue() { )} {/* Stats Cards */} - {stats && ( -
+ {counts && ( +
+
+
+
+ +
+
+

Active Workers

+

{activeWorkers.length}

+
+
+
- +
-

Pending

-

{stats.pending}

+

Pending Tasks

+

{counts.pending}

@@ -394,7 +814,7 @@ export function JobQueue() {

Running

-

{stats.running}

+

{counts.running}

@@ -404,8 +824,8 @@ export function JobQueue() {
-

Completed (24h)

-

{stats.completed_24h}

+

Completed

+

{counts.completed}

@@ -415,309 +835,187 @@ export function JobQueue() {
-

Failed (24h)

-

{stats.failed_24h}

-
-
-
-
-
-
- -
-
-

Max Priority

-

{stats.max_priority}

-
-
-
-
-
-
- -
-
-

Est. Wait

-

{formatDuration(stats.estimated_wait_ms)}

+

Failed

+

{counts.failed}

)} - {/* Queue Paused Warning */} - {stats?.queue_paused && ( -
- -

Queue is paused. Jobs will not be processed until resumed.

-
- )} - - {/* Scheduled Worker Tasks */} - {scheduledWorkers.length > 0 && ( -
-
-

Upcoming Scheduled Tasks

-

Workers and their next scheduled runs

-
-
- {scheduledWorkers.map((worker) => { - const nextRunDate = worker.next_run_at ? new Date(worker.next_run_at) : null; - const isOverdue = nextRunDate && nextRunDate.getTime() < Date.now(); - const isImminent = nextRunDate && (nextRunDate.getTime() - Date.now()) < 15 * 60 * 1000; // within 15 min - - return ( -
-
- {/* Worker Avatar */} -
- {worker.worker_name?.charAt(0) || '?'} -
- - {/* Worker Info */} -
-
- {worker.worker_name} - - {worker.run_role} - -
-

- {worker.description || worker.job_name} -

-
-
- - {/* Schedule Info */} -
-
- {formatFutureTime(worker.next_run_at)} -
-
- {formatIntervalHuman(worker.base_interval_minutes)} -
-
-
- ); - })} + {/* Pods & Workers Section */} +
+
+
+
+

+ + Worker Pods ({Array.from(groupWorkersByPod(workers)).length} pods, {activeWorkers.length} workers) +

+

+ idle + | + busy + | + mixed +

+
+
+ {busyWorkers.length} busy, {activeWorkers.length - busyWorkers.length} idle +
- )} - {/* Jobs by Type */} - {byType.length > 0 && ( -
-

Pending by Type

-
- {byType.map((t) => ( - - {t.job_type}: {t.count} - - ))} + {workers.length === 0 ? ( +
+ +

No worker pods running

+

Start pods to process tasks from the queue

-
- )} - - {/* Filter */} -
-
- - Status: -
-
- {['pending', 'running', 'completed', 'failed', 'cancelled', 'all'].map((status) => ( - - ))} -
+ ) : ( +
+
+ {Array.from(groupWorkersByPod(workers)).map(([podName, podWorkers]) => ( + + ))} +
+
+ )}
- {/* Jobs Table */} + {/* Task Pool Section */}
+
+
+
+

+ + Task Pool +

+

+ Tasks waiting to be picked up by workers +

+
+ +
+
+ - - - - - - - + + + + + + + - {jobs.length === 0 ? ( + {tasks.length === 0 ? ( ) : ( - jobs.map((job) => ( - <> - setExpandedJob(expandedJob === job.id ? null : job.id)} - > + tasks.map((task) => { + // Find worker assigned to this task + const assignedWorker = task.claimed_by + ? workers.find(w => w.worker_id === task.claimed_by) + : null; + + return ( + + - - - - + + - {expandedJob === job.id && ( - - - - )} - - )) + ); + }) )}
PriorityDispensaryJob TypeStatusCreatedDurationActionsPriorityRoleDispensaryStatusAssigned ToCreatedDuration
- No jobs found + +

No tasks found

-
- - {job.status === 'pending' && ( -
- - -
- )} -
+
-
-

{job.dispensary_name || `ID: ${job.dispensary_id}`}

- {job.city &&

{job.city}, {job.state}

} -
+ +
+ {task.dispensary_slug ? ( + + {task.dispensary_slug.length > 25 + ? task.dispensary_slug.slice(0, 25) + '…' + : task.dispensary_slug} + + ) : task.dispensary_name ? ( + + {task.dispensary_name.length > 25 + ? task.dispensary_name.slice(0, 25) + '…' + : task.dispensary_name} + + ) : task.dispensary_id ? ( + ID: {task.dispensary_id} + ) : ( + - + )} {job.job_type} - + {formatRelativeTime(job.created_at)}{formatDuration(job.duration_ms)} -
- {job.status === 'pending' && ( - <> - - - - )} - {(job.status === 'failed' || job.status === 'cancelled') && ( - - )} -
+
+ {assignedWorker ? ( + + + {assignedWorker.friendly_name} + + ) : task.claimed_by ? ( + {task.claimed_by.slice(0, 12)}... + ) : ( + Unassigned + )} + + {formatRelativeTime(task.created_at)} + + {task.status === 'running' ? ( + + ) : task.started_at ? ( + formatDuration(task.started_at, task.completed_at) + ) : ( + '-' + )}
-
-
-

Job ID

-

{job.id}

-
-
-

Trigger

-

{job.trigger_type}

-
-
-

Retries

-

{job.retry_count} / {job.max_retries}

-
-
-

Worker

-

{job.worker_id || '-'}

-
- {job.products_found !== null && ( -
-

Products Found

-

{job.products_found}

-
- )} - {job.error_message && ( -
-

Error

-

{job.error_message}

-
- )} - {job.status === 'pending' && ( -
- -
- handlePriorityChange(job.id, parseInt(e.target.value))} - className="flex-1" - /> - handlePriorityChange(job.id, parseInt(e.target.value) || 0)} - className="w-16 px-2 py-1 border rounded text-sm" - /> -
-
- )} -
-
+ + {/* Pagination */} +
+
+ Showing {taskPage * tasksPerPage + 1} - {Math.min((taskPage + 1) * tasksPerPage, taskPage * tasksPerPage + tasks.length)} tasks +
+
+ + Page {taskPage + 1} + +
+
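A condensed sketch (not part of the patch) of the task-creation calls CreateTaskModal.handleSubmit issues above: one POST per selected store for store-scoped roles, and a single store-less POST for store_discovery and analytics_refresh. The helper name and signature are illustrative; the payload shape matches the diff.

// Sketch: enqueue tasks the same way CreateTaskModal does.
type CreateTaskRole =
  | 'store_discovery'
  | 'entry_point_discovery'
  | 'product_discovery'
  | 'product_refresh'
  | 'analytics_refresh';

async function enqueueTasks(
  role: CreateTaskRole,
  storeIds: number[],
  priority = 10,
  scheduledFor?: Date,
): Promise<void> {
  const scheduled_for = scheduledFor ? scheduledFor.toISOString() : undefined;

  // Roles that operate globally do not take a dispensary_id.
  if (role === 'store_discovery' || role === 'analytics_refresh') {
    await api.post('/api/tasks', { role, priority, scheduled_for, platform: 'dutchie' });
    return;
  }

  // Store-scoped roles get one task per selected store.
  for (const dispensary_id of storeIds) {
    await api.post('/api/tasks', { role, dispensary_id, priority, scheduled_for, platform: 'dutchie' });
  }
}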
diff --git a/cannaiq/src/pages/TasksDashboard.tsx b/cannaiq/src/pages/TasksDashboard.tsx index e4c6e0e3..a474aa3d 100644 --- a/cannaiq/src/pages/TasksDashboard.tsx +++ b/cannaiq/src/pages/TasksDashboard.tsx @@ -69,7 +69,7 @@ const ROLES = [ 'store_discovery', 'entry_point_discovery', 'product_discovery', - 'product_resync', + 'product_refresh', 'analytics_refresh', ]; diff --git a/cannaiq/src/pages/WorkersDashboard.tsx b/cannaiq/src/pages/WorkersDashboard.tsx index 61739986..253ff631 100644 --- a/cannaiq/src/pages/WorkersDashboard.tsx +++ b/cannaiq/src/pages/WorkersDashboard.tsx @@ -1,378 +1,238 @@ import { useState, useEffect, useCallback } from 'react'; import { Layout } from '../components/Layout'; -import { WorkerRoleBadge, formatScope } from '../components/WorkerRoleBadge'; import { api } from '../lib/api'; import { Users, - Play, - Clock, + RefreshCw, + ChevronLeft, + ChevronRight, + Activity, CheckCircle, XCircle, - AlertTriangle, - RefreshCw, - ChevronDown, - ChevronUp, - Activity, - Plus, - Settings, - X, - Save, + Clock, + Zap, + Timer, + Cpu, + Heart, + Gauge, } from 'lucide-react'; -interface Schedule { +// Worker from registry +interface Worker { id: number; - job_name: string; - description: string; - worker_name: string; - worker_role: string; - enabled: boolean; - base_interval_minutes: number; - jitter_minutes: number; - next_run_at: string | null; - last_run_at: string | null; - last_status: string | null; - job_config: any; -} - -interface RunLog { - id: number; - schedule_id: number; - job_name: string; - status: string; - started_at: string; - completed_at: string | null; - items_processed: number; - items_succeeded: number; - items_failed: number; - error_message: string | null; - metadata: any; - worker_name: string; - run_role: string; - duration_seconds?: number; -} - -interface MonitorSummary { - running_scheduled_jobs: number; - running_dispensary_crawl_jobs: number; - successful_jobs_24h: number; - failed_jobs_24h: number; - successful_crawls_24h: number; - failed_crawls_24h: number; - products_found_24h: number; - snapshots_created_24h: number; - last_job_started: string | null; - last_job_completed: string | null; - nextRuns: Schedule[]; -} - -interface WorkerRole { - id: string; - name: string; - description: string; -} - -interface StateOption { - state_code: string; - state_name: string; - dispensary_count: number; -} - -interface DispensaryOption { - id: number; - name: string; - city: string; - state_code: string; -} - -interface ChainOption { - id: number; - name: string; - dispensary_count: number; -} - -interface NewWorkerForm { - name: string; + worker_id: string; + friendly_name: string; role: string; - description: string; - interval_minutes: number; - jitter_minutes: number; - assignment_type: string; - assigned_dispensary_ids: number[]; - assigned_state_codes: string[]; - assigned_chain_ids: number[]; + status: string; + pod_name: string | null; + hostname: string | null; + started_at: string; + last_heartbeat_at: string; + last_task_at: string | null; + tasks_completed: number; + tasks_failed: number; + current_task_id: number | null; + health_status: string; + seconds_since_heartbeat: number; } -function formatDuration(seconds: number | null | undefined): string { - if (!seconds) return '-'; - if (seconds < 60) return `${Math.round(seconds)}s`; - if (seconds < 3600) return `${Math.round(seconds / 60)}m`; - return `${Math.round(seconds / 3600)}h ${Math.round((seconds % 3600) / 60)}m`; +// Current task info +interface Task { + id: number; + role: 
string; + dispensary_id: number | null; + dispensary_name?: string; + status: string; + priority: number; + started_at: string | null; + claimed_by: string | null; + worker_id: string | null; } -function formatRelativeTime(dateStr: string | null | undefined): string { +function formatRelativeTime(dateStr: string | null): string { if (!dateStr) return '-'; const date = new Date(dateStr); const now = new Date(); const diffMs = now.getTime() - date.getTime(); + const diffSecs = Math.round(diffMs / 1000); const diffMins = Math.round(diffMs / 60000); - if (diffMins < 0) { - const futureMins = Math.abs(diffMins); - if (futureMins < 60) return `in ${futureMins}m`; - if (futureMins < 1440) return `in ${Math.round(futureMins / 60)}h`; - return `in ${Math.round(futureMins / 1440)}d`; - } - - if (diffMins < 1) return 'just now'; + if (diffSecs < 60) return `${diffSecs}s ago`; if (diffMins < 60) return `${diffMins}m ago`; if (diffMins < 1440) return `${Math.round(diffMins / 60)}h ago`; return `${Math.round(diffMins / 1440)}d ago`; } -function StatusBadge({ status }: { status: string | null }) { - if (!status) return -; +function formatDuration(startStr: string | null): string { + if (!startStr) return '-'; + const start = new Date(startStr); + const now = new Date(); + const diffMs = now.getTime() - start.getTime(); - const config: Record = { - success: { bg: 'bg-green-100', text: 'text-green-700', icon: CheckCircle }, - running: { bg: 'bg-blue-100', text: 'text-blue-700', icon: Activity }, - pending: { bg: 'bg-yellow-100', text: 'text-yellow-700', icon: Clock }, - error: { bg: 'bg-red-100', text: 'text-red-700', icon: XCircle }, - partial: { bg: 'bg-orange-100', text: 'text-orange-700', icon: AlertTriangle }, - }; + if (diffMs < 1000) return `${diffMs}ms`; + if (diffMs < 60000) return `${(diffMs / 1000).toFixed(0)}s`; + const mins = Math.floor(diffMs / 60000); + const secs = Math.floor((diffMs % 60000) / 1000); + if (mins < 60) return `${mins}m ${secs}s`; + const hrs = Math.floor(mins / 60); + return `${hrs}h ${mins % 60}m`; +} - const cfg = config[status] || { bg: 'bg-gray-100', text: 'text-gray-700', icon: Clock }; - const Icon = cfg.icon; +function formatUptime(startStr: string | null): string { + if (!startStr) return '-'; + const start = new Date(startStr); + const now = new Date(); + const diffMs = now.getTime() - start.getTime(); + const diffMins = Math.floor(diffMs / 60000); + + if (diffMins < 60) return `${diffMins}m`; + const hrs = Math.floor(diffMins / 60); + if (hrs < 24) return `${hrs}h ${diffMins % 60}m`; + const days = Math.floor(hrs / 24); + return `${days}d ${hrs % 24}h`; +} + +// Calculate utilization: tasks per hour +function calculateUtilization(tasksCompleted: number, tasksFailed: number, startedAt: string | null): { rate: string; color: string } { + if (!startedAt) return { rate: '-', color: 'text-gray-400' }; + + const start = new Date(startedAt); + const now = new Date(); + const hoursUp = (now.getTime() - start.getTime()) / (1000 * 60 * 60); + + if (hoursUp < 0.01) return { rate: '-', color: 'text-gray-400' }; // Too short to measure + + const totalTasks = tasksCompleted + tasksFailed; + const tasksPerHour = totalTasks / hoursUp; + + // Color based on utilization level + let color = 'text-gray-500'; + if (tasksPerHour >= 10) color = 'text-emerald-600'; + else if (tasksPerHour >= 5) color = 'text-blue-600'; + else if (tasksPerHour >= 1) color = 'text-yellow-600'; + else if (tasksPerHour > 0) color = 'text-orange-600'; + + if (tasksPerHour >= 1) { + return { rate: 
`${tasksPerHour.toFixed(1)}/hr`, color }; + } + // If less than 1/hr, show per day + const tasksPerDay = tasksPerHour * 24; + return { rate: `${tasksPerDay.toFixed(1)}/day`, color }; +} + +// Live timer component +function LiveTimer({ startedAt }: { startedAt: string | null }) { + const [, setTick] = useState(0); + + useEffect(() => { + if (!startedAt) return; + const interval = setInterval(() => setTick(t => t + 1), 1000); + return () => clearInterval(interval); + }, [startedAt]); + + if (!startedAt) return -; return ( - + + + {formatDuration(startedAt)} + + ); +} + +function RoleBadge({ role }: { role: string }) { + const colors: Record = { + product_refresh: 'bg-emerald-100 text-emerald-700', + product_discovery: 'bg-blue-100 text-blue-700', + store_discovery: 'bg-purple-100 text-purple-700', + entry_point_discovery: 'bg-orange-100 text-orange-700', + analytics_refresh: 'bg-pink-100 text-pink-700', + }; + + return ( + + {role.replace(/_/g, ' ')} + + ); +} + +function HealthBadge({ status, healthStatus }: { status: string; healthStatus: string }) { + const getConfig = () => { + if (healthStatus === 'offline' || status === 'offline') { + return { bg: 'bg-gray-100', text: 'text-gray-600', label: 'offline', icon: XCircle }; + } + if (healthStatus === 'stale') { + return { bg: 'bg-yellow-100', text: 'text-yellow-700', label: 'stale', icon: Clock }; + } + if (healthStatus === 'busy' || status === 'active') { + return { bg: 'bg-blue-100', text: 'text-blue-700', label: 'busy', icon: Activity }; + } + return { bg: 'bg-green-100', text: 'text-green-700', label: 'ready', icon: CheckCircle }; + }; + + const config = getConfig(); + const Icon = config.icon; + + return ( + - {status} + {config.label} ); } export function WorkersDashboard() { - const [schedules, setSchedules] = useState([]); - const [selectedWorker, setSelectedWorker] = useState(null); - const [workerLogs, setWorkerLogs] = useState([]); - const [summary, setSummary] = useState(null); + const [workers, setWorkers] = useState([]); + const [tasks, setTasks] = useState([]); const [loading, setLoading] = useState(true); - const [logsLoading, setLogsLoading] = useState(false); const [error, setError] = useState(null); - const [triggering, setTriggering] = useState(null); - const [showCreateModal, setShowCreateModal] = useState(false); - const [showEditModal, setShowEditModal] = useState(false); - const [availableRoles, setAvailableRoles] = useState([]); - const [availableStates, setAvailableStates] = useState([]); - const [availableDispensaries, setAvailableDispensaries] = useState([]); - const [availableChains, setAvailableChains] = useState([]); - const [dispensarySearch, setDispensarySearch] = useState(''); - const [saving, setSaving] = useState(false); - const [newWorker, setNewWorker] = useState({ - name: '', - role: 'product_sync', - description: '', - interval_minutes: 240, - jitter_minutes: 30, - assignment_type: 'all', - assigned_dispensary_ids: [], - assigned_state_codes: [], - assigned_chain_ids: [], - }); - const fetchRoles = useCallback(async () => { - try { - const res = await api.get('/api/workers/roles'); - setAvailableRoles(res.data.roles || []); - } catch (err) { - console.error('Failed to fetch roles:', err); - } - }, []); - - const fetchStates = useCallback(async () => { - try { - const res = await api.get('/api/workers/states'); - setAvailableStates(res.data.states || []); - } catch (err) { - console.error('Failed to fetch states:', err); - } - }, []); - - const fetchChains = useCallback(async () => { - try { - const 
res = await api.get('/api/workers/chains'); - setAvailableChains(res.data.chains || []); - } catch (err) { - console.error('Failed to fetch chains:', err); - } - }, []); - - const fetchDispensaries = useCallback(async (search: string = '') => { - try { - const res = await api.get(`/api/workers/dispensaries?search=${encodeURIComponent(search)}&limit=50`); - setAvailableDispensaries(res.data.dispensaries || []); - } catch (err) { - console.error('Failed to fetch dispensaries:', err); - } - }, []); + // Pagination + const [page, setPage] = useState(0); + const workersPerPage = 15; const fetchData = useCallback(async () => { try { - // Use the workers endpoint - const workersRes = await api.get('/api/workers'); + // Fetch workers from registry + const workersRes = await api.get('/api/worker-registry/workers'); - // Map workers API response format to component's expected format - const workersList = workersRes.data.workers || []; - setSchedules(workersList.map((w: any) => ({ - id: w.id, - job_name: w.job_name || w.worker_name, - description: w.description, - worker_name: w.worker_name, - worker_role: w.run_role || w.worker_role, - enabled: w.enabled, - base_interval_minutes: w.base_interval_minutes, - jitter_minutes: w.jitter_minutes, - next_run_at: w.next_run_at, - last_run_at: w.last_run_at, - last_status: w.last_status, - job_config: { scope: w.scope }, - }))); + // Fetch running tasks to get current task details + const tasksRes = await api.get('/api/tasks?status=running&limit=100'); - // Calculate summary from workers data - const successSchedules = workersList.filter((w: any) => w.last_status === 'success'); - const failedSchedules = workersList.filter((w: any) => w.last_status === 'error'); - const runningSchedules = workersList.filter((w: any) => w.last_status === 'running'); - - setSummary({ - running_scheduled_jobs: runningSchedules.length, - running_dispensary_crawl_jobs: 0, - successful_jobs_24h: successSchedules.length, - failed_jobs_24h: failedSchedules.length, - successful_crawls_24h: successSchedules.length, - failed_crawls_24h: failedSchedules.length, - products_found_24h: 0, - snapshots_created_24h: 0, - last_job_started: null, - last_job_completed: null, - nextRuns: [], - }); + setWorkers(workersRes.data.workers || []); + setTasks(tasksRes.data.tasks || []); setError(null); } catch (err: any) { + console.error('Fetch error:', err); setError(err.message || 'Failed to fetch data'); } finally { setLoading(false); } }, []); - const handleCreateWorker = async () => { - if (!newWorker.name || !newWorker.role) { - setError('Name and role are required'); - return; - } - - setSaving(true); - try { - await api.post('/api/workers/definitions', { - name: newWorker.name, - role: newWorker.role, - description: newWorker.description, - interval_minutes: newWorker.interval_minutes, - jitter_minutes: newWorker.jitter_minutes, - assignment_type: newWorker.assignment_type, - assigned_dispensary_ids: newWorker.assignment_type === 'dispensary' ? newWorker.assigned_dispensary_ids : [], - assigned_state_codes: newWorker.assignment_type === 'state' ? newWorker.assigned_state_codes : [], - assigned_chain_ids: newWorker.assignment_type === 'chain' ? 
newWorker.assigned_chain_ids : [], - }); - setShowCreateModal(false); - setNewWorker({ - name: '', - role: 'product_sync', - description: '', - interval_minutes: 240, - jitter_minutes: 30, - assignment_type: 'all', - assigned_dispensary_ids: [], - assigned_state_codes: [], - assigned_chain_ids: [], - }); - setDispensarySearch(''); - fetchData(); - } catch (err: any) { - setError(err.response?.data?.error || err.message || 'Failed to create worker'); - } finally { - setSaving(false); - } - }; - - const handleUpdateSchedule = async (scheduleId: number, updates: Partial) => { - try { - await api.put(`/api/workers/${scheduleId}/schedule`, updates); - fetchData(); - } catch (err: any) { - setError(err.response?.data?.error || err.message || 'Failed to update schedule'); - } - }; - - const fetchWorkerLogs = useCallback(async (scheduleId: number) => { - setLogsLoading(true); - try { - const res = await api.get(`/api/workers/${scheduleId}/logs?limit=20`); - setWorkerLogs(res.data.logs || []); - } catch (err: any) { - console.error('Failed to fetch worker logs:', err); - setWorkerLogs([]); - } finally { - setLogsLoading(false); - } - }, []); - useEffect(() => { fetchData(); - fetchRoles(); - fetchStates(); - fetchChains(); - fetchDispensaries(); - const interval = setInterval(fetchData, 5000); // Refresh every 5 seconds + const interval = setInterval(fetchData, 5000); return () => clearInterval(interval); - }, [fetchData, fetchRoles, fetchStates, fetchChains, fetchDispensaries]); + }, [fetchData]); - // Debounce dispensary search - useEffect(() => { - const timer = setTimeout(() => { - if (dispensarySearch) { - fetchDispensaries(dispensarySearch); - } - }, 300); - return () => clearTimeout(timer); - }, [dispensarySearch, fetchDispensaries]); + // Paginated workers + const paginatedWorkers = workers.slice( + page * workersPerPage, + (page + 1) * workersPerPage + ); + const totalPages = Math.ceil(workers.length / workersPerPage); - useEffect(() => { - if (selectedWorker) { - fetchWorkerLogs(selectedWorker.id); - } else { - setWorkerLogs([]); - } - }, [selectedWorker, fetchWorkerLogs]); + // Stats + const activeWorkers = workers.filter(w => w.status !== 'offline' && w.status !== 'terminated'); + const busyWorkers = workers.filter(w => w.current_task_id !== null); + const idleWorkers = activeWorkers.filter(w => w.current_task_id === null); + const totalCompleted = workers.reduce((sum, w) => sum + w.tasks_completed, 0); + const totalFailed = workers.reduce((sum, w) => sum + w.tasks_failed, 0); - const handleSelectWorker = (schedule: Schedule) => { - if (selectedWorker?.id === schedule.id) { - setSelectedWorker(null); - } else { - setSelectedWorker(schedule); - } - }; - - const handleTrigger = async (scheduleId: number) => { - setTriggering(scheduleId); - try { - await api.post(`/api/workers/${scheduleId}/trigger`); - // Refresh data after trigger - setTimeout(fetchData, 1000); - } catch (err: any) { - console.error('Failed to trigger worker:', err); - } finally { - setTriggering(null); - } + // Get task info for a worker + const getWorkerTask = (workerId: string): Task | undefined => { + return tasks.find(t => t.claimed_by === workerId); }; if (loading) { @@ -391,27 +251,18 @@ export function WorkersDashboard() { {/* Header */}
-

Crawler Workers

+

Workers

- Named workforce dashboard - Alice, Henry, Bella, Oscar + {workers.length} registered workers ({busyWorkers.length} busy, {idleWorkers.length} idle)

-
- - -
+
{error && ( @@ -420,565 +271,223 @@ export function WorkersDashboard() {
)} - {/* Summary Cards */} - {summary && ( -
-
-
-
- -
-
-

Running Jobs

-

- {summary.running_scheduled_jobs + summary.running_dispensary_crawl_jobs} -

-
+ {/* Stats Cards */} +
+
+
+
+
-
-
-
-
- -
-
-

Successful (24h)

-

{summary.successful_jobs_24h}

-
-
-
-
-
-
- -
-
-

Failed (24h)

-

{summary.failed_jobs_24h}

-
-
-
-
-
-
- -
-
-

Active Workers

-

{schedules.filter(s => s.enabled).length}

-
+
+

Total Workers

+

{workers.length}

- )} +
+
+
+ +
+
+

Busy

+

{busyWorkers.length}

+
+
+
+
+
+
+ +
+
+

Idle

+

{idleWorkers.length}

+
+
+
+
+
+
+ +
+
+

Completed

+

{totalCompleted}

+
+
+
+
+
+
+ +
+
+

Failed

+

{totalFailed}

+
+
+
+
{/* Workers Table */}
-
-

Workers

-
- - - - - - - - - - - - - - {schedules.map((schedule) => ( - handleSelectWorker(schedule)} +
+

+ Registered Workers +

+ {totalPages > 1 && ( +
+
- - - - - - + + + + {page + 1} / {totalPages} + + + + )} + + + {workers.length === 0 ? ( +
+ +

No workers registered

+

Workers will appear here when they start up and register

+
+ ) : ( +
- Worker - - Role - - Scope - - Last Run - - Next Run - - Status - - Actions -
-
- - - {schedule.worker_name || schedule.job_name} - - {selectedWorker?.id === schedule.id ? ( - - ) : ( - - )} -
- {schedule.description && ( -

{schedule.description}

- )} -
- - - {formatScope(schedule.job_config)} - - {formatRelativeTime(schedule.last_run_at)} - - {schedule.enabled ? formatRelativeTime(schedule.next_run_at) : 'disabled'} - - - - -
+ + + + + + + + + + - ))} - -
WorkerRoleStatusCurrent TaskTask DurationUtilizationHeartbeatUptime
-
+ + + {paginatedWorkers.map((worker) => { + const currentTask = worker.current_task_id + ? tasks.find(t => t.id === worker.current_task_id) + : undefined; - {/* Worker Detail Pane */} - {selectedWorker && ( -
-
-
-
-

- {selectedWorker.worker_name || selectedWorker.job_name} -

- -
-
- Scope: {formatScope(selectedWorker.job_config)} -
-
- {selectedWorker.description && ( -

{selectedWorker.description}

- )} -
- - {/* Run History */} -
-

Recent Run History

- {logsLoading ? ( -
- -
- ) : workerLogs.length === 0 ? ( -

No run history available

- ) : ( - - - - - - - - - - - - - {workerLogs.map((log) => { - const duration = log.completed_at - ? (new Date(log.completed_at).getTime() - - new Date(log.started_at).getTime()) / - 1000 - : null; - const visLost = log.metadata?.visibilityLostCount; - const visRestored = log.metadata?.visibilityRestoredCount; - - return ( - - - - - + + + + - + + - - - ); - })} - -
- Started - - Duration - - Status - - Processed - - Visibility Stats - - Error -
- {formatRelativeTime(log.started_at)} - - {formatDuration(duration)} - - - - {log.items_succeeded} - / - {log.items_processed} - {log.items_failed > 0 && ( - <> - ( - {log.items_failed} failed - ) - + return ( +
+
+
+ {worker.friendly_name?.charAt(0) || '?'} +
+
+

{worker.friendly_name}

+

{worker.worker_id.slice(0, 20)}...

+
+
+
+ + + + + {worker.current_task_id ? ( +
+ Task #{worker.current_task_id} + {currentTask?.dispensary_name && ( +

{currentTask.dispensary_name}

)} -
- {visLost !== undefined || visRestored !== undefined ? ( - - {visLost !== undefined && visLost > 0 && ( - - -{visLost} lost - - )} - {visRestored !== undefined && visRestored > 0 && ( - - +{visRestored} restored - - )} - {visLost === 0 && visRestored === 0 && ( - no changes - )} + + ) : ( + Idle + )} + + {currentTask?.started_at ? ( + + ) : ( + - + )} + + {(() => { + const util = calculateUtilization(worker.tasks_completed, worker.tasks_failed, worker.started_at); + return ( +
+ + {util.rate} + + ({worker.tasks_completed}✓ {worker.tasks_failed > 0 ? `${worker.tasks_failed}✗` : ''}) - ) : ( - - - )} -
- {log.error_message || '-'} -
+
+ ); + })()} + + +
+ + {formatRelativeTime(worker.last_heartbeat_at)} +
+ + + {formatUptime(worker.started_at)} + + + ); + })} + + + )} + + {/* Pagination Footer */} + {workers.length > 0 && ( +
+
+ Showing {page * workersPerPage + 1} - {Math.min((page + 1) * workersPerPage, workers.length)} of {workers.length} workers +
+ {totalPages > 1 && ( +
+ {Array.from({ length: totalPages }, (_, i) => ( + + ))} +
)}
-
- )} - - {/* Create Worker Modal */} - {showCreateModal && ( -
-
-
-

Create New Worker

- -
-
- {/* Name */} -
- - setNewWorker({ ...newWorker, name: e.target.value })} - placeholder="e.g., Charlie, Eve, Frank" - className="w-full px-3 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-emerald-500 focus:border-emerald-500" - /> -

Give your worker a friendly name

-
- - {/* Role */} -
- - -
- - {/* Description */} -
- -