Compare commits

..

1 Commits

Author SHA1 Message Date
Kelly
cdab44c757 fix(ui): Show git hash instead of version number in sidebar
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-11 08:09:06 -07:00
96 changed files with 3306 additions and 6805 deletions

View File

@@ -1,194 +0,0 @@
steps:
# ===========================================
# PR VALIDATION: Parallel type checks (PRs only)
# ===========================================
typecheck-backend:
image: code.cannabrands.app/creationshop/node:20
commands:
- cd backend
- npm ci --prefer-offline
- npx tsc --noEmit
depends_on: []
when:
event: pull_request
typecheck-cannaiq:
image: code.cannabrands.app/creationshop/node:20
commands:
- cd cannaiq
- npm ci --prefer-offline
- npx tsc --noEmit
depends_on: []
when:
event: pull_request
typecheck-findadispo:
image: code.cannabrands.app/creationshop/node:20
commands:
- cd findadispo/frontend
- npm ci --prefer-offline
- npx tsc --noEmit 2>/dev/null || true
depends_on: []
when:
event: pull_request
typecheck-findagram:
image: code.cannabrands.app/creationshop/node:20
commands:
- cd findagram/frontend
- npm ci --prefer-offline
- npx tsc --noEmit 2>/dev/null || true
depends_on: []
when:
event: pull_request
# ===========================================
# AUTO-MERGE: Merge PR after all checks pass
# ===========================================
auto-merge:
image: alpine:latest
environment:
GITEA_TOKEN:
from_secret: gitea_token
commands:
- apk add --no-cache curl
- |
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
curl -s -X POST \
-H "Authorization: token $GITEA_TOKEN" \
-H "Content-Type: application/json" \
-d '{"Do":"merge"}' \
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
depends_on:
- typecheck-backend
- typecheck-cannaiq
- typecheck-findadispo
- typecheck-findagram
when:
event: pull_request
# ===========================================
# MASTER DEPLOY: Parallel Docker builds
# NOTE: cache_from/cache_to removed due to plugin bug splitting on commas
# ===========================================
docker-backend:
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/dispensary-scraper
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: backend/Dockerfile
context: backend
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
build_args:
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
APP_GIT_SHA: ${CI_COMMIT_SHA}
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
depends_on: []
when:
branch: master
event: push
docker-cannaiq:
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/cannaiq-frontend
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: cannaiq/Dockerfile
context: cannaiq
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
depends_on: []
when:
branch: master
event: push
docker-findadispo:
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/findadispo-frontend
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: findadispo/frontend/Dockerfile
context: findadispo/frontend
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
depends_on: []
when:
branch: master
event: push
docker-findagram:
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/findagram-frontend
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: findagram/frontend/Dockerfile
context: findagram/frontend
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
depends_on: []
when:
branch: master
event: push
# ===========================================
# STAGE 3: Deploy and Run Migrations
# ===========================================
deploy:
image: bitnami/kubectl:latest
environment:
KUBECONFIG_CONTENT:
from_secret: kubeconfig_data
commands:
- mkdir -p ~/.kube
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
- chmod 600 ~/.kube/config
# Deploy backend first
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
# Note: Migrations run automatically at startup via auto-migrate
# Deploy remaining services
# Resilience: ensure workers are scaled up if at 0
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
depends_on:
- docker-backend
- docker-cannaiq
- docker-findadispo
- docker-findagram
when:
branch: master
event: push

View File

@@ -1,38 +1,46 @@
steps:
# ===========================================
# PR VALIDATION: Only typecheck changed projects
# PR VALIDATION: Parallel type checks (PRs only)
# ===========================================
typecheck-backend:
image: code.cannabrands.app/creationshop/node:20
commands:
- npm config set cache /npm-cache/backend --global
- cd backend
- npm ci --prefer-offline
- npx tsc --noEmit
volumes:
- npm-cache:/npm-cache
depends_on: []
when:
event: pull_request
path:
include: ['backend/**']
typecheck-cannaiq:
image: code.cannabrands.app/creationshop/node:20
commands:
- npm config set cache /npm-cache/cannaiq --global
- cd cannaiq
- npm ci --prefer-offline
- npx tsc --noEmit
volumes:
- npm-cache:/npm-cache
depends_on: []
when:
event: pull_request
path:
include: ['cannaiq/**']
# findadispo/findagram typechecks skipped - they have || true anyway
typecheck-findadispo:
image: code.cannabrands.app/creationshop/node:20
commands:
- cd findadispo/frontend
- npm ci --prefer-offline
- npx tsc --noEmit 2>/dev/null || true
depends_on: []
when:
event: pull_request
typecheck-findagram:
image: code.cannabrands.app/creationshop/node:20
commands:
- cd findagram/frontend
- npm ci --prefer-offline
- npx tsc --noEmit 2>/dev/null || true
depends_on: []
when:
event: pull_request
# ===========================================
# AUTO-MERGE: Merge PR after all checks pass
@@ -54,6 +62,8 @@ steps:
depends_on:
- typecheck-backend
- typecheck-cannaiq
- typecheck-findadispo
- typecheck-findagram
when:
event: pull_request
@@ -76,8 +86,6 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max
build_args:
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
APP_GIT_SHA: ${CI_COMMIT_SHA}
@@ -104,8 +112,6 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max
depends_on: []
when:
branch: master
@@ -127,8 +133,6 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max
depends_on: []
when:
branch: master
@@ -150,15 +154,38 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max
depends_on: []
when:
branch: master
event: push
# ===========================================
# STAGE 3: Deploy and Run Migrations
# STAGE 3: Run Database Migrations (before deploy)
# ===========================================
migrate:
image: code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8}
environment:
CANNAIQ_DB_HOST:
from_secret: db_host
CANNAIQ_DB_PORT:
from_secret: db_port
CANNAIQ_DB_NAME:
from_secret: db_name
CANNAIQ_DB_USER:
from_secret: db_user
CANNAIQ_DB_PASS:
from_secret: db_pass
commands:
- cd /app
- node dist/db/migrate.js
depends_on:
- docker-backend
when:
branch: master
event: push
# ===========================================
# STAGE 4: Deploy (after migrations)
# ===========================================
deploy:
image: bitnami/kubectl:latest
@@ -169,20 +196,15 @@ steps:
- mkdir -p ~/.kube
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
- chmod 600 ~/.kube/config
# Deploy backend first
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
# Note: Migrations run automatically at startup via auto-migrate
# Deploy remaining services
# Resilience: ensure workers are scaled up if at 0
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
depends_on:
- docker-backend
- migrate
- docker-cannaiq
- docker-findadispo
- docker-findagram

1444
CLAUDE.md

File diff suppressed because it is too large Load Diff

View File

@@ -1,218 +0,0 @@
# CannaiQ Backend Codebase Map
**Last Updated:** 2025-12-12
**Purpose:** Help Claude and developers understand which code is current vs deprecated
---
## Quick Reference: What to Use
### For Crawling/Scraping
| Task | Use This | NOT This |
|------|----------|----------|
| Fetch products | `src/tasks/handlers/payload-fetch.ts` | `src/hydration/*` |
| Process products | `src/tasks/handlers/product-refresh.ts` | `src/scraper-v2/*` |
| GraphQL client | `src/platforms/dutchie/client.ts` | `src/dutchie-az/services/graphql-client.ts` |
| Worker system | `src/tasks/task-worker.ts` | `src/dutchie-az/services/worker.ts` |
### For Database
| Task | Use This | NOT This |
|------|----------|----------|
| Get DB pool | `src/db/pool.ts` | `src/dutchie-az/db/connection.ts` |
| Run migrations | `src/db/migrate.ts` (CLI only) | Never import at runtime |
| Query products | `store_products` table | `products`, `dutchie_products` |
| Query stores | `dispensaries` table | `stores` table |
### For Discovery
| Task | Use This |
|------|----------|
| Discover stores | `src/discovery/*.ts` |
| Run discovery | `npx tsx src/scripts/run-discovery.ts` |
---
## Directory Status
### ACTIVE DIRECTORIES (Use These)
```
src/
├── auth/ # JWT/session auth, middleware
├── db/ # Database pool, migrations
├── discovery/ # Dutchie store discovery pipeline
├── middleware/ # Express middleware
├── multi-state/ # Multi-state query support
├── platforms/ # Platform-specific clients (Dutchie, Jane, etc)
│ └── dutchie/ # THE Dutchie client - use this one
├── routes/ # Express API routes
├── services/ # Core services (logger, scheduler, etc)
├── tasks/ # Task system (workers, handlers, scheduler)
│ └── handlers/ # Task handlers (payload_fetch, product_refresh, etc)
├── types/ # TypeScript types
└── utils/ # Utilities (storage, image processing)
```
### DEPRECATED DIRECTORIES (DO NOT USE)
```
src/
├── hydration/ # DEPRECATED - Old pipeline approach
├── scraper-v2/ # DEPRECATED - Old scraper engine
├── canonical-hydration/# DEPRECATED - Merged into tasks/handlers
├── dutchie-az/ # PARTIAL - Some parts deprecated, some active
│ ├── db/ # DEPRECATED - Use src/db/pool.ts
│ └── services/ # PARTIAL - worker.ts still runs, graphql-client.ts deprecated
├── portals/ # FUTURE - Not yet implemented
├── seo/ # PARTIAL - Settings work, templates WIP
└── system/ # DEPRECATED - Old orchestration system
```
### DEPRECATED FILES (DO NOT USE)
```
src/dutchie-az/db/connection.ts # Use src/db/pool.ts instead
src/dutchie-az/services/graphql-client.ts # Use src/platforms/dutchie/client.ts
src/hydration/*.ts # Entire directory deprecated
src/scraper-v2/*.ts # Entire directory deprecated
```
---
## Key Files Reference
### Entry Points
| File | Purpose | Status |
|------|---------|--------|
| `src/index.ts` | Main Express server | ACTIVE |
| `src/dutchie-az/services/worker.ts` | Worker process entry | ACTIVE |
| `src/tasks/task-worker.ts` | Task worker (new system) | ACTIVE |
### Dutchie Integration
| File | Purpose | Status |
|------|---------|--------|
| `src/platforms/dutchie/client.ts` | GraphQL client, hashes, curl | **PRIMARY** |
| `src/platforms/dutchie/queries.ts` | High-level query functions | ACTIVE |
| `src/platforms/dutchie/index.ts` | Re-exports | ACTIVE |
### Task Handlers
| File | Purpose | Status |
|------|---------|--------|
| `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** |
| `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** |
| `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE |
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs | ACTIVE |
| `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE |
### Database
| File | Purpose | Status |
|------|---------|--------|
| `src/db/pool.ts` | Canonical DB pool | **PRIMARY** |
| `src/db/migrate.ts` | Migration runner (CLI only) | CLI ONLY |
| `src/db/auto-migrate.ts` | Auto-run migrations on startup | ACTIVE |
### Configuration
| File | Purpose | Status |
|------|---------|--------|
| `.env` | Environment variables | ACTIVE |
| `package.json` | Dependencies | ACTIVE |
| `tsconfig.json` | TypeScript config | ACTIVE |
---
## GraphQL Hashes (CRITICAL)
The correct hashes are in `src/platforms/dutchie/client.ts`:
```typescript
export const GRAPHQL_HASHES = {
FilteredProducts: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
};
```
**ALWAYS** use `Status: 'Active'` for FilteredProducts (not `null` or `'All'`).
---
## Scripts Reference
### Useful Scripts (in `src/scripts/`)
| Script | Purpose |
|--------|---------|
| `run-discovery.ts` | Run Dutchie discovery |
| `crawl-single-store.ts` | Test crawl a single store |
| `test-dutchie-graphql.ts` | Test GraphQL queries |
### One-Off Scripts (probably don't need)
| Script | Purpose |
|--------|---------|
| `harmonize-az-dispensaries.ts` | One-time data cleanup |
| `bootstrap-stores-for-dispensaries.ts` | One-time migration |
| `backfill-*.ts` | Historical backfill scripts |
---
## API Routes
### Active Routes (in `src/routes/`)
| Route File | Mount Point | Purpose |
|------------|-------------|---------|
| `auth.ts` | `/api/auth` | Login/logout/session |
| `stores.ts` | `/api/stores` | Store CRUD |
| `dashboard.ts` | `/api/dashboard` | Dashboard stats |
| `workers.ts` | `/api/workers` | Worker monitoring |
| `pipeline.ts` | `/api/pipeline` | Crawl triggers |
| `discovery.ts` | `/api/discovery` | Discovery management |
| `analytics.ts` | `/api/analytics` | Analytics queries |
| `wordpress.ts` | `/api/v1/wordpress` | WordPress plugin API |
---
## Documentation Files
### Current Docs (in `backend/docs/`)
| Doc | Purpose | Currency |
|-----|---------|----------|
| `TASK_WORKFLOW_2024-12-10.md` | Task system architecture | CURRENT |
| `WORKER_TASK_ARCHITECTURE.md` | Worker/task design | CURRENT |
| `CRAWL_PIPELINE.md` | Crawl pipeline overview | CURRENT |
| `ORGANIC_SCRAPING_GUIDE.md` | Browser-based scraping | CURRENT |
| `CODEBASE_MAP.md` | This file | CURRENT |
| `ANALYTICS_V2_EXAMPLES.md` | Analytics API examples | CURRENT |
| `BRAND_INTELLIGENCE_API.md` | Brand API docs | CURRENT |
### Root Docs
| Doc | Purpose | Currency |
|-----|---------|----------|
| `CLAUDE.md` | Claude instructions | **PRIMARY** |
| `README.md` | Project overview | NEEDS UPDATE |
---
## Common Mistakes to Avoid
1. **Don't use `src/hydration/`** - It's an old approach that was superseded by the task system
2. **Don't use `src/dutchie-az/db/connection.ts`** - Use `src/db/pool.ts` instead
3. **Don't import `src/db/migrate.ts` at runtime** - It will crash. Only use for CLI migrations.
4. **Don't query `stores` table** - It's empty. Use `dispensaries`.
5. **Don't query `products` table** - It's empty. Use `store_products`.
6. **Don't use wrong GraphQL hash** - Always get hash from `GRAPHQL_HASHES` in client.ts
7. **Don't use `Status: null`** - It returns 0 products. Use `Status: 'Active'`.
---
## When in Doubt
1. Check if the file is imported in `src/index.ts` - if not, it may be deprecated
2. Check the last modified date - older files may be stale
3. Look for `DEPRECATED` comments in the code
4. Ask: "Is there a newer version of this in `src/tasks/` or `src/platforms/`?"
5. Read the relevant doc in `docs/` before modifying code

View File

@@ -1,343 +0,0 @@
# CannaiQ Query API
Query raw crawl payload data with flexible filters, sorting, and aggregation.
## Base URL
```
https://cannaiq.co/api/payloads
```
## Authentication
Include your API key in the header:
```
X-API-Key: your-api-key
```
---
## Endpoints
### 1. Query Products
Filter and search products from a store's latest crawl data.
```
GET /api/payloads/store/{dispensaryId}/query
```
#### Query Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `brand` | string | Filter by brand name (partial match) |
| `category` | string | Filter by category (flower, vape, edible, etc.) |
| `subcategory` | string | Filter by subcategory |
| `strain_type` | string | Filter by strain (indica, sativa, hybrid, cbd) |
| `in_stock` | boolean | Filter by stock status (true/false) |
| `price_min` | number | Minimum price |
| `price_max` | number | Maximum price |
| `thc_min` | number | Minimum THC percentage |
| `thc_max` | number | Maximum THC percentage |
| `search` | string | Search product name (partial match) |
| `fields` | string | Comma-separated fields to return |
| `limit` | number | Max results (default 100, max 1000) |
| `offset` | number | Skip results for pagination |
| `sort` | string | Sort by: name, price, thc, brand |
| `order` | string | Sort order: asc, desc |
#### Available Fields
When using `fields` parameter, you can request:
- `id` - Product ID
- `name` - Product name
- `brand` - Brand name
- `category` - Product category
- `subcategory` - Product subcategory
- `strain_type` - Indica/Sativa/Hybrid/CBD
- `price` - Current price
- `price_med` - Medical price
- `price_rec` - Recreational price
- `thc` - THC percentage
- `cbd` - CBD percentage
- `weight` - Product weight/size
- `status` - Stock status
- `in_stock` - Boolean in-stock flag
- `image_url` - Product image
- `description` - Product description
#### Examples
**Get all flower products under $40:**
```
GET /api/payloads/store/112/query?category=flower&price_max=40
```
**Search for "Blue Dream" with high THC:**
```
GET /api/payloads/store/112/query?search=blue+dream&thc_min=20
```
**Get only name and price for Alien Labs products:**
```
GET /api/payloads/store/112/query?brand=Alien+Labs&fields=name,price,thc
```
**Get top 10 highest THC products:**
```
GET /api/payloads/store/112/query?sort=thc&order=desc&limit=10
```
**Paginate through in-stock products:**
```
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=0
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=50
```
#### Response
```json
{
"success": true,
"dispensaryId": 112,
"payloadId": 45,
"fetchedAt": "2025-12-11T10:30:00Z",
"query": {
"filters": {
"brand": "Alien Labs",
"category": null,
"price_max": null
},
"sort": "price",
"order": "asc",
"limit": 100,
"offset": 0
},
"pagination": {
"total": 15,
"returned": 15,
"limit": 100,
"offset": 0,
"has_more": false
},
"products": [
{
"id": "507f1f77bcf86cd799439011",
"name": "Alien Labs - Baklava 3.5g",
"brand": "Alien Labs",
"category": "flower",
"strain_type": "hybrid",
"price": 55,
"thc": "28.5",
"in_stock": true
}
]
}
```
---
### 2. Aggregate Data
Group products and calculate metrics.
```
GET /api/payloads/store/{dispensaryId}/aggregate
```
#### Query Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `group_by` | string | **Required.** Field to group by: brand, category, subcategory, strain_type |
| `metrics` | string | Comma-separated metrics (default: count) |
#### Available Metrics
- `count` - Number of products
- `avg_price` - Average price
- `min_price` - Lowest price
- `max_price` - Highest price
- `avg_thc` - Average THC percentage
- `in_stock_count` - Number of in-stock products
#### Examples
**Count products by brand:**
```
GET /api/payloads/store/112/aggregate?group_by=brand
```
**Get price stats by category:**
```
GET /api/payloads/store/112/aggregate?group_by=category&metrics=count,avg_price,min_price,max_price
```
**Get THC averages by strain type:**
```
GET /api/payloads/store/112/aggregate?group_by=strain_type&metrics=count,avg_thc
```
**Brand analysis with stock info:**
```
GET /api/payloads/store/112/aggregate?group_by=brand&metrics=count,avg_price,in_stock_count
```
#### Response
```json
{
"success": true,
"dispensaryId": 112,
"payloadId": 45,
"fetchedAt": "2025-12-11T10:30:00Z",
"groupBy": "brand",
"metrics": ["count", "avg_price"],
"totalProducts": 450,
"groupCount": 85,
"aggregations": [
{
"brand": "Alien Labs",
"count": 15,
"avg_price": 52.33
},
{
"brand": "Connected",
"count": 12,
"avg_price": 48.50
}
]
}
```
---
### 3. Compare Stores (Price Comparison)
Query the same data from multiple stores and compare in your app:
```javascript
// Get flower prices from Store A
const storeA = await fetch('/api/payloads/store/112/query?category=flower&fields=name,brand,price');
// Get flower prices from Store B
const storeB = await fetch('/api/payloads/store/115/query?category=flower&fields=name,brand,price');
// Compare in your app
const dataA = await storeA.json();
const dataB = await storeB.json();
// Find matching products and compare prices
```
---
### 4. Price History
For historical price data, use the snapshots endpoint:
```
GET /api/v1/products/{productId}/history?days=30
```
Or compare payloads over time:
```
GET /api/payloads/store/{dispensaryId}/diff?from={payloadId1}&to={payloadId2}
```
The diff endpoint shows:
- Products added
- Products removed
- Price changes
- Stock changes
---
### 5. List Stores
Get available dispensaries to query:
```
GET /api/stores
```
Returns all stores with their IDs, names, and locations.
---
## Use Cases
### Price Comparison App
```javascript
// 1. Get stores in Arizona
const stores = await fetch('/api/stores?state=AZ').then(r => r.json());
// 2. Query flower prices from each store
const prices = await Promise.all(
stores.map(store =>
fetch(`/api/payloads/store/${store.id}/query?category=flower&fields=name,brand,price`)
.then(r => r.json())
)
);
// 3. Build comparison matrix in your app
```
### Brand Analytics Dashboard
```javascript
// Get brand presence across stores
const brandData = await Promise.all(
storeIds.map(id =>
fetch(`/api/payloads/store/${id}/aggregate?group_by=brand&metrics=count,avg_price`)
.then(r => r.json())
)
);
// Aggregate brand presence across all stores
```
### Deal Finder
```javascript
// Find high-THC flower under $30
const deals = await fetch(
'/api/payloads/store/112/query?category=flower&price_max=30&thc_min=20&in_stock=true&sort=thc&order=desc'
).then(r => r.json());
```
### Inventory Tracker
```javascript
// Get products that went out of stock
const diff = await fetch('/api/payloads/store/112/diff').then(r => r.json());
const outOfStock = diff.details.stockChanges.filter(
p => p.newStatus !== 'Active'
);
```
---
## Rate Limits
- Default: 100 requests/minute per API key
- Contact support for higher limits
## Error Responses
```json
{
"success": false,
"error": "Error message here"
}
```
Common errors:
- `404` - Store or payload not found
- `400` - Missing required parameter
- `401` - Invalid or missing API key
- `429` - Rate limit exceeded

View File

@@ -362,148 +362,6 @@ SET status = 'pending', retry_count = retry_count + 1
WHERE status = 'failed' AND retry_count < max_retries;
```
## Concurrent Task Processing (Added 2024-12)
Workers can now process multiple tasks concurrently within a single worker instance. This improves throughput by utilizing async I/O efficiently.
### Architecture
```
┌─────────────────────────────────────────────────────────────┐
│ Pod (K8s) │
│ │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ TaskWorker │ │
│ │ │ │
│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │
│ │ │ Task 1 │ │ Task 2 │ │ Task 3 │ (concurrent)│ │
│ │ └─────────┘ └─────────┘ └─────────┘ │ │
│ │ │ │
│ │ Resource Monitor │ │
│ │ ├── Memory: 65% (threshold: 85%) │ │
│ │ ├── CPU: 45% (threshold: 90%) │ │
│ │ └── Status: Normal │ │
│ └─────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────┘
```
### Environment Variables
| Variable | Default | Description |
|----------|---------|-------------|
| `MAX_CONCURRENT_TASKS` | 3 | Maximum tasks a worker will run concurrently |
| `MEMORY_BACKOFF_THRESHOLD` | 0.85 | Back off when heap memory exceeds 85% |
| `CPU_BACKOFF_THRESHOLD` | 0.90 | Back off when CPU exceeds 90% |
| `BACKOFF_DURATION_MS` | 10000 | How long to wait when backing off (10s) |
### How It Works
1. **Main Loop**: Worker continuously tries to fill up to `MAX_CONCURRENT_TASKS`
2. **Resource Monitoring**: Before claiming a new task, worker checks memory and CPU
3. **Backoff**: If resources exceed thresholds, worker pauses and stops claiming new tasks
4. **Concurrent Execution**: Tasks run in parallel using `Promise` - they don't block each other
5. **Graceful Shutdown**: On SIGTERM/decommission, worker stops claiming but waits for active tasks
### Resource Monitoring
```typescript
// ResourceStats interface
interface ResourceStats {
memoryPercent: number; // Current heap usage as decimal (0.0-1.0)
memoryMb: number; // Current heap used in MB
memoryTotalMb: number; // Total heap available in MB
cpuPercent: number; // CPU usage as percentage (0-100)
isBackingOff: boolean; // True if worker is in backoff state
backoffReason: string; // Why the worker is backing off
}
```
### Heartbeat Data
Workers report the following in their heartbeat:
```json
{
"worker_id": "worker-abc123",
"current_task_id": 456,
"current_task_ids": [456, 457, 458],
"active_task_count": 3,
"max_concurrent_tasks": 3,
"status": "active",
"resources": {
"memory_mb": 256,
"memory_total_mb": 512,
"memory_rss_mb": 320,
"memory_percent": 50,
"cpu_user_ms": 12500,
"cpu_system_ms": 3200,
"cpu_percent": 45,
"is_backing_off": false,
"backoff_reason": null
}
}
```
### Backoff Behavior
When resources exceed thresholds:
1. Worker logs the backoff reason:
```
[TaskWorker] MyWorker backing off: Memory at 87.3% (threshold: 85%)
```
2. Worker stops claiming new tasks but continues existing tasks
3. After `BACKOFF_DURATION_MS`, worker rechecks resources
4. When resources return to normal:
```
[TaskWorker] MyWorker resuming normal operation
```
### UI Display
The Workers Dashboard shows:
- **Tasks Column**: `2/3 tasks` (active/max concurrent)
- **Resources Column**: Memory % and CPU % with color coding
- Green: < 50%
- Yellow: 50-74%
- Amber: 75-89%
- Red: 90%+
- **Backing Off**: Orange warning badge when worker is in backoff state
### Task Count Badge Details
```
┌─────────────────────────────────────────────┐
│ Worker: "MyWorker" │
│ Tasks: 2/3 tasks #456, #457
│ Resources: 🧠 65% 💻 45% │
│ Status: ● Active │
└─────────────────────────────────────────────┘
```
### Best Practices
1. **Start Conservative**: Use `MAX_CONCURRENT_TASKS=3` initially
2. **Monitor Resources**: Watch for frequent backoffs in logs
3. **Tune Per Workload**: I/O-bound tasks benefit from higher concurrency
4. **Scale Horizontally**: Add more pods rather than cranking concurrency too high
### Code References
| File | Purpose |
|------|---------|
| `src/tasks/task-worker.ts:68-71` | Concurrency environment variables |
| `src/tasks/task-worker.ts:104-111` | ResourceStats interface |
| `src/tasks/task-worker.ts:149-179` | getResourceStats() method |
| `src/tasks/task-worker.ts:184-196` | shouldBackOff() method |
| `src/tasks/task-worker.ts:462-516` | mainLoop() with concurrent claiming |
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
## Monitoring
### Logs

View File

@@ -1,297 +0,0 @@
# Organic Browser-Based Scraping Guide
**Last Updated:** 2025-12-12
**Status:** Production-ready proof of concept
---
## Overview
This document describes the "organic" browser-based approach to scraping Dutchie dispensary menus. Unlike direct curl/axios requests, this method uses a real browser session to make API calls, making requests appear natural and reducing detection risk.
---
## Why Organic Scraping?
| Approach | Detection Risk | Speed | Complexity |
|----------|---------------|-------|------------|
| Direct curl | Higher | Fast | Low |
| curl-impersonate | Medium | Fast | Medium |
| **Browser-based (organic)** | **Lowest** | Slower | Higher |
Direct curl requests can be fingerprinted via:
- TLS fingerprint (cipher suites, extensions)
- Header order and values
- Missing cookies/session data
- Request patterns
Browser-based requests inherit:
- Real Chrome TLS fingerprint
- Session cookies from page visit
- Natural header order
- JavaScript execution environment
---
## Implementation
### Dependencies
```bash
npm install puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
```
### Core Script: `test-intercept.js`
Located at: `backend/test-intercept.js`
```javascript
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const fs = require('fs');
puppeteer.use(StealthPlugin());
async function capturePayload(config) {
const { dispensaryId, platformId, cName, outputPath } = config;
const browser = await puppeteer.launch({
headless: 'new',
args: ['--no-sandbox', '--disable-setuid-sandbox']
});
const page = await browser.newPage();
// STEP 1: Establish session by visiting the menu
const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
await page.goto(embedUrl, { waitUntil: 'networkidle2', timeout: 60000 });
// STEP 2: Fetch ALL products using GraphQL from browser context
const result = await page.evaluate(async (platformId) => {
const allProducts = [];
let pageNum = 0;
const perPage = 100;
let totalCount = 0;
const sessionId = 'browser-session-' + Date.now();
while (pageNum < 30) {
const variables = {
includeEnterpriseSpecials: false,
productsFilter: {
dispensaryId: platformId,
pricingType: 'rec',
Status: 'Active', // CRITICAL: Must be 'Active', not null
types: [],
useCache: true,
isDefaultSort: true,
sortBy: 'popularSortIdx',
sortDirection: 1,
bypassOnlineThresholds: true,
isKioskMenu: false,
removeProductsBelowOptionThresholds: false,
},
page: pageNum,
perPage: perPage,
};
const extensions = {
persistedQuery: {
version: 1,
sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'
}
};
const qs = new URLSearchParams({
operationName: 'FilteredProducts',
variables: JSON.stringify(variables),
extensions: JSON.stringify(extensions)
});
const response = await fetch(`https://dutchie.com/api-3/graphql?${qs}`, {
method: 'GET',
headers: {
'Accept': 'application/json',
'content-type': 'application/json',
'x-dutchie-session': sessionId,
'apollographql-client-name': 'Marketplace (production)',
},
credentials: 'include'
});
const json = await response.json();
const data = json?.data?.filteredProducts;
if (!data?.products) break;
allProducts.push(...data.products);
if (pageNum === 0) totalCount = data.queryInfo?.totalCount || 0;
if (allProducts.length >= totalCount) break;
pageNum++;
await new Promise(r => setTimeout(r, 200)); // Polite delay
}
return { products: allProducts, totalCount };
}, platformId);
await browser.close();
// STEP 3: Save payload
const payload = {
dispensaryId,
platformId,
cName,
fetchedAt: new Date().toISOString(),
productCount: result.products.length,
products: result.products,
};
fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
return payload;
}
```
---
## Critical Parameters
### GraphQL Hash (FilteredProducts)
```
ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0
```
**WARNING:** Using the wrong hash returns HTTP 400.
### Status Parameter
| Value | Result |
|-------|--------|
| `'Active'` | Returns in-stock products (1019 in test) |
| `null` | Returns 0 products |
| `'All'` | Returns HTTP 400 |
**ALWAYS use `Status: 'Active'`**
### Required Headers
```javascript
{
'Accept': 'application/json',
'content-type': 'application/json',
'x-dutchie-session': 'unique-session-id',
'apollographql-client-name': 'Marketplace (production)',
}
```
### Endpoint
```
https://dutchie.com/api-3/graphql
```
---
## Performance Benchmarks
Test store: AZ-Deeply-Rooted (1019 products)
| Metric | Value |
|--------|-------|
| Total products | 1019 |
| Time | 18.5 seconds |
| Payload size | 11.8 MB |
| Pages fetched | 11 (100 per page) |
| Success rate | 100% |
---
## Payload Format
The output matches the existing `payload-fetch.ts` handler format:
```json
{
"dispensaryId": 123,
"platformId": "6405ef617056e8014d79101b",
"cName": "AZ-Deeply-Rooted",
"fetchedAt": "2025-12-12T05:05:19.837Z",
"productCount": 1019,
"products": [
{
"id": "6927508db4851262f629a869",
"Name": "Product Name",
"brand": { "name": "Brand Name", ... },
"type": "Flower",
"THC": "25%",
"Prices": [...],
"Options": [...],
...
}
]
}
```
---
## Integration Points
### As a Task Handler
The organic approach can be integrated as an alternative to curl-based fetching:
```typescript
// In src/tasks/handlers/organic-payload-fetch.ts
export async function handleOrganicPayloadFetch(ctx: TaskContext): Promise<TaskResult> {
// Use puppeteer-based capture
// Save to same payload storage
// Queue product_refresh task
}
```
### Worker Configuration
Add to job_schedules:
```sql
INSERT INTO job_schedules (name, role, cron_expression)
VALUES ('organic_product_crawl', 'organic_payload_fetch', '0 */6 * * *');
```
---
## Troubleshooting
### HTTP 400 Bad Request
- Check hash is correct: `ee29c060...`
- Verify Status is `'Active'` (string, not null)
### 0 Products Returned
- Status was likely `null` or `'All'` - use `'Active'`
- Check platformId is valid MongoDB ObjectId
### Session Not Established
- Increase timeout on initial page.goto()
- Check cName is valid (matches embedded-menu URL)
### Detection/Blocking
- StealthPlugin should handle most cases
- Add random delays between pages
- Use headless: 'new' (not true/false)
---
## Files Reference
| File | Purpose |
|------|---------|
| `backend/test-intercept.js` | Proof of concept script |
| `backend/src/platforms/dutchie/client.ts` | GraphQL hashes, curl implementation |
| `backend/src/tasks/handlers/payload-fetch.ts` | Current curl-based handler |
| `backend/src/utils/payload-storage.ts` | Payload save/load utilities |
---
## See Also
- `DUTCHIE_CRAWL_WORKFLOW.md` - Full crawl pipeline documentation
- `TASK_WORKFLOW_2024-12-10.md` - Task system architecture
- `CLAUDE.md` - Project rules and constraints

View File

@@ -1,25 +0,0 @@
# ARCHIVED DOCUMENTATION
**WARNING: These docs may be outdated or inaccurate.**
The code has evolved significantly. These docs are kept for historical reference only.
## What to Use Instead
**The single source of truth is:**
- `CLAUDE.md` (root) - Essential rules and quick reference
- `docs/CODEBASE_MAP.md` - Current file/directory reference
## Why Archive?
These docs were written during development iterations and may reference:
- Old file paths that no longer exist
- Deprecated approaches (hydration, scraper-v2)
- APIs that have changed
- Database schemas that evolved
## If You Need Details
1. First check CODEBASE_MAP.md for current file locations
2. Then read the actual source code
3. Only use archive docs as a last resort for historical context

View File

@@ -1,27 +0,0 @@
-- Migration: Worker Commands Table
-- Purpose: Store commands for workers (decommission, etc.)
-- Workers poll this table after each task to check for commands
CREATE TABLE IF NOT EXISTS worker_commands (
id SERIAL PRIMARY KEY,
worker_id TEXT NOT NULL,
command TEXT NOT NULL, -- 'decommission', 'pause', 'resume'
reason TEXT,
issued_by TEXT,
issued_at TIMESTAMPTZ DEFAULT NOW(),
acknowledged_at TIMESTAMPTZ,
executed_at TIMESTAMPTZ,
status TEXT DEFAULT 'pending' -- 'pending', 'acknowledged', 'executed', 'cancelled'
);
-- Index for worker lookups
CREATE INDEX IF NOT EXISTS idx_worker_commands_worker_id ON worker_commands(worker_id);
CREATE INDEX IF NOT EXISTS idx_worker_commands_pending ON worker_commands(worker_id, status) WHERE status = 'pending';
-- Add decommission_requested column to worker_registry for quick checks
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested BOOLEAN DEFAULT FALSE;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_reason TEXT;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested_at TIMESTAMPTZ;
-- Comment
COMMENT ON TABLE worker_commands IS 'Commands issued to workers (decommission after task, pause, etc.)';

View File

@@ -1,88 +0,0 @@
-- Migration 083: Discovery Run Tracking
-- Tracks progress of store discovery runs step-by-step
-- Main discovery runs table
CREATE TABLE IF NOT EXISTS discovery_runs (
id SERIAL PRIMARY KEY,
platform VARCHAR(50) NOT NULL DEFAULT 'dutchie',
status VARCHAR(20) NOT NULL DEFAULT 'running', -- running, completed, failed
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
finished_at TIMESTAMPTZ,
task_id INTEGER REFERENCES worker_task_queue(id),
-- Totals
states_total INTEGER DEFAULT 0,
states_completed INTEGER DEFAULT 0,
locations_discovered INTEGER DEFAULT 0,
locations_promoted INTEGER DEFAULT 0,
new_store_ids INTEGER[] DEFAULT '{}',
-- Error info
error_message TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Per-state progress within a run
CREATE TABLE IF NOT EXISTS discovery_run_states (
id SERIAL PRIMARY KEY,
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
state_code VARCHAR(2) NOT NULL,
status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending, running, completed, failed
started_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
-- Results
cities_found INTEGER DEFAULT 0,
locations_found INTEGER DEFAULT 0,
locations_upserted INTEGER DEFAULT 0,
new_dispensary_ids INTEGER[] DEFAULT '{}',
-- Error info
error_message TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(run_id, state_code)
);
-- Step-by-step log for detailed progress tracking
CREATE TABLE IF NOT EXISTS discovery_run_steps (
id SERIAL PRIMARY KEY,
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
state_code VARCHAR(2),
step_name VARCHAR(100) NOT NULL,
status VARCHAR(20) NOT NULL DEFAULT 'started', -- started, completed, failed
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
finished_at TIMESTAMPTZ,
-- Details (JSON for flexibility)
details JSONB DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Indexes for querying
CREATE INDEX IF NOT EXISTS idx_discovery_runs_status ON discovery_runs(status);
CREATE INDEX IF NOT EXISTS idx_discovery_runs_platform ON discovery_runs(platform);
CREATE INDEX IF NOT EXISTS idx_discovery_runs_started_at ON discovery_runs(started_at DESC);
CREATE INDEX IF NOT EXISTS idx_discovery_run_states_run_id ON discovery_run_states(run_id);
CREATE INDEX IF NOT EXISTS idx_discovery_run_steps_run_id ON discovery_run_steps(run_id);
-- View for latest run status per platform
CREATE OR REPLACE VIEW v_latest_discovery_runs AS
SELECT DISTINCT ON (platform)
id,
platform,
status,
started_at,
finished_at,
states_total,
states_completed,
locations_discovered,
locations_promoted,
array_length(new_store_ids, 1) as new_stores_count,
error_message,
EXTRACT(EPOCH FROM (COALESCE(finished_at, NOW()) - started_at)) as duration_seconds
FROM discovery_runs
ORDER BY platform, started_at DESC;

View File

@@ -1,253 +0,0 @@
-- Migration 084: Dual Transport Preflight System
-- Workers run both curl and http (Puppeteer) preflights on startup
-- Tasks can require a specific transport method
-- ===================================================================
-- PART 1: Add preflight columns to worker_registry
-- ===================================================================
-- Preflight status for curl/axios transport (proxy-based)
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_curl_status VARCHAR(20) DEFAULT 'pending';
-- Preflight status for http/Puppeteer transport (browser-based)
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_http_status VARCHAR(20) DEFAULT 'pending';
-- Timestamps for when each preflight completed
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_curl_at TIMESTAMPTZ;
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_http_at TIMESTAMPTZ;
-- Error messages for failed preflights
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_curl_error TEXT;
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_http_error TEXT;
-- Response time for successful preflights (ms)
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_curl_ms INTEGER;
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_http_ms INTEGER;
-- Constraints for preflight status values
ALTER TABLE worker_registry
DROP CONSTRAINT IF EXISTS valid_preflight_curl_status;
ALTER TABLE worker_registry
ADD CONSTRAINT valid_preflight_curl_status
CHECK (preflight_curl_status IN ('pending', 'passed', 'failed', 'skipped'));
ALTER TABLE worker_registry
DROP CONSTRAINT IF EXISTS valid_preflight_http_status;
ALTER TABLE worker_registry
ADD CONSTRAINT valid_preflight_http_status
CHECK (preflight_http_status IN ('pending', 'passed', 'failed', 'skipped'));
-- ===================================================================
-- PART 2: Add method column to worker_tasks
-- ===================================================================
-- Transport method requirement for the task
-- NULL = no preference (any worker can claim)
-- 'curl' = requires curl/axios transport (proxy-based, fast)
-- 'http' = requires http/Puppeteer transport (browser-based, anti-detect)
ALTER TABLE worker_tasks
ADD COLUMN IF NOT EXISTS method VARCHAR(10);
-- Constraint for valid method values
ALTER TABLE worker_tasks
DROP CONSTRAINT IF EXISTS valid_task_method;
ALTER TABLE worker_tasks
ADD CONSTRAINT valid_task_method
CHECK (method IS NULL OR method IN ('curl', 'http'));
-- Index for method-based task claiming
CREATE INDEX IF NOT EXISTS idx_worker_tasks_method
ON worker_tasks(method)
WHERE status = 'pending';
-- Set default method for all existing pending tasks to 'http'
-- ALL current tasks require Puppeteer/browser-based transport
UPDATE worker_tasks
SET method = 'http'
WHERE method IS NULL;
-- ===================================================================
-- PART 3: Update claim_task function for method compatibility
-- ===================================================================
CREATE OR REPLACE FUNCTION claim_task(
p_role VARCHAR(50),
p_worker_id VARCHAR(100),
p_curl_passed BOOLEAN DEFAULT TRUE,
p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
claimed_task worker_tasks;
BEGIN
UPDATE worker_tasks
SET
status = 'claimed',
worker_id = p_worker_id,
claimed_at = NOW(),
updated_at = NOW()
WHERE id = (
SELECT id FROM worker_tasks
WHERE role = p_role
AND status = 'pending'
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
-- Method compatibility: worker must have passed the required preflight
AND (
method IS NULL -- No preference, any worker can claim
OR (method = 'curl' AND p_curl_passed = TRUE)
OR (method = 'http' AND p_http_passed = TRUE)
)
-- Exclude stores that already have an active task
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
SELECT dispensary_id FROM worker_tasks
WHERE status IN ('claimed', 'running')
AND dispensary_id IS NOT NULL
))
ORDER BY priority DESC, created_at ASC
LIMIT 1
FOR UPDATE SKIP LOCKED
)
RETURNING * INTO claimed_task;
RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
-- ===================================================================
-- PART 4: Update v_active_workers view
-- ===================================================================
DROP VIEW IF EXISTS v_active_workers;
CREATE VIEW v_active_workers AS
SELECT
wr.id,
wr.worker_id,
wr.friendly_name,
wr.role,
wr.status,
wr.pod_name,
wr.hostname,
wr.started_at,
wr.last_heartbeat_at,
wr.last_task_at,
wr.tasks_completed,
wr.tasks_failed,
wr.current_task_id,
-- Preflight status
wr.preflight_curl_status,
wr.preflight_http_status,
wr.preflight_curl_at,
wr.preflight_http_at,
wr.preflight_curl_error,
wr.preflight_http_error,
wr.preflight_curl_ms,
wr.preflight_http_ms,
-- Computed fields
EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
CASE
WHEN wr.status = 'offline' THEN 'offline'
WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
WHEN wr.current_task_id IS NOT NULL THEN 'busy'
ELSE 'ready'
END as health_status,
-- Capability flags (can this worker handle curl/http tasks?)
(wr.preflight_curl_status = 'passed') as can_curl,
(wr.preflight_http_status = 'passed') as can_http
FROM worker_registry wr
WHERE wr.status != 'terminated'
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;
-- ===================================================================
-- PART 5: View for task queue with method info
-- ===================================================================
DROP VIEW IF EXISTS v_task_history;
CREATE VIEW v_task_history AS
SELECT
t.id,
t.role,
t.dispensary_id,
d.name as dispensary_name,
t.platform,
t.status,
t.priority,
t.method,
t.worker_id,
t.scheduled_for,
t.claimed_at,
t.started_at,
t.completed_at,
t.error_message,
t.retry_count,
t.created_at,
EXTRACT(EPOCH FROM (t.completed_at - t.started_at)) as duration_sec
FROM worker_tasks t
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
ORDER BY t.created_at DESC;
-- ===================================================================
-- PART 6: Helper function to update worker preflight status
-- ===================================================================
CREATE OR REPLACE FUNCTION update_worker_preflight(
p_worker_id VARCHAR(100),
p_transport VARCHAR(10), -- 'curl' or 'http'
p_status VARCHAR(20), -- 'passed', 'failed', 'skipped'
p_response_ms INTEGER DEFAULT NULL,
p_error TEXT DEFAULT NULL
) RETURNS VOID AS $$
BEGIN
IF p_transport = 'curl' THEN
UPDATE worker_registry
SET
preflight_curl_status = p_status,
preflight_curl_at = NOW(),
preflight_curl_ms = p_response_ms,
preflight_curl_error = p_error,
updated_at = NOW()
WHERE worker_id = p_worker_id;
ELSIF p_transport = 'http' THEN
UPDATE worker_registry
SET
preflight_http_status = p_status,
preflight_http_at = NOW(),
preflight_http_ms = p_response_ms,
preflight_http_error = p_error,
updated_at = NOW()
WHERE worker_id = p_worker_id;
END IF;
END;
$$ LANGUAGE plpgsql;
-- ===================================================================
-- Comments
-- ===================================================================
COMMENT ON COLUMN worker_registry.preflight_curl_status IS 'Status of curl/axios preflight: pending, passed, failed, skipped';
COMMENT ON COLUMN worker_registry.preflight_http_status IS 'Status of http/Puppeteer preflight: pending, passed, failed, skipped';
COMMENT ON COLUMN worker_registry.preflight_curl_at IS 'When curl preflight completed';
COMMENT ON COLUMN worker_registry.preflight_http_at IS 'When http preflight completed';
COMMENT ON COLUMN worker_registry.preflight_curl_error IS 'Error message if curl preflight failed';
COMMENT ON COLUMN worker_registry.preflight_http_error IS 'Error message if http preflight failed';
COMMENT ON COLUMN worker_registry.preflight_curl_ms IS 'Response time of successful curl preflight (ms)';
COMMENT ON COLUMN worker_registry.preflight_http_ms IS 'Response time of successful http preflight (ms)';
COMMENT ON COLUMN worker_tasks.method IS 'Transport method required: NULL=any, curl=proxy-based, http=browser-based';
COMMENT ON FUNCTION claim_task IS 'Atomically claim a task, respecting method requirements and per-store locking';
COMMENT ON FUNCTION update_worker_preflight IS 'Update a workers preflight status for a given transport';

View File

@@ -1,59 +0,0 @@
-- Migration 085: Trusted Origins Management
-- Allows admin to manage trusted IPs and domains via UI instead of hardcoded values
-- Trusted origins table (IPs and domains that bypass API key auth)
CREATE TABLE IF NOT EXISTS trusted_origins (
id SERIAL PRIMARY KEY,
-- Origin type: 'ip', 'domain', 'pattern'
origin_type VARCHAR(20) NOT NULL CHECK (origin_type IN ('ip', 'domain', 'pattern')),
-- The actual value
-- For ip: '127.0.0.1', '::1', '192.168.1.0/24'
-- For domain: 'cannaiq.co', 'findadispo.com'
-- For pattern: '^https://.*\.cannabrands\.app$' (regex)
origin_value VARCHAR(255) NOT NULL,
-- Description for admin reference
description TEXT,
-- Active flag
active BOOLEAN DEFAULT true,
-- Audit
created_at TIMESTAMPTZ DEFAULT NOW(),
created_by INTEGER REFERENCES users(id),
updated_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(origin_type, origin_value)
);
-- Index for quick lookups
CREATE INDEX IF NOT EXISTS idx_trusted_origins_active ON trusted_origins(active) WHERE active = true;
CREATE INDEX IF NOT EXISTS idx_trusted_origins_type ON trusted_origins(origin_type, active);
-- Seed with current hardcoded values
INSERT INTO trusted_origins (origin_type, origin_value, description) VALUES
-- Trusted IPs (localhost)
('ip', '127.0.0.1', 'Localhost IPv4'),
('ip', '::1', 'Localhost IPv6'),
('ip', '::ffff:127.0.0.1', 'Localhost IPv4-mapped IPv6'),
-- Trusted domains
('domain', 'cannaiq.co', 'CannaiQ production'),
('domain', 'www.cannaiq.co', 'CannaiQ production (www)'),
('domain', 'findadispo.com', 'FindADispo production'),
('domain', 'www.findadispo.com', 'FindADispo production (www)'),
('domain', 'findagram.co', 'Findagram production'),
('domain', 'www.findagram.co', 'Findagram production (www)'),
('domain', 'localhost:3010', 'Local backend dev'),
('domain', 'localhost:8080', 'Local admin dev'),
('domain', 'localhost:5173', 'Local Vite dev'),
-- Pattern-based (regex)
('pattern', '^https://.*\.cannabrands\.app$', 'All cannabrands.app subdomains'),
('pattern', '^https://.*\.cannaiq\.co$', 'All cannaiq.co subdomains')
ON CONFLICT (origin_type, origin_value) DO NOTHING;
-- Add comment
COMMENT ON TABLE trusted_origins IS 'IPs and domains that bypass API key authentication. Managed via /admin.';

View File

@@ -35,8 +35,6 @@
"puppeteer-extra-plugin-stealth": "^2.11.2",
"sharp": "^0.32.0",
"socks-proxy-agent": "^8.0.2",
"swagger-jsdoc": "^6.2.8",
"swagger-ui-express": "^5.0.1",
"user-agents": "^1.1.669",
"uuid": "^9.0.1",
"zod": "^3.22.4"
@@ -49,53 +47,11 @@
"@types/node": "^20.10.5",
"@types/node-cron": "^3.0.11",
"@types/pg": "^8.15.6",
"@types/swagger-jsdoc": "^6.0.4",
"@types/swagger-ui-express": "^4.1.8",
"@types/uuid": "^9.0.7",
"tsx": "^4.7.0",
"typescript": "^5.3.3"
}
},
"node_modules/@apidevtools/json-schema-ref-parser": {
"version": "9.1.2",
"resolved": "https://registry.npmjs.org/@apidevtools/json-schema-ref-parser/-/json-schema-ref-parser-9.1.2.tgz",
"integrity": "sha512-r1w81DpR+KyRWd3f+rk6TNqMgedmAxZP5v5KWlXQWlgMUUtyEJch0DKEci1SorPMiSeM8XPl7MZ3miJ60JIpQg==",
"dependencies": {
"@jsdevtools/ono": "^7.1.3",
"@types/json-schema": "^7.0.6",
"call-me-maybe": "^1.0.1",
"js-yaml": "^4.1.0"
}
},
"node_modules/@apidevtools/openapi-schemas": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/@apidevtools/openapi-schemas/-/openapi-schemas-2.1.0.tgz",
"integrity": "sha512-Zc1AlqrJlX3SlpupFGpiLi2EbteyP7fXmUOGup6/DnkRgjP9bgMM/ag+n91rsv0U1Gpz0H3VILA/o3bW7Ua6BQ==",
"engines": {
"node": ">=10"
}
},
"node_modules/@apidevtools/swagger-methods": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-methods/-/swagger-methods-3.0.2.tgz",
"integrity": "sha512-QAkD5kK2b1WfjDS/UQn/qQkbwF31uqRjPTrsCs5ZG9BQGAkjwvqGFjjPqAuzac/IYzpPtRzjCP1WrTuAIjMrXg=="
},
"node_modules/@apidevtools/swagger-parser": {
"version": "10.0.3",
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-parser/-/swagger-parser-10.0.3.tgz",
"integrity": "sha512-sNiLY51vZOmSPFZA5TF35KZ2HbgYklQnTSDnkghamzLb3EkNtcQnrBQEj5AOCxHpTtXpqMCRM1CrmV2rG6nw4g==",
"dependencies": {
"@apidevtools/json-schema-ref-parser": "^9.0.6",
"@apidevtools/openapi-schemas": "^2.0.4",
"@apidevtools/swagger-methods": "^3.0.2",
"@jsdevtools/ono": "^7.1.3",
"call-me-maybe": "^1.0.1",
"z-schema": "^5.0.1"
},
"peerDependencies": {
"openapi-types": ">=7"
}
},
"node_modules/@babel/code-frame": {
"version": "7.27.1",
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz",
@@ -538,11 +494,6 @@
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
},
"node_modules/@jsdevtools/ono": {
"version": "7.1.3",
"resolved": "https://registry.npmjs.org/@jsdevtools/ono/-/ono-7.1.3.tgz",
"integrity": "sha512-4JQNk+3mVzK3xh2rqd6RB4J46qUR19azEHBneZyTZM+c456qOrbbM/5xcR8huNCCcbVt7+UmizG6GuUvPvKUYg=="
},
"node_modules/@jsep-plugin/assignment": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
@@ -810,12 +761,6 @@
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
},
"node_modules/@scarf/scarf": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/@scarf/scarf/-/scarf-1.4.0.tgz",
"integrity": "sha512-xxeapPiUXdZAE3che6f3xogoJPeZgig6omHEy1rIY5WVsB3H2BHNnZH+gHG6x91SCWyQCzWGsuL2Hh3ClO5/qQ==",
"hasInstallScript": true
},
"node_modules/@tootallnate/quickjs-emscripten": {
"version": "0.23.0",
"resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz",
@@ -910,11 +855,6 @@
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
},
"node_modules/@types/json-schema": {
"version": "7.0.15",
"resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz",
"integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA=="
},
"node_modules/@types/jsonwebtoken": {
"version": "9.0.10",
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
@@ -1020,22 +960,6 @@
"@types/node": "*"
}
},
"node_modules/@types/swagger-jsdoc": {
"version": "6.0.4",
"resolved": "https://registry.npmjs.org/@types/swagger-jsdoc/-/swagger-jsdoc-6.0.4.tgz",
"integrity": "sha512-W+Xw5epcOZrF/AooUM/PccNMSAFOKWZA5dasNyMujTwsBkU74njSJBpvCCJhHAJ95XRMzQrrW844Btu0uoetwQ==",
"dev": true
},
"node_modules/@types/swagger-ui-express": {
"version": "4.1.8",
"resolved": "https://registry.npmjs.org/@types/swagger-ui-express/-/swagger-ui-express-4.1.8.tgz",
"integrity": "sha512-AhZV8/EIreHFmBV5wAs0gzJUNq9JbbSXgJLQubCC0jtIo6prnI9MIRRxnU4MZX9RB9yXxF1V4R7jtLl/Wcj31g==",
"dev": true,
"dependencies": {
"@types/express": "*",
"@types/serve-static": "*"
}
},
"node_modules/@types/uuid": {
"version": "9.0.8",
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
@@ -1510,11 +1434,6 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/call-me-maybe": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-me-maybe/-/call-me-maybe-1.0.2.tgz",
"integrity": "sha512-HpX65o1Hnr9HH25ojC1YGs7HCQLq0GCOibSaWER0eNpgJ/Z1MZv2mTc7+xh6WOPxbRVcmgbv4hGU+uSQ/2xFZQ=="
},
"node_modules/callsites": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
@@ -1675,14 +1594,6 @@
"node": ">= 0.8"
}
},
"node_modules/commander": {
"version": "6.2.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-6.2.0.tgz",
"integrity": "sha512-zP4jEKbe8SHzKJYQmq8Y9gYjtO/POJLgIdKgV7B9qNmABVFVc+ctqSX6iXh4mCpJfRBOabiZ2YKPg8ciDw6C+Q==",
"engines": {
"node": ">= 6"
}
},
"node_modules/concat-map": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
@@ -1952,17 +1863,6 @@
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1232444.tgz",
"integrity": "sha512-pM27vqEfxSxRkTMnF+XCmxSEb6duO5R+t8A9DEEJgy4Wz2RVanje2mmj99B6A3zv2r/qGfYlOvYznUhuokizmg=="
},
"node_modules/doctrine": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz",
"integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==",
"dependencies": {
"esutils": "^2.0.2"
},
"engines": {
"node": ">=6.0.0"
}
},
"node_modules/dom-serializer": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
@@ -3358,12 +3258,6 @@
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
"integrity": "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ=="
},
"node_modules/lodash.get": {
"version": "4.4.2",
"resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz",
"integrity": "sha512-z+Uw/vLuy6gQe8cfaFWD7p0wVv8fJl3mbzXh33RS+0oW2wvUqiRXiQ69gLWSLpgB5/6sU+r6BlQR0MBILadqTQ==",
"deprecated": "This package is deprecated. Use the optional chaining (?.) operator instead."
},
"node_modules/lodash.includes": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/lodash.includes/-/lodash.includes-4.3.0.tgz",
@@ -3379,12 +3273,6 @@
"resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz",
"integrity": "sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg=="
},
"node_modules/lodash.isequal": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/lodash.isequal/-/lodash.isequal-4.5.0.tgz",
"integrity": "sha512-pDo3lu8Jhfjqls6GkMgpahsF9kCyayhgykjyLMNFTKWrpVdAQtYyB4muAMWozBB4ig/dtWAmsMxLEI8wuz+DYQ==",
"deprecated": "This package is deprecated. Use require('node:util').isDeepStrictEqual instead."
},
"node_modules/lodash.isinteger": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/lodash.isinteger/-/lodash.isinteger-4.0.4.tgz",
@@ -3405,11 +3293,6 @@
"resolved": "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz",
"integrity": "sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw=="
},
"node_modules/lodash.mergewith": {
"version": "4.6.2",
"resolved": "https://registry.npmjs.org/lodash.mergewith/-/lodash.mergewith-4.6.2.tgz",
"integrity": "sha512-GK3g5RPZWTRSeLSpgP8Xhra+pnjBC56q9FZYe1d5RN3TJ35dbkGy3YqBSMbyCrlbi+CM9Z3Jk5yTL7RCsqboyQ=="
},
"node_modules/lodash.once": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz",
@@ -3865,12 +3748,6 @@
"wrappy": "1"
}
},
"node_modules/openapi-types": {
"version": "12.1.3",
"resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz",
"integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==",
"peer": true
},
"node_modules/openid-client": {
"version": "6.8.1",
"resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
@@ -5311,78 +5188,6 @@
}
]
},
"node_modules/swagger-jsdoc": {
"version": "6.2.8",
"resolved": "https://registry.npmjs.org/swagger-jsdoc/-/swagger-jsdoc-6.2.8.tgz",
"integrity": "sha512-VPvil1+JRpmJ55CgAtn8DIcpBs0bL5L3q5bVQvF4tAW/k/9JYSj7dCpaYCAv5rufe0vcCbBRQXGvzpkWjvLklQ==",
"dependencies": {
"commander": "6.2.0",
"doctrine": "3.0.0",
"glob": "7.1.6",
"lodash.mergewith": "^4.6.2",
"swagger-parser": "^10.0.3",
"yaml": "2.0.0-1"
},
"bin": {
"swagger-jsdoc": "bin/swagger-jsdoc.js"
},
"engines": {
"node": ">=12.0.0"
}
},
"node_modules/swagger-jsdoc/node_modules/glob": {
"version": "7.1.6",
"resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz",
"integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==",
"deprecated": "Glob versions prior to v9 are no longer supported",
"dependencies": {
"fs.realpath": "^1.0.0",
"inflight": "^1.0.4",
"inherits": "2",
"minimatch": "^3.0.4",
"once": "^1.3.0",
"path-is-absolute": "^1.0.0"
},
"engines": {
"node": "*"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/swagger-parser": {
"version": "10.0.3",
"resolved": "https://registry.npmjs.org/swagger-parser/-/swagger-parser-10.0.3.tgz",
"integrity": "sha512-nF7oMeL4KypldrQhac8RyHerJeGPD1p2xDh900GPvc+Nk7nWP6jX2FcC7WmkinMoAmoO774+AFXcWsW8gMWEIg==",
"dependencies": {
"@apidevtools/swagger-parser": "10.0.3"
},
"engines": {
"node": ">=10"
}
},
"node_modules/swagger-ui-dist": {
"version": "5.31.0",
"resolved": "https://registry.npmjs.org/swagger-ui-dist/-/swagger-ui-dist-5.31.0.tgz",
"integrity": "sha512-zSUTIck02fSga6rc0RZP3b7J7wgHXwLea8ZjgLA3Vgnb8QeOl3Wou2/j5QkzSGeoz6HusP/coYuJl33aQxQZpg==",
"dependencies": {
"@scarf/scarf": "=1.4.0"
}
},
"node_modules/swagger-ui-express": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/swagger-ui-express/-/swagger-ui-express-5.0.1.tgz",
"integrity": "sha512-SrNU3RiBGTLLmFU8GIJdOdanJTl4TOmT27tt3bWWHppqYmAZ6IDuEuBvMU6nZq0zLEe6b/1rACXCgLZqO6ZfrA==",
"dependencies": {
"swagger-ui-dist": ">=5.0.0"
},
"engines": {
"node": ">= v0.10.32"
},
"peerDependencies": {
"express": ">=4.0.0 || >=5.0.0-beta"
}
},
"node_modules/tar": {
"version": "6.2.1",
"resolved": "https://registry.npmjs.org/tar/-/tar-6.2.1.tgz",
@@ -5601,14 +5406,6 @@
"uuid": "dist/bin/uuid"
}
},
"node_modules/validator": {
"version": "13.15.23",
"resolved": "https://registry.npmjs.org/validator/-/validator-13.15.23.tgz",
"integrity": "sha512-4yoz1kEWqUjzi5zsPbAS/903QXSYp0UOtHsPpp7p9rHAw/W+dkInskAE386Fat3oKRROwO98d9ZB0G4cObgUyw==",
"engines": {
"node": ">= 0.10"
}
},
"node_modules/vary": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
@@ -5787,14 +5584,6 @@
"resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
"integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
},
"node_modules/yaml": {
"version": "2.0.0-1",
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.0.0-1.tgz",
"integrity": "sha512-W7h5dEhywMKenDJh2iX/LABkbFnBxasD27oyXWDS/feDsxiw0dD5ncXdYXgkvAsXIY2MpW/ZKkr9IU30DBdMNQ==",
"engines": {
"node": ">= 6"
}
},
"node_modules/yargs": {
"version": "17.7.2",
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
@@ -5829,34 +5618,6 @@
"fd-slicer": "~1.1.0"
}
},
"node_modules/z-schema": {
"version": "5.0.5",
"resolved": "https://registry.npmjs.org/z-schema/-/z-schema-5.0.5.tgz",
"integrity": "sha512-D7eujBWkLa3p2sIpJA0d1pr7es+a7m0vFAnZLlCEKq/Ij2k0MLi9Br2UPxoxdYystm5K1yeBGzub0FlYUEWj2Q==",
"dependencies": {
"lodash.get": "^4.4.2",
"lodash.isequal": "^4.5.0",
"validator": "^13.7.0"
},
"bin": {
"z-schema": "bin/z-schema"
},
"engines": {
"node": ">=8.0.0"
},
"optionalDependencies": {
"commander": "^9.4.1"
}
},
"node_modules/z-schema/node_modules/commander": {
"version": "9.5.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-9.5.0.tgz",
"integrity": "sha512-KRs7WVDKg86PWiuAqhDrAQnTXZKraVcCc6vFdL14qrZ/DcWwuRo7VoiYXalXO7S5GKpqYiVEwCbgFDfxNHKJBQ==",
"optional": true,
"engines": {
"node": "^12.20.0 || >=14"
}
},
"node_modules/zod": {
"version": "3.25.76",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",

View File

@@ -49,8 +49,6 @@
"puppeteer-extra-plugin-stealth": "^2.11.2",
"sharp": "^0.32.0",
"socks-proxy-agent": "^8.0.2",
"swagger-jsdoc": "^6.2.8",
"swagger-ui-express": "^5.0.1",
"user-agents": "^1.1.669",
"uuid": "^9.0.1",
"zod": "^3.22.4"
@@ -63,8 +61,6 @@
"@types/node": "^20.10.5",
"@types/node-cron": "^3.0.11",
"@types/pg": "^8.15.6",
"@types/swagger-jsdoc": "^6.0.4",
"@types/swagger-ui-express": "^4.1.8",
"@types/uuid": "^9.0.7",
"tsx": "^4.7.0",
"typescript": "^5.3.3"

View File

@@ -1,46 +0,0 @@
# DEPRECATED CODE - DO NOT USE
**These directories contain OLD, ABANDONED code.**
## What's Here
| Directory | What It Was | Why Deprecated |
|-----------|-------------|----------------|
| `hydration/` | Old pipeline for processing crawl data | Replaced by `src/tasks/handlers/` |
| `scraper-v2/` | Old Puppeteer-based scraper engine | Replaced by curl-based `src/platforms/dutchie/client.ts` |
| `canonical-hydration/` | Intermediate step toward canonical schema | Merged into task handlers |
## What to Use Instead
| Old (DONT USE) | New (USE THIS) |
|----------------|----------------|
| `hydration/normalizers/dutchie.ts` | `src/tasks/handlers/product-refresh.ts` |
| `hydration/producer.ts` | `src/tasks/handlers/payload-fetch.ts` |
| `scraper-v2/engine.ts` | `src/platforms/dutchie/client.ts` |
| `scraper-v2/scheduler.ts` | `src/services/task-scheduler.ts` |
## Why Keep This Code?
- Historical reference only
- Some patterns may be useful for debugging
- Will be deleted once confirmed not needed
## Claude Instructions
**IF YOU ARE CLAUDE:**
1. NEVER import from `src/_deprecated/`
2. NEVER reference these files as examples
3. NEVER try to "fix" or "update" code in here
4. If you see imports from these directories, suggest replacing them
**Correct imports:**
```typescript
// GOOD
import { executeGraphQL } from '../platforms/dutchie/client';
import { pool } from '../db/pool';
// BAD - DO NOT USE
import { something } from '../_deprecated/hydration/...';
import { something } from '../_deprecated/scraper-v2/...';
```

View File

@@ -1,584 +0,0 @@
/**
* System API Routes
*
* Provides REST API endpoints for system monitoring and control:
* - /api/system/sync/* - Sync orchestrator
* - /api/system/dlq/* - Dead-letter queue
* - /api/system/integrity/* - Integrity checks
* - /api/system/fix/* - Auto-fix routines
* - /api/system/alerts/* - System alerts
* - /metrics - Prometheus metrics
*
* Phase 5: Full Production Sync + Monitoring
*/
import { Router, Request, Response } from 'express';
import { Pool } from 'pg';
import {
SyncOrchestrator,
MetricsService,
DLQService,
AlertService,
IntegrityService,
AutoFixService,
} from '../services';
export function createSystemRouter(pool: Pool): Router {
const router = Router();
// Initialize services
const metrics = new MetricsService(pool);
const dlq = new DLQService(pool);
const alerts = new AlertService(pool);
const integrity = new IntegrityService(pool, alerts);
const autoFix = new AutoFixService(pool, alerts);
const orchestrator = new SyncOrchestrator(pool, metrics, dlq, alerts);
// ============================================================
// SYNC ORCHESTRATOR ENDPOINTS
// ============================================================
/**
* GET /api/system/sync/status
* Get current sync status
*/
router.get('/sync/status', async (_req: Request, res: Response) => {
try {
const status = await orchestrator.getStatus();
res.json(status);
} catch (error) {
console.error('[System] Sync status error:', error);
res.status(500).json({ error: 'Failed to get sync status' });
}
});
/**
* POST /api/system/sync/run
* Trigger a sync run
*/
router.post('/sync/run', async (req: Request, res: Response) => {
try {
const triggeredBy = req.body.triggeredBy || 'api';
const result = await orchestrator.runSync();
res.json({
success: true,
triggeredBy,
metrics: result,
});
} catch (error) {
console.error('[System] Sync run error:', error);
res.status(500).json({
success: false,
error: error instanceof Error ? error.message : 'Sync run failed',
});
}
});
/**
* GET /api/system/sync/queue-depth
* Get queue depth information
*/
router.get('/sync/queue-depth', async (_req: Request, res: Response) => {
try {
const depth = await orchestrator.getQueueDepth();
res.json(depth);
} catch (error) {
console.error('[System] Queue depth error:', error);
res.status(500).json({ error: 'Failed to get queue depth' });
}
});
/**
* GET /api/system/sync/health
* Get sync health status
*/
router.get('/sync/health', async (_req: Request, res: Response) => {
try {
const health = await orchestrator.getHealth();
res.status(health.healthy ? 200 : 503).json(health);
} catch (error) {
console.error('[System] Health check error:', error);
res.status(500).json({ healthy: false, error: 'Health check failed' });
}
});
/**
* POST /api/system/sync/pause
* Pause the orchestrator
*/
router.post('/sync/pause', async (req: Request, res: Response) => {
try {
const reason = req.body.reason || 'Manual pause';
await orchestrator.pause(reason);
res.json({ success: true, message: 'Orchestrator paused' });
} catch (error) {
console.error('[System] Pause error:', error);
res.status(500).json({ error: 'Failed to pause orchestrator' });
}
});
/**
* POST /api/system/sync/resume
* Resume the orchestrator
*/
router.post('/sync/resume', async (_req: Request, res: Response) => {
try {
await orchestrator.resume();
res.json({ success: true, message: 'Orchestrator resumed' });
} catch (error) {
console.error('[System] Resume error:', error);
res.status(500).json({ error: 'Failed to resume orchestrator' });
}
});
// ============================================================
// DLQ ENDPOINTS
// ============================================================
/**
* GET /api/system/dlq
* List DLQ payloads
*/
router.get('/dlq', async (req: Request, res: Response) => {
try {
const options = {
status: req.query.status as string,
errorType: req.query.errorType as string,
dispensaryId: req.query.dispensaryId ? parseInt(req.query.dispensaryId as string) : undefined,
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
};
const result = await dlq.listPayloads(options);
res.json(result);
} catch (error) {
console.error('[System] DLQ list error:', error);
res.status(500).json({ error: 'Failed to list DLQ payloads' });
}
});
/**
* GET /api/system/dlq/stats
* Get DLQ statistics
*/
router.get('/dlq/stats', async (_req: Request, res: Response) => {
try {
const stats = await dlq.getStats();
res.json(stats);
} catch (error) {
console.error('[System] DLQ stats error:', error);
res.status(500).json({ error: 'Failed to get DLQ stats' });
}
});
/**
* GET /api/system/dlq/summary
* Get DLQ summary by error type
*/
router.get('/dlq/summary', async (_req: Request, res: Response) => {
try {
const summary = await dlq.getSummary();
res.json(summary);
} catch (error) {
console.error('[System] DLQ summary error:', error);
res.status(500).json({ error: 'Failed to get DLQ summary' });
}
});
/**
* GET /api/system/dlq/:id
* Get a specific DLQ payload
*/
router.get('/dlq/:id', async (req: Request, res: Response) => {
try {
const payload = await dlq.getPayload(req.params.id);
if (!payload) {
return res.status(404).json({ error: 'Payload not found' });
}
res.json(payload);
} catch (error) {
console.error('[System] DLQ get error:', error);
res.status(500).json({ error: 'Failed to get DLQ payload' });
}
});
/**
* POST /api/system/dlq/:id/retry
* Retry a DLQ payload
*/
router.post('/dlq/:id/retry', async (req: Request, res: Response) => {
try {
const result = await dlq.retryPayload(req.params.id);
if (result.success) {
res.json(result);
} else {
res.status(400).json(result);
}
} catch (error) {
console.error('[System] DLQ retry error:', error);
res.status(500).json({ error: 'Failed to retry payload' });
}
});
/**
* POST /api/system/dlq/:id/abandon
* Abandon a DLQ payload
*/
router.post('/dlq/:id/abandon', async (req: Request, res: Response) => {
try {
const reason = req.body.reason || 'Manually abandoned';
const abandonedBy = req.body.abandonedBy || 'api';
const success = await dlq.abandonPayload(req.params.id, reason, abandonedBy);
res.json({ success });
} catch (error) {
console.error('[System] DLQ abandon error:', error);
res.status(500).json({ error: 'Failed to abandon payload' });
}
});
/**
* POST /api/system/dlq/bulk-retry
* Bulk retry payloads by error type
*/
router.post('/dlq/bulk-retry', async (req: Request, res: Response) => {
try {
const { errorType } = req.body;
if (!errorType) {
return res.status(400).json({ error: 'errorType is required' });
}
const result = await dlq.bulkRetryByErrorType(errorType);
res.json(result);
} catch (error) {
console.error('[System] DLQ bulk retry error:', error);
res.status(500).json({ error: 'Failed to bulk retry' });
}
});
// ============================================================
// INTEGRITY CHECK ENDPOINTS
// ============================================================
/**
* POST /api/system/integrity/run
* Run all integrity checks
*/
router.post('/integrity/run', async (req: Request, res: Response) => {
try {
const triggeredBy = req.body.triggeredBy || 'api';
const result = await integrity.runAllChecks(triggeredBy);
res.json(result);
} catch (error) {
console.error('[System] Integrity run error:', error);
res.status(500).json({ error: 'Failed to run integrity checks' });
}
});
/**
* GET /api/system/integrity/runs
* Get recent integrity check runs
*/
router.get('/integrity/runs', async (req: Request, res: Response) => {
try {
const limit = req.query.limit ? parseInt(req.query.limit as string) : 10;
const runs = await integrity.getRecentRuns(limit);
res.json(runs);
} catch (error) {
console.error('[System] Integrity runs error:', error);
res.status(500).json({ error: 'Failed to get integrity runs' });
}
});
/**
* GET /api/system/integrity/runs/:runId
* Get results for a specific integrity run
*/
router.get('/integrity/runs/:runId', async (req: Request, res: Response) => {
try {
const results = await integrity.getRunResults(req.params.runId);
res.json(results);
} catch (error) {
console.error('[System] Integrity run results error:', error);
res.status(500).json({ error: 'Failed to get run results' });
}
});
// ============================================================
// AUTO-FIX ENDPOINTS
// ============================================================
/**
* GET /api/system/fix/routines
* Get available fix routines
*/
router.get('/fix/routines', (_req: Request, res: Response) => {
try {
const routines = autoFix.getAvailableRoutines();
res.json(routines);
} catch (error) {
console.error('[System] Get routines error:', error);
res.status(500).json({ error: 'Failed to get routines' });
}
});
/**
* POST /api/system/fix/:routine
* Run a fix routine
*/
router.post('/fix/:routine', async (req: Request, res: Response) => {
try {
const routineName = req.params.routine;
const dryRun = req.body.dryRun === true;
const triggeredBy = req.body.triggeredBy || 'api';
const result = await autoFix.runRoutine(routineName as any, triggeredBy, { dryRun });
res.json(result);
} catch (error) {
console.error('[System] Fix routine error:', error);
res.status(500).json({ error: 'Failed to run fix routine' });
}
});
/**
* GET /api/system/fix/runs
* Get recent fix runs
*/
router.get('/fix/runs', async (req: Request, res: Response) => {
try {
const limit = req.query.limit ? parseInt(req.query.limit as string) : 20;
const runs = await autoFix.getRecentRuns(limit);
res.json(runs);
} catch (error) {
console.error('[System] Fix runs error:', error);
res.status(500).json({ error: 'Failed to get fix runs' });
}
});
// ============================================================
// ALERTS ENDPOINTS
// ============================================================
/**
* GET /api/system/alerts
* List alerts
*/
router.get('/alerts', async (req: Request, res: Response) => {
try {
const options = {
status: req.query.status as any,
severity: req.query.severity as any,
type: req.query.type as string,
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
};
const result = await alerts.listAlerts(options);
res.json(result);
} catch (error) {
console.error('[System] Alerts list error:', error);
res.status(500).json({ error: 'Failed to list alerts' });
}
});
/**
* GET /api/system/alerts/active
* Get active alerts
*/
router.get('/alerts/active', async (_req: Request, res: Response) => {
try {
const activeAlerts = await alerts.getActiveAlerts();
res.json(activeAlerts);
} catch (error) {
console.error('[System] Active alerts error:', error);
res.status(500).json({ error: 'Failed to get active alerts' });
}
});
/**
* GET /api/system/alerts/summary
* Get alert summary
*/
router.get('/alerts/summary', async (_req: Request, res: Response) => {
try {
const summary = await alerts.getSummary();
res.json(summary);
} catch (error) {
console.error('[System] Alerts summary error:', error);
res.status(500).json({ error: 'Failed to get alerts summary' });
}
});
/**
* POST /api/system/alerts/:id/acknowledge
* Acknowledge an alert
*/
router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
try {
const alertId = parseInt(req.params.id);
const acknowledgedBy = req.body.acknowledgedBy || 'api';
const success = await alerts.acknowledgeAlert(alertId, acknowledgedBy);
res.json({ success });
} catch (error) {
console.error('[System] Acknowledge alert error:', error);
res.status(500).json({ error: 'Failed to acknowledge alert' });
}
});
/**
* POST /api/system/alerts/:id/resolve
* Resolve an alert
*/
router.post('/alerts/:id/resolve', async (req: Request, res: Response) => {
try {
const alertId = parseInt(req.params.id);
const resolvedBy = req.body.resolvedBy || 'api';
const success = await alerts.resolveAlert(alertId, resolvedBy);
res.json({ success });
} catch (error) {
console.error('[System] Resolve alert error:', error);
res.status(500).json({ error: 'Failed to resolve alert' });
}
});
/**
* POST /api/system/alerts/bulk-acknowledge
* Bulk acknowledge alerts
*/
router.post('/alerts/bulk-acknowledge', async (req: Request, res: Response) => {
try {
const { ids, acknowledgedBy } = req.body;
if (!ids || !Array.isArray(ids)) {
return res.status(400).json({ error: 'ids array is required' });
}
const count = await alerts.bulkAcknowledge(ids, acknowledgedBy || 'api');
res.json({ acknowledged: count });
} catch (error) {
console.error('[System] Bulk acknowledge error:', error);
res.status(500).json({ error: 'Failed to bulk acknowledge' });
}
});
// ============================================================
// METRICS ENDPOINTS
// ============================================================
/**
* GET /api/system/metrics
* Get all current metrics
*/
router.get('/metrics', async (_req: Request, res: Response) => {
try {
const allMetrics = await metrics.getAllMetrics();
res.json(allMetrics);
} catch (error) {
console.error('[System] Metrics error:', error);
res.status(500).json({ error: 'Failed to get metrics' });
}
});
/**
* GET /api/system/metrics/:name
* Get a specific metric
*/
router.get('/metrics/:name', async (req: Request, res: Response) => {
try {
const metric = await metrics.getMetric(req.params.name);
if (!metric) {
return res.status(404).json({ error: 'Metric not found' });
}
res.json(metric);
} catch (error) {
console.error('[System] Metric error:', error);
res.status(500).json({ error: 'Failed to get metric' });
}
});
/**
* GET /api/system/metrics/:name/history
* Get metric time series
*/
router.get('/metrics/:name/history', async (req: Request, res: Response) => {
try {
const hours = req.query.hours ? parseInt(req.query.hours as string) : 24;
const history = await metrics.getMetricHistory(req.params.name, hours);
res.json(history);
} catch (error) {
console.error('[System] Metric history error:', error);
res.status(500).json({ error: 'Failed to get metric history' });
}
});
/**
* GET /api/system/errors
* Get error summary
*/
router.get('/errors', async (_req: Request, res: Response) => {
try {
const summary = await metrics.getErrorSummary();
res.json(summary);
} catch (error) {
console.error('[System] Error summary error:', error);
res.status(500).json({ error: 'Failed to get error summary' });
}
});
/**
* GET /api/system/errors/recent
* Get recent errors
*/
router.get('/errors/recent', async (req: Request, res: Response) => {
try {
const limit = req.query.limit ? parseInt(req.query.limit as string) : 50;
const errorType = req.query.type as string;
const errors = await metrics.getRecentErrors(limit, errorType);
res.json(errors);
} catch (error) {
console.error('[System] Recent errors error:', error);
res.status(500).json({ error: 'Failed to get recent errors' });
}
});
/**
* POST /api/system/errors/acknowledge
* Acknowledge errors
*/
router.post('/errors/acknowledge', async (req: Request, res: Response) => {
try {
const { ids, acknowledgedBy } = req.body;
if (!ids || !Array.isArray(ids)) {
return res.status(400).json({ error: 'ids array is required' });
}
const count = await metrics.acknowledgeErrors(ids, acknowledgedBy || 'api');
res.json({ acknowledged: count });
} catch (error) {
console.error('[System] Acknowledge errors error:', error);
res.status(500).json({ error: 'Failed to acknowledge errors' });
}
});
return router;
}
/**
* Create Prometheus metrics endpoint (standalone)
*/
export function createPrometheusRouter(pool: Pool): Router {
const router = Router();
const metrics = new MetricsService(pool);
/**
* GET /metrics
* Prometheus-compatible metrics endpoint
*/
router.get('/', async (_req: Request, res: Response) => {
try {
const prometheusOutput = await metrics.getPrometheusMetrics();
res.set('Content-Type', 'text/plain; version=0.0.4');
res.send(prometheusOutput);
} catch (error) {
console.error('[Prometheus] Metrics error:', error);
res.status(500).send('# Error generating metrics');
}
});
return router;
}

View File

@@ -7,7 +7,6 @@
*
* NO username/password auth in API. Use tokens only.
*
* Trusted origins are managed via /admin and stored in the trusted_origins table.
* Localhost bypass: curl from 127.0.0.1 gets automatic admin access.
*/
import { Request, Response, NextFunction } from 'express';
@@ -17,8 +16,8 @@ import { pool } from '../db/pool';
const JWT_SECRET = process.env.JWT_SECRET || 'change_this_in_production';
// Fallback trusted origins (used if DB unavailable)
const FALLBACK_TRUSTED_ORIGINS = [
// Trusted origins that bypass auth for internal/same-origin requests
const TRUSTED_ORIGINS = [
'https://cannaiq.co',
'https://www.cannaiq.co',
'https://findadispo.com',
@@ -30,108 +29,31 @@ const FALLBACK_TRUSTED_ORIGINS = [
'http://localhost:5173',
];
const FALLBACK_TRUSTED_PATTERNS = [
/^https:\/\/.*\.cannabrands\.app$/,
/^https:\/\/.*\.cannaiq\.co$/,
// Pattern-based trusted origins (wildcards)
const TRUSTED_ORIGIN_PATTERNS = [
/^https:\/\/.*\.cannabrands\.app$/, // *.cannabrands.app
/^https:\/\/.*\.cannaiq\.co$/, // *.cannaiq.co
];
const FALLBACK_TRUSTED_IPS = [
// Trusted IPs for internal pod-to-pod communication
const TRUSTED_IPS = [
'127.0.0.1',
'::1',
'::ffff:127.0.0.1',
];
// Cache for DB-backed trusted origins
let trustedOriginsCache: {
ips: Set<string>;
domains: Set<string>;
patterns: RegExp[];
loadedAt: Date;
} | null = null;
/**
* Load trusted origins from DB with caching (5 min TTL)
*/
async function loadTrustedOrigins(): Promise<{
ips: Set<string>;
domains: Set<string>;
patterns: RegExp[];
}> {
// Return cached if fresh
if (trustedOriginsCache) {
const age = Date.now() - trustedOriginsCache.loadedAt.getTime();
if (age < 5 * 60 * 1000) {
return trustedOriginsCache;
}
}
try {
const result = await pool.query(`
SELECT origin_type, origin_value
FROM trusted_origins
WHERE active = true
`);
const ips = new Set<string>();
const domains = new Set<string>();
const patterns: RegExp[] = [];
for (const row of result.rows) {
switch (row.origin_type) {
case 'ip':
ips.add(row.origin_value);
break;
case 'domain':
// Store as full origin for comparison
if (!row.origin_value.startsWith('http')) {
domains.add(`https://${row.origin_value}`);
domains.add(`http://${row.origin_value}`);
} else {
domains.add(row.origin_value);
}
break;
case 'pattern':
try {
patterns.push(new RegExp(row.origin_value));
} catch {
console.warn(`[Auth] Invalid trusted origin pattern: ${row.origin_value}`);
}
break;
}
}
trustedOriginsCache = { ips, domains, patterns, loadedAt: new Date() };
return trustedOriginsCache;
} catch (error) {
// DB not available or table doesn't exist - use fallbacks
return {
ips: new Set(FALLBACK_TRUSTED_IPS),
domains: new Set(FALLBACK_TRUSTED_ORIGINS),
patterns: FALLBACK_TRUSTED_PATTERNS,
};
}
}
/**
* Clear trusted origins cache (called when admin updates origins)
*/
export function clearTrustedOriginsCache() {
trustedOriginsCache = null;
}
/**
* Check if request is from a trusted origin/IP
*/
async function isTrustedRequest(req: Request): Promise<boolean> {
const { ips, domains, patterns } = await loadTrustedOrigins();
function isTrustedRequest(req: Request): boolean {
// Check origin header
const origin = req.headers.origin;
if (origin) {
if (domains.has(origin)) {
if (TRUSTED_ORIGINS.includes(origin)) {
return true;
}
for (const pattern of patterns) {
// Check pattern-based origins (wildcards like *.cannabrands.app)
for (const pattern of TRUSTED_ORIGIN_PATTERNS) {
if (pattern.test(origin)) {
return true;
}
@@ -141,15 +63,16 @@ async function isTrustedRequest(req: Request): Promise<boolean> {
// Check referer header (for same-origin requests without CORS)
const referer = req.headers.referer;
if (referer) {
for (const trusted of domains) {
for (const trusted of TRUSTED_ORIGINS) {
if (referer.startsWith(trusted)) {
return true;
}
}
// Check pattern-based referers
try {
const refererUrl = new URL(referer);
const refererOrigin = refererUrl.origin;
for (const pattern of patterns) {
for (const pattern of TRUSTED_ORIGIN_PATTERNS) {
if (pattern.test(refererOrigin)) {
return true;
}
@@ -161,7 +84,7 @@ async function isTrustedRequest(req: Request): Promise<boolean> {
// Check IP for internal requests (pod-to-pod, localhost)
const clientIp = req.ip || req.socket.remoteAddress || '';
if (ips.has(clientIp)) {
if (TRUSTED_IPS.includes(clientIp)) {
return true;
}
@@ -277,7 +200,7 @@ export async function authMiddleware(req: AuthRequest, res: Response, next: Next
}
// No token provided - check trusted origins for API access (WordPress, etc.)
if (await isTrustedRequest(req)) {
if (isTrustedRequest(req)) {
req.user = {
id: 0,
email: 'internal@system',

View File

@@ -109,7 +109,7 @@ import scraperMonitorRoutes from './routes/scraper-monitor';
import apiTokensRoutes from './routes/api-tokens';
import apiPermissionsRoutes from './routes/api-permissions';
import parallelScrapeRoutes from './routes/parallel-scrape';
// crawler-sandbox moved to _deprecated
import crawlerSandboxRoutes from './routes/crawler-sandbox';
import versionRoutes from './routes/version';
import deployStatusRoutes from './routes/deploy-status';
import publicApiRoutes from './routes/public-api';
@@ -146,9 +146,6 @@ import tasksRoutes from './routes/tasks';
import workerRegistryRoutes from './routes/worker-registry';
// Per TASK_WORKFLOW_2024-12-10.md: Raw payload access API
import payloadsRoutes from './routes/payloads';
import k8sRoutes from './routes/k8s';
import trustedOriginsRoutes from './routes/trusted-origins';
// Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com)
// These domains can access the API without authentication
@@ -189,7 +186,7 @@ app.use('/api/scraper-monitor', scraperMonitorRoutes);
app.use('/api/api-tokens', apiTokensRoutes);
app.use('/api/api-permissions', apiPermissionsRoutes);
app.use('/api/parallel-scrape', parallelScrapeRoutes);
// crawler-sandbox moved to _deprecated
app.use('/api/crawler-sandbox', crawlerSandboxRoutes);
app.use('/api/version', versionRoutes);
app.use('/api/admin/deploy-status', deployStatusRoutes);
console.log('[DeployStatus] Routes registered at /api/admin/deploy-status');
@@ -202,10 +199,6 @@ app.use('/api/admin/orchestrator', orchestratorAdminRoutes);
app.use('/api/admin/debug', adminDebugRoutes);
console.log('[AdminDebug] Routes registered at /api/admin/debug');
// Admin routes - trusted origins management (IPs, domains that bypass auth)
app.use('/api/admin/trusted-origins', trustedOriginsRoutes);
console.log('[TrustedOrigins] Routes registered at /api/admin/trusted-origins');
// Admin routes - intelligence (brands, pricing analytics)
app.use('/api/admin/intelligence', intelligenceRoutes);
console.log('[Intelligence] Routes registered at /api/admin/intelligence');
@@ -237,10 +230,6 @@ console.log('[WorkerRegistry] Routes registered at /api/worker-registry');
app.use('/api/payloads', payloadsRoutes);
console.log('[Payloads] Routes registered at /api/payloads');
// K8s control routes - worker scaling from admin UI
app.use('/api/k8s', k8sRoutes);
console.log('[K8s] Routes registered at /api/k8s');
// Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation
try {
const analyticsV2Router = createAnalyticsV2Router(getPool());

View File

@@ -47,27 +47,4 @@ router.post('/refresh', authMiddleware, async (req: AuthRequest, res) => {
res.json({ token });
});
// Verify password for sensitive actions (requires current user to be authenticated)
router.post('/verify-password', authMiddleware, async (req: AuthRequest, res) => {
try {
const { password } = req.body;
if (!password) {
return res.status(400).json({ error: 'Password required' });
}
// Re-authenticate the current user with the provided password
const user = await authenticateUser(req.user!.email, password);
if (!user) {
return res.status(401).json({ error: 'Invalid password', verified: false });
}
res.json({ verified: true });
} catch (error) {
console.error('Password verification error:', error);
res.status(500).json({ error: 'Internal server error' });
}
});
export default router;

View File

@@ -1,140 +0,0 @@
/**
* Kubernetes Control Routes
*
* Provides admin UI control over k8s resources like worker scaling.
* Uses in-cluster config when running in k8s, or kubeconfig locally.
*/
import { Router, Request, Response } from 'express';
import * as k8s from '@kubernetes/client-node';
const router = Router();
// K8s client setup - lazy initialization
let appsApi: k8s.AppsV1Api | null = null;
let k8sError: string | null = null;
function getK8sClient(): k8s.AppsV1Api | null {
if (appsApi) return appsApi;
if (k8sError) return null;
try {
const kc = new k8s.KubeConfig();
// Try in-cluster config first (when running in k8s)
try {
kc.loadFromCluster();
console.log('[K8s] Loaded in-cluster config');
} catch {
// Fall back to default kubeconfig (local dev)
try {
kc.loadFromDefault();
console.log('[K8s] Loaded default kubeconfig');
} catch (e) {
k8sError = 'No k8s config available';
console.log('[K8s] No config available - k8s routes disabled');
return null;
}
}
appsApi = kc.makeApiClient(k8s.AppsV1Api);
return appsApi;
} catch (e: any) {
k8sError = e.message;
console.error('[K8s] Failed to initialize client:', e.message);
return null;
}
}
const NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
const WORKER_DEPLOYMENT = 'scraper-worker';
/**
* GET /api/k8s/workers
* Get current worker deployment status
*/
router.get('/workers', async (_req: Request, res: Response) => {
const client = getK8sClient();
if (!client) {
return res.json({
success: true,
available: false,
error: k8sError || 'K8s not available',
replicas: 0,
readyReplicas: 0,
});
}
try {
const deployment = await client.readNamespacedDeployment({
name: WORKER_DEPLOYMENT,
namespace: NAMESPACE,
});
res.json({
success: true,
available: true,
replicas: deployment.spec?.replicas || 0,
readyReplicas: deployment.status?.readyReplicas || 0,
availableReplicas: deployment.status?.availableReplicas || 0,
updatedReplicas: deployment.status?.updatedReplicas || 0,
});
} catch (e: any) {
console.error('[K8s] Error getting deployment:', e.message);
res.status(500).json({
success: false,
error: e.message,
});
}
});
/**
* POST /api/k8s/workers/scale
* Scale worker deployment
* Body: { replicas: number }
*/
router.post('/workers/scale', async (req: Request, res: Response) => {
const client = getK8sClient();
if (!client) {
return res.status(503).json({
success: false,
error: k8sError || 'K8s not available',
});
}
const { replicas } = req.body;
if (typeof replicas !== 'number' || replicas < 0 || replicas > 50) {
return res.status(400).json({
success: false,
error: 'replicas must be a number between 0 and 50',
});
}
try {
// Patch the deployment to set replicas
await client.patchNamespacedDeploymentScale({
name: WORKER_DEPLOYMENT,
namespace: NAMESPACE,
body: { spec: { replicas } },
});
console.log(`[K8s] Scaled ${WORKER_DEPLOYMENT} to ${replicas} replicas`);
res.json({
success: true,
replicas,
message: `Scaled to ${replicas} workers`,
});
} catch (e: any) {
console.error('[K8s] Error scaling deployment:', e.message);
res.status(500).json({
success: false,
error: e.message,
});
}
});
export default router;

View File

@@ -28,55 +28,8 @@ const router = Router();
const getDbPool = (): Pool => getPool() as unknown as Pool;
/**
* @swagger
* /payloads:
* get:
* summary: List payload metadata
* description: Returns paginated list of raw crawl payload metadata. Does not include the actual payload data.
* tags: [Payloads]
* parameters:
* - in: query
* name: limit
* schema:
* type: integer
* default: 50
* maximum: 100
* description: Number of payloads to return
* - in: query
* name: offset
* schema:
* type: integer
* default: 0
* description: Number of payloads to skip
* - in: query
* name: dispensary_id
* schema:
* type: integer
* description: Filter by dispensary ID
* responses:
* 200:
* description: List of payload metadata
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* example: true
* payloads:
* type: array
* items:
* $ref: '#/components/schemas/PayloadMetadata'
* pagination:
* type: object
* properties:
* limit:
* type: integer
* offset:
* type: integer
* 500:
* description: Server error
* GET /api/payloads
* List payload metadata (paginated)
*/
router.get('/', async (req: Request, res: Response) => {
try {
@@ -103,35 +56,8 @@ router.get('/', async (req: Request, res: Response) => {
});
/**
* @swagger
* /payloads/{id}:
* get:
* summary: Get payload metadata by ID
* description: Returns metadata for a specific payload including dispensary name, size, and timestamps.
* tags: [Payloads]
* parameters:
* - in: path
* name: id
* required: true
* schema:
* type: integer
* description: Payload ID
* responses:
* 200:
* description: Payload metadata
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* payload:
* $ref: '#/components/schemas/PayloadMetadata'
* 404:
* description: Payload not found
* 500:
* description: Server error
* GET /api/payloads/:id
* Get payload metadata by ID
*/
router.get('/:id', async (req: Request, res: Response) => {
try {
@@ -171,43 +97,8 @@ router.get('/:id', async (req: Request, res: Response) => {
});
/**
* @swagger
* /payloads/{id}/data:
* get:
* summary: Get full payload data
* description: Returns the complete raw crawl payload JSON, decompressed from disk. This includes all products from the crawl.
* tags: [Payloads]
* parameters:
* - in: path
* name: id
* required: true
* schema:
* type: integer
* description: Payload ID
* responses:
* 200:
* description: Full payload data
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* metadata:
* $ref: '#/components/schemas/PayloadMetadata'
* data:
* type: object
* description: Raw GraphQL response with products array
* properties:
* products:
* type: array
* items:
* type: object
* 404:
* description: Payload not found
* 500:
* description: Server error
* GET /api/payloads/:id/data
* Get full payload JSON (decompressed from disk)
*/
router.get('/:id/data', async (req: Request, res: Response) => {
try {
@@ -232,48 +123,8 @@ router.get('/:id/data', async (req: Request, res: Response) => {
});
/**
* @swagger
* /payloads/store/{dispensaryId}:
* get:
* summary: List payloads for a store
* description: Returns paginated list of payload metadata for a specific dispensary.
* tags: [Payloads]
* parameters:
* - in: path
* name: dispensaryId
* required: true
* schema:
* type: integer
* description: Dispensary ID
* - in: query
* name: limit
* schema:
* type: integer
* default: 20
* maximum: 100
* - in: query
* name: offset
* schema:
* type: integer
* default: 0
* responses:
* 200:
* description: List of payloads for store
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* dispensaryId:
* type: integer
* payloads:
* type: array
* items:
* $ref: '#/components/schemas/PayloadMetadata'
* 500:
* description: Server error
* GET /api/payloads/store/:dispensaryId
* List payloads for a specific store
*/
router.get('/store/:dispensaryId', async (req: Request, res: Response) => {
try {
@@ -301,42 +152,8 @@ router.get('/store/:dispensaryId', async (req: Request, res: Response) => {
});
/**
* @swagger
* /payloads/store/{dispensaryId}/latest:
* get:
* summary: Get latest payload for a store
* description: Returns the most recent raw crawl payload for a dispensary, including full product data.
* tags: [Payloads]
* parameters:
* - in: path
* name: dispensaryId
* required: true
* schema:
* type: integer
* description: Dispensary ID
* responses:
* 200:
* description: Latest payload with full data
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* metadata:
* $ref: '#/components/schemas/PayloadMetadata'
* data:
* type: object
* properties:
* products:
* type: array
* items:
* type: object
* 404:
* description: No payloads found for dispensary
* 500:
* description: Server error
* GET /api/payloads/store/:dispensaryId/latest
* Get the latest payload for a store (with full data)
*/
router.get('/store/:dispensaryId/latest', async (req: Request, res: Response) => {
try {
@@ -364,107 +181,12 @@ router.get('/store/:dispensaryId/latest', async (req: Request, res: Response) =>
});
/**
* @swagger
* /payloads/store/{dispensaryId}/diff:
* get:
* summary: Compare two payloads
* description: |
* Compares two crawl payloads for a store and returns the differences.
* If no IDs are provided, compares the two most recent payloads.
* Returns added products, removed products, price changes, and stock changes.
* tags: [Payloads]
* parameters:
* - in: path
* name: dispensaryId
* required: true
* schema:
* type: integer
* description: Dispensary ID
* - in: query
* name: from
* schema:
* type: integer
* description: Older payload ID (optional)
* - in: query
* name: to
* schema:
* type: integer
* description: Newer payload ID (optional)
* responses:
* 200:
* description: Payload diff results
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* from:
* type: object
* properties:
* id:
* type: integer
* fetchedAt:
* type: string
* format: date-time
* productCount:
* type: integer
* to:
* type: object
* properties:
* id:
* type: integer
* fetchedAt:
* type: string
* format: date-time
* productCount:
* type: integer
* diff:
* type: object
* properties:
* added:
* type: integer
* removed:
* type: integer
* priceChanges:
* type: integer
* stockChanges:
* type: integer
* details:
* type: object
* properties:
* added:
* type: array
* items:
* type: object
* removed:
* type: array
* items:
* type: object
* priceChanges:
* type: array
* items:
* type: object
* properties:
* id:
* type: string
* name:
* type: string
* oldPrice:
* type: number
* newPrice:
* type: number
* stockChanges:
* type: array
* items:
* type: object
* 400:
* description: Need at least 2 payloads to diff
* 404:
* description: One or both payloads not found
* 500:
* description: Server error
* GET /api/payloads/store/:dispensaryId/diff
* Compare two payloads for a store
*
* Query params:
* - from: payload ID (older)
* - to: payload ID (newer) - optional, defaults to latest
*/
router.get('/store/:dispensaryId/diff', async (req: Request, res: Response) => {
try {
@@ -609,370 +331,4 @@ router.get('/store/:dispensaryId/diff', async (req: Request, res: Response) => {
}
});
/**
* GET /api/payloads/store/:dispensaryId/query
* Query products from the latest payload with flexible filters
*
* Query params:
* - brand: Filter by brand name (partial match)
* - category: Filter by category (exact match)
* - subcategory: Filter by subcategory
* - strain_type: Filter by strain type (indica, sativa, hybrid, cbd)
* - in_stock: Filter by stock status (true/false)
* - price_min: Minimum price
* - price_max: Maximum price
* - thc_min: Minimum THC percentage
* - thc_max: Maximum THC percentage
* - search: Search product name (partial match)
* - fields: Comma-separated list of fields to return
* - limit: Max results (default 100, max 1000)
* - offset: Skip results for pagination
* - sort: Sort field (name, price, thc, brand)
* - order: Sort order (asc, desc)
*/
router.get('/store/:dispensaryId/query', async (req: Request, res: Response) => {
try {
const pool = getDbPool();
const dispensaryId = parseInt(req.params.dispensaryId);
// Get latest payload
const result = await getLatestPayload(pool, dispensaryId);
if (!result) {
return res.status(404).json({
success: false,
error: `No payloads found for dispensary ${dispensaryId}`,
});
}
let products = result.payload.products || [];
// Parse query params
const {
brand,
category,
subcategory,
strain_type,
in_stock,
price_min,
price_max,
thc_min,
thc_max,
search,
fields,
limit: limitStr,
offset: offsetStr,
sort,
order,
} = req.query;
// Apply filters
if (brand) {
const brandLower = (brand as string).toLowerCase();
products = products.filter((p: any) =>
p.brand?.name?.toLowerCase().includes(brandLower)
);
}
if (category) {
const catLower = (category as string).toLowerCase();
products = products.filter((p: any) =>
p.category?.toLowerCase() === catLower ||
p.Category?.toLowerCase() === catLower
);
}
if (subcategory) {
const subLower = (subcategory as string).toLowerCase();
products = products.filter((p: any) =>
p.subcategory?.toLowerCase() === subLower ||
p.subCategory?.toLowerCase() === subLower
);
}
if (strain_type) {
const strainLower = (strain_type as string).toLowerCase();
products = products.filter((p: any) =>
p.strainType?.toLowerCase() === strainLower ||
p.strain_type?.toLowerCase() === strainLower
);
}
if (in_stock !== undefined) {
const wantInStock = in_stock === 'true';
products = products.filter((p: any) => {
const status = p.Status || p.status;
const isInStock = status === 'Active' || status === 'In Stock' || status === 'in_stock';
return wantInStock ? isInStock : !isInStock;
});
}
if (price_min !== undefined) {
const min = parseFloat(price_min as string);
products = products.filter((p: any) => {
const price = p.Prices?.[0]?.price || p.price;
return price >= min;
});
}
if (price_max !== undefined) {
const max = parseFloat(price_max as string);
products = products.filter((p: any) => {
const price = p.Prices?.[0]?.price || p.price;
return price <= max;
});
}
if (thc_min !== undefined) {
const min = parseFloat(thc_min as string);
products = products.filter((p: any) => {
const thc = p.potencyThc?.formatted || p.thc || 0;
const thcNum = typeof thc === 'string' ? parseFloat(thc) : thc;
return thcNum >= min;
});
}
if (thc_max !== undefined) {
const max = parseFloat(thc_max as string);
products = products.filter((p: any) => {
const thc = p.potencyThc?.formatted || p.thc || 0;
const thcNum = typeof thc === 'string' ? parseFloat(thc) : thc;
return thcNum <= max;
});
}
if (search) {
const searchLower = (search as string).toLowerCase();
products = products.filter((p: any) =>
p.name?.toLowerCase().includes(searchLower)
);
}
// Sort
if (sort) {
const sortOrder = order === 'desc' ? -1 : 1;
products.sort((a: any, b: any) => {
let aVal: any, bVal: any;
switch (sort) {
case 'name':
aVal = a.name || '';
bVal = b.name || '';
break;
case 'price':
aVal = a.Prices?.[0]?.price || a.price || 0;
bVal = b.Prices?.[0]?.price || b.price || 0;
break;
case 'thc':
aVal = parseFloat(a.potencyThc?.formatted || a.thc || '0');
bVal = parseFloat(b.potencyThc?.formatted || b.thc || '0');
break;
case 'brand':
aVal = a.brand?.name || '';
bVal = b.brand?.name || '';
break;
default:
return 0;
}
if (aVal < bVal) return -1 * sortOrder;
if (aVal > bVal) return 1 * sortOrder;
return 0;
});
}
// Pagination
const totalCount = products.length;
const limit = Math.min(parseInt(limitStr as string) || 100, 1000);
const offset = parseInt(offsetStr as string) || 0;
products = products.slice(offset, offset + limit);
// Field selection - normalize product structure
const normalizedProducts = products.map((p: any) => {
const normalized: any = {
id: p._id || p.id,
name: p.name,
brand: p.brand?.name || p.brandName,
category: p.category || p.Category,
subcategory: p.subcategory || p.subCategory,
strain_type: p.strainType || p.strain_type,
price: p.Prices?.[0]?.price || p.price,
price_med: p.Prices?.[0]?.priceMed || p.priceMed,
price_rec: p.Prices?.[0]?.priceRec || p.priceRec,
thc: p.potencyThc?.formatted || p.thc,
cbd: p.potencyCbd?.formatted || p.cbd,
weight: p.Prices?.[0]?.weight || p.weight,
status: p.Status || p.status,
in_stock: (p.Status || p.status) === 'Active',
image_url: p.image || p.imageUrl || p.image_url,
description: p.description,
};
// If specific fields requested, filter
if (fields) {
const requestedFields = (fields as string).split(',').map(f => f.trim());
const filtered: any = {};
for (const field of requestedFields) {
if (normalized.hasOwnProperty(field)) {
filtered[field] = normalized[field];
}
}
return filtered;
}
return normalized;
});
res.json({
success: true,
dispensaryId,
payloadId: result.metadata.id,
fetchedAt: result.metadata.fetchedAt,
query: {
filters: {
brand: brand || null,
category: category || null,
subcategory: subcategory || null,
strain_type: strain_type || null,
in_stock: in_stock || null,
price_min: price_min || null,
price_max: price_max || null,
thc_min: thc_min || null,
thc_max: thc_max || null,
search: search || null,
},
sort: sort || null,
order: order || 'asc',
limit,
offset,
},
pagination: {
total: totalCount,
returned: normalizedProducts.length,
limit,
offset,
has_more: offset + limit < totalCount,
},
products: normalizedProducts,
});
} catch (error: any) {
console.error('[Payloads] Query error:', error.message);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/payloads/store/:dispensaryId/aggregate
* Aggregate data from the latest payload
*
* Query params:
* - group_by: Field to group by (brand, category, subcategory, strain_type)
* - metrics: Comma-separated metrics (count, avg_price, min_price, max_price, avg_thc)
*/
router.get('/store/:dispensaryId/aggregate', async (req: Request, res: Response) => {
try {
const pool = getDbPool();
const dispensaryId = parseInt(req.params.dispensaryId);
const result = await getLatestPayload(pool, dispensaryId);
if (!result) {
return res.status(404).json({
success: false,
error: `No payloads found for dispensary ${dispensaryId}`,
});
}
const products = result.payload.products || [];
const groupBy = req.query.group_by as string;
const metricsParam = req.query.metrics as string || 'count';
const metrics = metricsParam.split(',').map(m => m.trim());
if (!groupBy) {
return res.status(400).json({
success: false,
error: 'group_by parameter is required (brand, category, subcategory, strain_type)',
});
}
// Group products
const groups: Map<string, any[]> = new Map();
for (const p of products) {
let key: string;
switch (groupBy) {
case 'brand':
key = p.brand?.name || 'Unknown';
break;
case 'category':
key = p.category || p.Category || 'Unknown';
break;
case 'subcategory':
key = p.subcategory || p.subCategory || 'Unknown';
break;
case 'strain_type':
key = p.strainType || p.strain_type || 'Unknown';
break;
default:
key = 'Unknown';
}
if (!groups.has(key)) {
groups.set(key, []);
}
groups.get(key)!.push(p);
}
// Calculate metrics
const aggregations: any[] = [];
for (const [key, items] of groups) {
const agg: any = { [groupBy]: key };
for (const metric of metrics) {
switch (metric) {
case 'count':
agg.count = items.length;
break;
case 'avg_price':
const prices = items.map(p => p.Prices?.[0]?.price || p.price).filter(p => p != null);
agg.avg_price = prices.length > 0 ? prices.reduce((a, b) => a + b, 0) / prices.length : null;
break;
case 'min_price':
const minPrices = items.map(p => p.Prices?.[0]?.price || p.price).filter(p => p != null);
agg.min_price = minPrices.length > 0 ? Math.min(...minPrices) : null;
break;
case 'max_price':
const maxPrices = items.map(p => p.Prices?.[0]?.price || p.price).filter(p => p != null);
agg.max_price = maxPrices.length > 0 ? Math.max(...maxPrices) : null;
break;
case 'avg_thc':
const thcs = items.map(p => parseFloat(p.potencyThc?.formatted || p.thc || '0')).filter(t => t > 0);
agg.avg_thc = thcs.length > 0 ? thcs.reduce((a, b) => a + b, 0) / thcs.length : null;
break;
case 'in_stock_count':
agg.in_stock_count = items.filter(p => (p.Status || p.status) === 'Active').length;
break;
}
}
aggregations.push(agg);
}
// Sort by count descending
aggregations.sort((a, b) => (b.count || 0) - (a.count || 0));
res.json({
success: true,
dispensaryId,
payloadId: result.metadata.id,
fetchedAt: result.metadata.fetchedAt,
groupBy,
metrics,
totalProducts: products.length,
groupCount: aggregations.length,
aggregations,
});
} catch (error: any) {
console.error('[Payloads] Aggregate error:', error.message);
res.status(500).json({ success: false, error: error.message });
}
});
export default router;

View File

@@ -278,7 +278,7 @@ router.post('/update-locations', requireRole('superadmin', 'admin'), async (req,
// Run in background
updateAllProxyLocations().catch(err => {
console.error('Location update failed:', err);
console.error('Location update failed:', err);
});
res.json({ message: 'Location update job started' });

View File

@@ -1,224 +0,0 @@
/**
* Trusted Origins Admin Routes
*
* Manage IPs and domains that bypass API key authentication.
* Available at /api/admin/trusted-origins
*/
import { Router, Response } from 'express';
import { pool } from '../db/pool';
import { AuthRequest, authMiddleware, requireRole, clearTrustedOriginsCache } from '../auth/middleware';
const router = Router();
// All routes require admin auth
router.use(authMiddleware);
router.use(requireRole('admin', 'superadmin'));
/**
* GET /api/admin/trusted-origins
* List all trusted origins
*/
router.get('/', async (req: AuthRequest, res: Response) => {
try {
const result = await pool.query(`
SELECT
id,
origin_type,
origin_value,
description,
active,
created_at,
updated_at
FROM trusted_origins
ORDER BY origin_type, origin_value
`);
res.json({
success: true,
origins: result.rows,
counts: {
total: result.rows.length,
active: result.rows.filter(r => r.active).length,
ips: result.rows.filter(r => r.origin_type === 'ip').length,
domains: result.rows.filter(r => r.origin_type === 'domain').length,
patterns: result.rows.filter(r => r.origin_type === 'pattern').length,
},
});
} catch (error: any) {
console.error('[TrustedOrigins] List error:', error.message);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/admin/trusted-origins
* Add a new trusted origin
*/
router.post('/', async (req: AuthRequest, res: Response) => {
try {
const { origin_type, origin_value, description } = req.body;
if (!origin_type || !origin_value) {
return res.status(400).json({
success: false,
error: 'origin_type and origin_value are required',
});
}
if (!['ip', 'domain', 'pattern'].includes(origin_type)) {
return res.status(400).json({
success: false,
error: 'origin_type must be: ip, domain, or pattern',
});
}
// Validate pattern if regex
if (origin_type === 'pattern') {
try {
new RegExp(origin_value);
} catch {
return res.status(400).json({
success: false,
error: 'Invalid regex pattern',
});
}
}
const result = await pool.query(`
INSERT INTO trusted_origins (origin_type, origin_value, description, created_by)
VALUES ($1, $2, $3, $4)
RETURNING id, origin_type, origin_value, description, active, created_at
`, [origin_type, origin_value, description || null, req.user?.id || null]);
// Invalidate cache
clearTrustedOriginsCache();
res.json({
success: true,
origin: result.rows[0],
});
} catch (error: any) {
if (error.code === '23505') {
return res.status(409).json({
success: false,
error: 'This origin already exists',
});
}
console.error('[TrustedOrigins] Add error:', error.message);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* PUT /api/admin/trusted-origins/:id
* Update a trusted origin
*/
router.put('/:id', async (req: AuthRequest, res: Response) => {
try {
const id = parseInt(req.params.id);
const { origin_type, origin_value, description, active } = req.body;
// Validate pattern if regex
if (origin_type === 'pattern' && origin_value) {
try {
new RegExp(origin_value);
} catch {
return res.status(400).json({
success: false,
error: 'Invalid regex pattern',
});
}
}
const result = await pool.query(`
UPDATE trusted_origins
SET
origin_type = COALESCE($1, origin_type),
origin_value = COALESCE($2, origin_value),
description = COALESCE($3, description),
active = COALESCE($4, active),
updated_at = NOW()
WHERE id = $5
RETURNING id, origin_type, origin_value, description, active, updated_at
`, [origin_type, origin_value, description, active, id]);
if (result.rows.length === 0) {
return res.status(404).json({ success: false, error: 'Origin not found' });
}
// Invalidate cache
clearTrustedOriginsCache();
res.json({
success: true,
origin: result.rows[0],
});
} catch (error: any) {
console.error('[TrustedOrigins] Update error:', error.message);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* DELETE /api/admin/trusted-origins/:id
* Delete a trusted origin
*/
router.delete('/:id', async (req: AuthRequest, res: Response) => {
try {
const id = parseInt(req.params.id);
const result = await pool.query(`
DELETE FROM trusted_origins WHERE id = $1 RETURNING id, origin_value
`, [id]);
if (result.rows.length === 0) {
return res.status(404).json({ success: false, error: 'Origin not found' });
}
// Invalidate cache
clearTrustedOriginsCache();
res.json({
success: true,
deleted: result.rows[0],
});
} catch (error: any) {
console.error('[TrustedOrigins] Delete error:', error.message);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/admin/trusted-origins/:id/toggle
* Toggle active status
*/
router.post('/:id/toggle', async (req: AuthRequest, res: Response) => {
try {
const id = parseInt(req.params.id);
const result = await pool.query(`
UPDATE trusted_origins
SET active = NOT active, updated_at = NOW()
WHERE id = $1
RETURNING id, origin_type, origin_value, active
`, [id]);
if (result.rows.length === 0) {
return res.status(404).json({ success: false, error: 'Origin not found' });
}
// Invalidate cache
clearTrustedOriginsCache();
res.json({
success: true,
origin: result.rows[0],
});
} catch (error: any) {
console.error('[TrustedOrigins] Toggle error:', error.message);
res.status(500).json({ success: false, error: error.message });
}
});
export default router;

View File

@@ -70,20 +70,21 @@ router.post('/register', async (req: Request, res: Response) => {
);
if (existing.rows.length > 0) {
// Re-activate existing worker - keep existing pod_name (fantasy name), don't overwrite with K8s name
// Re-activate existing worker
const { rows } = await pool.query(`
UPDATE worker_registry
SET status = 'active',
role = $1,
hostname = $2,
ip_address = $3,
pod_name = $2,
hostname = $3,
ip_address = $4,
last_heartbeat_at = NOW(),
started_at = NOW(),
metadata = $4,
metadata = $5,
updated_at = NOW()
WHERE worker_id = $5
RETURNING id, worker_id, friendly_name, pod_name, role
`, [role, finalHostname, clientIp, metadata, finalWorkerId]);
WHERE worker_id = $6
RETURNING id, worker_id, friendly_name, role
`, [role, pod_name, finalHostname, clientIp, metadata, finalWorkerId]);
const worker = rows[0];
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
@@ -104,13 +105,13 @@ router.post('/register', async (req: Request, res: Response) => {
const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]);
const friendlyName = nameResult.rows[0].name;
// Register the worker - use friendlyName as pod_name (not K8s name)
// Register the worker
const { rows } = await pool.query(`
INSERT INTO worker_registry (
worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata
) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7)
RETURNING id, worker_id, friendly_name, pod_name, role
`, [finalWorkerId, friendlyName, role, friendlyName, finalHostname, clientIp, metadata]);
RETURNING id, worker_id, friendly_name, role
`, [finalWorkerId, friendlyName, role, pod_name, finalHostname, clientIp, metadata]);
const worker = rows[0];
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
@@ -137,36 +138,17 @@ router.post('/register', async (req: Request, res: Response) => {
*
* Body:
* - worker_id: string (required)
* - current_task_id: number (optional) - task currently being processed (primary task)
* - current_task_ids: number[] (optional) - all tasks currently being processed (concurrent)
* - active_task_count: number (optional) - number of tasks currently running
* - max_concurrent_tasks: number (optional) - max concurrent tasks this worker can handle
* - current_task_id: number (optional) - task currently being processed
* - status: string (optional) - 'active', 'idle'
* - resources: object (optional) - memory_mb, cpu_user_ms, cpu_system_ms, etc.
*/
router.post('/heartbeat', async (req: Request, res: Response) => {
try {
const {
worker_id,
current_task_id,
current_task_ids,
active_task_count,
max_concurrent_tasks,
status = 'active',
resources
} = req.body;
const { worker_id, current_task_id, status = 'active', resources } = req.body;
if (!worker_id) {
return res.status(400).json({ success: false, error: 'worker_id is required' });
}
// Build metadata object with all the new fields
const metadata: Record<string, unknown> = {};
if (resources) Object.assign(metadata, resources);
if (current_task_ids) metadata.current_task_ids = current_task_ids;
if (active_task_count !== undefined) metadata.active_task_count = active_task_count;
if (max_concurrent_tasks !== undefined) metadata.max_concurrent_tasks = max_concurrent_tasks;
// Store resources in metadata jsonb column
const { rows } = await pool.query(`
UPDATE worker_registry
@@ -177,7 +159,7 @@ router.post('/heartbeat', async (req: Request, res: Response) => {
updated_at = NOW()
WHERE worker_id = $3
RETURNING id, friendly_name, status
`, [current_task_id || null, status, worker_id, Object.keys(metadata).length > 0 ? JSON.stringify(metadata) : null]);
`, [current_task_id || null, status, worker_id, resources ? JSON.stringify(resources) : null]);
if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found - please register first' });
@@ -348,27 +330,12 @@ router.get('/workers', async (req: Request, res: Response) => {
tasks_completed,
tasks_failed,
current_task_id,
-- Concurrent task fields from metadata
(metadata->>'current_task_ids')::jsonb as current_task_ids,
(metadata->>'active_task_count')::int as active_task_count,
(metadata->>'max_concurrent_tasks')::int as max_concurrent_tasks,
-- Decommission fields
COALESCE(decommission_requested, false) as decommission_requested,
decommission_reason,
-- Preflight fields (dual-transport verification)
curl_ip,
http_ip,
preflight_status,
preflight_at,
fingerprint_data,
-- Full metadata for resources
metadata,
EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat,
CASE
WHEN status = 'offline' OR status = 'terminated' THEN status
WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
WHEN current_task_id IS NOT NULL THEN 'busy'
WHEN (metadata->>'active_task_count')::int > 0 THEN 'busy'
ELSE 'ready'
END as health_status,
created_at
@@ -705,163 +672,4 @@ router.get('/capacity', async (_req: Request, res: Response) => {
}
});
// ============================================================
// WORKER LIFECYCLE MANAGEMENT
// ============================================================
/**
* POST /api/worker-registry/workers/:workerId/decommission
* Request graceful decommission of a worker (will stop after current task)
*/
router.post('/workers/:workerId/decommission', async (req: Request, res: Response) => {
try {
const { workerId } = req.params;
const { reason, issued_by } = req.body;
// Update worker_registry to flag for decommission
const result = await pool.query(
`UPDATE worker_registry
SET decommission_requested = true,
decommission_reason = $2,
decommission_requested_at = NOW()
WHERE worker_id = $1
RETURNING friendly_name, status, current_task_id`,
[workerId, reason || 'Manual decommission from admin']
);
if (result.rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found' });
}
const worker = result.rows[0];
// Also log to worker_commands for audit trail
await pool.query(
`INSERT INTO worker_commands (worker_id, command, reason, issued_by)
VALUES ($1, 'decommission', $2, $3)
ON CONFLICT DO NOTHING`,
[workerId, reason || 'Manual decommission', issued_by || 'admin']
).catch(() => {
// Table might not exist yet - ignore
});
res.json({
success: true,
message: worker.current_task_id
? `Worker ${worker.friendly_name} will stop after completing task #${worker.current_task_id}`
: `Worker ${worker.friendly_name} will stop on next poll`,
worker: {
friendly_name: worker.friendly_name,
status: worker.status,
current_task_id: worker.current_task_id,
decommission_requested: true
}
});
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/worker-registry/workers/:workerId/cancel-decommission
* Cancel a pending decommission request
*/
router.post('/workers/:workerId/cancel-decommission', async (req: Request, res: Response) => {
try {
const { workerId } = req.params;
const result = await pool.query(
`UPDATE worker_registry
SET decommission_requested = false,
decommission_reason = NULL,
decommission_requested_at = NULL
WHERE worker_id = $1
RETURNING friendly_name`,
[workerId]
);
if (result.rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found' });
}
res.json({
success: true,
message: `Decommission cancelled for ${result.rows[0].friendly_name}`
});
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/worker-registry/spawn
* Spawn a new worker in the current pod (only works in multi-worker-per-pod mode)
* For now, this is a placeholder - actual spawning requires the pod supervisor
*/
router.post('/spawn', async (req: Request, res: Response) => {
try {
const { pod_name, role } = req.body;
// For now, we can't actually spawn workers from the API
// This would require a supervisor process in each pod that listens for spawn commands
// Instead, return instructions for how to scale
res.json({
success: false,
error: 'Direct worker spawning not yet implemented',
instructions: 'To add workers, scale the K8s deployment: kubectl scale deployment/scraper-worker --replicas=N'
});
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/worker-registry/pods
* Get workers grouped by pod
*/
router.get('/pods', async (_req: Request, res: Response) => {
try {
const { rows } = await pool.query(`
SELECT
COALESCE(pod_name, 'Unknown') as pod_name,
COUNT(*) as worker_count,
COUNT(*) FILTER (WHERE current_task_id IS NOT NULL) as busy_count,
COUNT(*) FILTER (WHERE current_task_id IS NULL) as idle_count,
SUM(tasks_completed) as total_completed,
SUM(tasks_failed) as total_failed,
SUM((metadata->>'memory_rss_mb')::int) as total_memory_mb,
array_agg(json_build_object(
'worker_id', worker_id,
'friendly_name', friendly_name,
'status', status,
'current_task_id', current_task_id,
'tasks_completed', tasks_completed,
'tasks_failed', tasks_failed,
'decommission_requested', COALESCE(decommission_requested, false),
'last_heartbeat_at', last_heartbeat_at
)) as workers
FROM worker_registry
WHERE status NOT IN ('offline', 'terminated')
GROUP BY pod_name
ORDER BY pod_name
`);
res.json({
success: true,
pods: rows.map(row => ({
pod_name: row.pod_name,
worker_count: parseInt(row.worker_count),
busy_count: parseInt(row.busy_count),
idle_count: parseInt(row.idle_count),
total_completed: parseInt(row.total_completed) || 0,
total_failed: parseInt(row.total_failed) || 0,
total_memory_mb: parseInt(row.total_memory_mb) || 0,
workers: row.workers
}))
});
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
export default router;

View File

@@ -35,7 +35,7 @@ const router = Router();
// ============================================================
const K8S_NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
const K8S_DEPLOYMENT_NAME = process.env.K8S_WORKER_DEPLOYMENT || 'scraper-worker';
const K8S_STATEFULSET_NAME = process.env.K8S_WORKER_STATEFULSET || 'scraper-worker';
// Initialize K8s client - uses in-cluster config when running in K8s,
// or kubeconfig when running locally
@@ -70,7 +70,7 @@ function getK8sClient(): k8s.AppsV1Api | null {
/**
* GET /api/workers/k8s/replicas - Get current worker replica count
* Returns current and desired replica counts from the Deployment
* Returns current and desired replica counts from the StatefulSet
*/
router.get('/k8s/replicas', async (_req: Request, res: Response) => {
const client = getK8sClient();
@@ -84,21 +84,21 @@ router.get('/k8s/replicas', async (_req: Request, res: Response) => {
}
try {
const response = await client.readNamespacedDeployment({
name: K8S_DEPLOYMENT_NAME,
const response = await client.readNamespacedStatefulSet({
name: K8S_STATEFULSET_NAME,
namespace: K8S_NAMESPACE,
});
const deployment = response;
const statefulSet = response;
res.json({
success: true,
replicas: {
current: deployment.status?.readyReplicas || 0,
desired: deployment.spec?.replicas || 0,
available: deployment.status?.availableReplicas || 0,
updated: deployment.status?.updatedReplicas || 0,
current: statefulSet.status?.readyReplicas || 0,
desired: statefulSet.spec?.replicas || 0,
available: statefulSet.status?.availableReplicas || 0,
updated: statefulSet.status?.updatedReplicas || 0,
},
deployment: K8S_DEPLOYMENT_NAME,
statefulset: K8S_STATEFULSET_NAME,
namespace: K8S_NAMESPACE,
});
} catch (err: any) {
@@ -112,7 +112,7 @@ router.get('/k8s/replicas', async (_req: Request, res: Response) => {
/**
* POST /api/workers/k8s/scale - Scale worker replicas
* Body: { replicas: number } - desired replica count (0-20)
* Body: { replicas: number } - desired replica count (1-20)
*/
router.post('/k8s/scale', async (req: Request, res: Response) => {
const client = getK8sClient();
@@ -136,21 +136,21 @@ router.post('/k8s/scale', async (req: Request, res: Response) => {
try {
// Get current state first
const currentResponse = await client.readNamespacedDeploymentScale({
name: K8S_DEPLOYMENT_NAME,
const currentResponse = await client.readNamespacedStatefulSetScale({
name: K8S_STATEFULSET_NAME,
namespace: K8S_NAMESPACE,
});
const currentReplicas = currentResponse.spec?.replicas || 0;
// Update scale using replaceNamespacedDeploymentScale
await client.replaceNamespacedDeploymentScale({
name: K8S_DEPLOYMENT_NAME,
// Update scale using replaceNamespacedStatefulSetScale
await client.replaceNamespacedStatefulSetScale({
name: K8S_STATEFULSET_NAME,
namespace: K8S_NAMESPACE,
body: {
apiVersion: 'autoscaling/v1',
kind: 'Scale',
metadata: {
name: K8S_DEPLOYMENT_NAME,
name: K8S_STATEFULSET_NAME,
namespace: K8S_NAMESPACE,
},
spec: {
@@ -159,14 +159,14 @@ router.post('/k8s/scale', async (req: Request, res: Response) => {
},
});
console.log(`[Workers] Scaled ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${replicas} replicas`);
console.log(`[Workers] Scaled ${K8S_STATEFULSET_NAME} from ${currentReplicas} to ${replicas} replicas`);
res.json({
success: true,
message: `Scaled from ${currentReplicas} to ${replicas} replicas`,
previous: currentReplicas,
desired: replicas,
deployment: K8S_DEPLOYMENT_NAME,
statefulset: K8S_STATEFULSET_NAME,
namespace: K8S_NAMESPACE,
});
} catch (err: any) {
@@ -178,73 +178,6 @@ router.post('/k8s/scale', async (req: Request, res: Response) => {
}
});
/**
* POST /api/workers/k8s/scale-up - Scale up worker replicas by 1
* Convenience endpoint for adding a single worker
*/
router.post('/k8s/scale-up', async (_req: Request, res: Response) => {
const client = getK8sClient();
if (!client) {
return res.status(503).json({
success: false,
error: 'K8s client not available (not running in cluster or no kubeconfig)',
});
}
try {
// Get current replica count
const currentResponse = await client.readNamespacedDeploymentScale({
name: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
});
const currentReplicas = currentResponse.spec?.replicas || 0;
const newReplicas = currentReplicas + 1;
// Cap at 20 replicas
if (newReplicas > 20) {
return res.status(400).json({
success: false,
error: 'Maximum replica count (20) reached',
});
}
// Scale up by 1
await client.replaceNamespacedDeploymentScale({
name: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
body: {
apiVersion: 'autoscaling/v1',
kind: 'Scale',
metadata: {
name: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
},
spec: {
replicas: newReplicas,
},
},
});
console.log(`[Workers] Scaled up ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${newReplicas} replicas`);
res.json({
success: true,
message: `Added worker (${currentReplicas}${newReplicas} replicas)`,
previous: currentReplicas,
desired: newReplicas,
deployment: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
});
} catch (err: any) {
console.error('[Workers] K8s scale-up error:', err.body?.message || err.message);
res.status(500).json({
success: false,
error: err.body?.message || err.message,
});
}
});
// ============================================================
// STATIC ROUTES (must come before parameterized routes)
// ============================================================

View File

@@ -683,118 +683,6 @@ export class CrawlRotator {
const current = this.proxy.getCurrent();
return current?.timezone;
}
/**
* Preflight check - verifies proxy and anti-detect are working
* MUST be called before any task execution to ensure anonymity.
*
* Tests:
* 1. Proxy available - a proxy must be loaded and active
* 2. Proxy connectivity - makes HTTP request through proxy to verify connection
* 3. Anti-detect headers - verifies fingerprint is set with required headers
*
* @returns Promise<PreflightResult> with pass/fail status and details
*/
async preflight(): Promise<PreflightResult> {
const result: PreflightResult = {
passed: false,
proxyAvailable: false,
proxyConnected: false,
antidetectReady: false,
proxyIp: null,
fingerprint: null,
error: null,
responseTimeMs: null,
};
// Step 1: Check proxy is available
const currentProxy = this.proxy.getCurrent();
if (!currentProxy) {
result.error = 'No proxy available';
console.log('[Preflight] FAILED - No proxy available');
return result;
}
result.proxyAvailable = true;
result.proxyIp = currentProxy.host;
// Step 2: Check fingerprint/anti-detect is ready
const fingerprint = this.userAgent.getCurrent();
if (!fingerprint || !fingerprint.userAgent) {
result.error = 'Anti-detect fingerprint not initialized';
console.log('[Preflight] FAILED - No fingerprint');
return result;
}
result.antidetectReady = true;
result.fingerprint = {
userAgent: fingerprint.userAgent,
browserName: fingerprint.browserName,
deviceCategory: fingerprint.deviceCategory,
};
// Step 3: Test proxy connectivity with an actual HTTP request
// Use httpbin.org/ip to verify request goes through proxy
const proxyUrl = this.proxy.getProxyUrl(currentProxy);
const testUrl = 'https://httpbin.org/ip';
try {
const { default: axios } = await import('axios');
const { HttpsProxyAgent } = await import('https-proxy-agent');
const agent = new HttpsProxyAgent(proxyUrl);
const startTime = Date.now();
const response = await axios.get(testUrl, {
httpsAgent: agent,
timeout: 15000, // 15 second timeout
headers: {
'User-Agent': fingerprint.userAgent,
'Accept-Language': fingerprint.acceptLanguage,
...(fingerprint.secChUa && { 'sec-ch-ua': fingerprint.secChUa }),
...(fingerprint.secChUaPlatform && { 'sec-ch-ua-platform': fingerprint.secChUaPlatform }),
...(fingerprint.secChUaMobile && { 'sec-ch-ua-mobile': fingerprint.secChUaMobile }),
},
});
result.responseTimeMs = Date.now() - startTime;
result.proxyConnected = true;
result.passed = true;
// Mark success on proxy stats
await this.proxy.markSuccess(currentProxy.id, result.responseTimeMs);
console.log(`[Preflight] PASSED - Proxy ${currentProxy.host} connected (${result.responseTimeMs}ms), UA: ${fingerprint.browserName}/${fingerprint.deviceCategory}`);
} catch (err: any) {
result.error = `Proxy connection failed: ${err.message || 'Unknown error'}`;
console.log(`[Preflight] FAILED - Proxy connection error: ${err.message}`);
// Mark failure on proxy stats
await this.proxy.markFailed(currentProxy.id, err.message);
}
return result;
}
}
/**
* Result from preflight check
*/
export interface PreflightResult {
/** Overall pass/fail */
passed: boolean;
/** Step 1: Is a proxy loaded? */
proxyAvailable: boolean;
/** Step 2: Did HTTP request through proxy succeed? */
proxyConnected: boolean;
/** Step 3: Is fingerprint/anti-detect ready? */
antidetectReady: boolean;
/** Current proxy IP */
proxyIp: string | null;
/** Fingerprint summary */
fingerprint: { userAgent: string; browserName: string; deviceCategory: string } | null;
/** Error message if failed */
error: string | null;
/** Proxy response time in ms */
responseTimeMs: number | null;
}
// ============================================================

View File

@@ -1,100 +0,0 @@
/**
* Curl Preflight - Verify curl/axios transport works through proxy
*
* Tests:
* 1. Proxy is available and active
* 2. HTTP request through proxy succeeds
* 3. Anti-detect headers are properly set
*
* Use case: Fast, simple API requests that don't need browser fingerprint
*/
import axios from 'axios';
import { HttpsProxyAgent } from 'https-proxy-agent';
import { CrawlRotator, PreflightResult } from './crawl-rotator';
export interface CurlPreflightResult extends PreflightResult {
method: 'curl';
}
/**
* Run curl preflight check
* Tests proxy connectivity using axios/curl through the proxy
*/
export async function runCurlPreflight(
crawlRotator: CrawlRotator
): Promise<CurlPreflightResult> {
const result: CurlPreflightResult = {
method: 'curl',
passed: false,
proxyAvailable: false,
proxyConnected: false,
antidetectReady: false,
proxyIp: null,
fingerprint: null,
error: null,
responseTimeMs: null,
};
// Step 1: Check proxy is available
const currentProxy = crawlRotator.proxy.getCurrent();
if (!currentProxy) {
result.error = 'No proxy available';
console.log('[CurlPreflight] FAILED - No proxy available');
return result;
}
result.proxyAvailable = true;
result.proxyIp = currentProxy.host;
// Step 2: Check fingerprint/anti-detect is ready
const fingerprint = crawlRotator.userAgent.getCurrent();
if (!fingerprint || !fingerprint.userAgent) {
result.error = 'Anti-detect fingerprint not initialized';
console.log('[CurlPreflight] FAILED - No fingerprint');
return result;
}
result.antidetectReady = true;
result.fingerprint = {
userAgent: fingerprint.userAgent,
browserName: fingerprint.browserName,
deviceCategory: fingerprint.deviceCategory,
};
// Step 3: Test proxy connectivity with an actual HTTP request
const proxyUrl = crawlRotator.proxy.getProxyUrl(currentProxy);
const testUrl = 'https://httpbin.org/ip';
try {
const agent = new HttpsProxyAgent(proxyUrl);
const startTime = Date.now();
const response = await axios.get(testUrl, {
httpsAgent: agent,
timeout: 15000, // 15 second timeout
headers: {
'User-Agent': fingerprint.userAgent,
'Accept-Language': fingerprint.acceptLanguage,
...(fingerprint.secChUa && { 'sec-ch-ua': fingerprint.secChUa }),
...(fingerprint.secChUaPlatform && { 'sec-ch-ua-platform': fingerprint.secChUaPlatform }),
...(fingerprint.secChUaMobile && { 'sec-ch-ua-mobile': fingerprint.secChUaMobile }),
},
});
result.responseTimeMs = Date.now() - startTime;
result.proxyConnected = true;
result.passed = true;
// Mark success on proxy stats
await crawlRotator.proxy.markSuccess(currentProxy.id, result.responseTimeMs);
console.log(`[CurlPreflight] PASSED - Proxy ${currentProxy.host} connected (${result.responseTimeMs}ms), UA: ${fingerprint.browserName}/${fingerprint.deviceCategory}`);
} catch (err: any) {
result.error = `Proxy connection failed: ${err.message || 'Unknown error'}`;
console.log(`[CurlPreflight] FAILED - Proxy connection error: ${err.message}`);
// Mark failure on proxy stats
await crawlRotator.proxy.markFailed(currentProxy.id, err.message);
}
return result;
}

View File

@@ -1,399 +0,0 @@
/**
* Puppeteer Preflight - Verify browser-based transport works with anti-detect
*
* Uses Puppeteer + StealthPlugin to:
* 1. Launch headless browser with stealth mode + PROXY
* 2. Visit fingerprint.com demo to verify anti-detect and confirm proxy IP
* 3. Establish session by visiting Dutchie embedded menu
* 4. Make GraphQL request from browser context
* 5. Verify we get a valid response (not blocked)
*
* Use case: Anti-detect scraping that needs real browser fingerprint through proxy
*
* Based on test-intercept.js which successfully captures 1000+ products
*/
import { PreflightResult, CrawlRotator } from './crawl-rotator';
// GraphQL hash for FilteredProducts query - MUST match CLAUDE.md
const FILTERED_PRODUCTS_HASH = 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0';
// Test dispensary - AZ-Deeply-Rooted (known working)
const TEST_CNAME = 'AZ-Deeply-Rooted';
const TEST_PLATFORM_ID = '6405ef617056e8014d79101b';
// Anti-detect verification sites (primary + fallback)
const FINGERPRINT_DEMO_URL = 'https://demo.fingerprint.com/';
const AMIUNIQUE_URL = 'https://amiunique.org/fingerprint';
export interface PuppeteerPreflightResult extends PreflightResult {
method: 'http';
/** Number of products returned (proves API access) */
productsReturned?: number;
/** Browser user agent used */
browserUserAgent?: string;
/** Bot detection result from fingerprint.com */
botDetection?: {
detected: boolean;
probability?: number;
type?: string;
};
/** Expected proxy IP (from pool) */
expectedProxyIp?: string;
/** Whether IP verification passed (detected IP matches proxy) */
ipVerified?: boolean;
}
/**
* Run Puppeteer preflight check with proxy
* Tests browser-based access with anti-detect verification via fingerprint.com
*
* @param crawlRotator - CrawlRotator instance to get proxy from pool
*/
export async function runPuppeteerPreflight(
crawlRotator?: CrawlRotator
): Promise<PuppeteerPreflightResult> {
const result: PuppeteerPreflightResult = {
method: 'http',
passed: false,
proxyAvailable: false,
proxyConnected: false,
antidetectReady: false,
proxyIp: null,
fingerprint: null,
error: null,
responseTimeMs: null,
productsReturned: 0,
ipVerified: false,
};
let browser: any = null;
try {
// Step 0: Get a proxy from the pool
let proxyUrl: string | null = null;
let expectedProxyHost: string | null = null;
if (crawlRotator) {
const currentProxy = crawlRotator.proxy.getCurrent();
if (currentProxy) {
result.proxyAvailable = true;
proxyUrl = crawlRotator.proxy.getProxyUrl(currentProxy);
expectedProxyHost = currentProxy.host;
result.expectedProxyIp = expectedProxyHost;
console.log(`[PuppeteerPreflight] Using proxy: ${currentProxy.host}:${currentProxy.port}`);
} else {
result.error = 'No proxy available from pool';
console.log(`[PuppeteerPreflight] FAILED - No proxy available`);
return result;
}
} else {
console.log(`[PuppeteerPreflight] WARNING: No CrawlRotator provided - using direct connection`);
result.proxyAvailable = true; // No proxy needed for direct
}
// Dynamic imports to avoid loading Puppeteer unless needed
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
puppeteer.use(StealthPlugin());
const startTime = Date.now();
// Build browser args
const browserArgs = ['--no-sandbox', '--disable-setuid-sandbox'];
if (proxyUrl) {
// Extract host:port for Puppeteer (it handles auth separately)
const proxyUrlParsed = new URL(proxyUrl);
browserArgs.push(`--proxy-server=${proxyUrlParsed.host}`);
}
// Launch browser with stealth + proxy
browser = await puppeteer.launch({
headless: 'new',
args: browserArgs,
});
const page = await browser.newPage();
// If proxy has auth, set it up
if (proxyUrl) {
const proxyUrlParsed = new URL(proxyUrl);
if (proxyUrlParsed.username && proxyUrlParsed.password) {
await page.authenticate({
username: decodeURIComponent(proxyUrlParsed.username),
password: decodeURIComponent(proxyUrlParsed.password),
});
}
}
// Get browser user agent
const userAgent = await page.evaluate(() => navigator.userAgent);
result.browserUserAgent = userAgent;
result.fingerprint = {
userAgent,
browserName: 'Chrome (Puppeteer)',
deviceCategory: 'desktop',
};
// =========================================================================
// STEP 1: Visit fingerprint.com demo to verify anti-detect and get IP
// =========================================================================
console.log(`[PuppeteerPreflight] Testing anti-detect at ${FINGERPRINT_DEMO_URL}...`);
try {
await page.goto(FINGERPRINT_DEMO_URL, {
waitUntil: 'networkidle2',
timeout: 30000,
});
result.proxyConnected = true; // If we got here, proxy is working
// Wait for fingerprint results to load
await page.waitForSelector('[data-test="visitor-id"]', { timeout: 10000 }).catch(() => {});
// Extract fingerprint data from the page
const fingerprintData = await page.evaluate(() => {
// Try to find the IP address displayed on the page
const ipElement = document.querySelector('[data-test="ip-address"]');
const ip = ipElement?.textContent?.trim() || null;
// Try to find bot detection info
const botElement = document.querySelector('[data-test="bot-detected"]');
const botDetected = botElement?.textContent?.toLowerCase().includes('true') || false;
// Try to find visitor ID (proves fingerprinting worked)
const visitorIdElement = document.querySelector('[data-test="visitor-id"]');
const visitorId = visitorIdElement?.textContent?.trim() || null;
// Alternative: look for common UI patterns if data-test attrs not present
let detectedIp = ip;
if (!detectedIp) {
// Look for IP in any element containing IP-like pattern
const allText = document.body.innerText;
const ipMatch = allText.match(/\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b/);
detectedIp = ipMatch ? ipMatch[1] : null;
}
return {
ip: detectedIp,
botDetected,
visitorId,
pageLoaded: !!document.body,
};
});
if (fingerprintData.ip) {
result.proxyIp = fingerprintData.ip;
console.log(`[PuppeteerPreflight] Detected IP: ${fingerprintData.ip}`);
// Verify IP matches expected proxy
if (expectedProxyHost) {
// Check if detected IP contains the proxy host (or is close match)
if (fingerprintData.ip === expectedProxyHost ||
expectedProxyHost.includes(fingerprintData.ip) ||
fingerprintData.ip.includes(expectedProxyHost.split('.').slice(0, 3).join('.'))) {
result.ipVerified = true;
console.log(`[PuppeteerPreflight] IP VERIFIED - matches proxy`);
} else {
console.log(`[PuppeteerPreflight] IP mismatch: expected ${expectedProxyHost}, got ${fingerprintData.ip}`);
// Don't fail - residential proxies often show different egress IPs
}
}
}
if (fingerprintData.visitorId) {
console.log(`[PuppeteerPreflight] Fingerprint visitor ID: ${fingerprintData.visitorId}`);
}
result.botDetection = {
detected: fingerprintData.botDetected,
};
if (fingerprintData.botDetected) {
console.log(`[PuppeteerPreflight] WARNING: Bot detection triggered!`);
} else {
console.log(`[PuppeteerPreflight] Anti-detect check: NOT detected as bot`);
result.antidetectReady = true;
}
} catch (fpErr: any) {
// Could mean proxy connection failed
console.log(`[PuppeteerPreflight] Fingerprint.com check failed: ${fpErr.message}`);
if (fpErr.message.includes('net::ERR_PROXY') || fpErr.message.includes('ECONNREFUSED')) {
result.error = `Proxy connection failed: ${fpErr.message}`;
return result;
}
// Try fallback: amiunique.org
console.log(`[PuppeteerPreflight] Trying fallback: ${AMIUNIQUE_URL}...`);
try {
await page.goto(AMIUNIQUE_URL, {
waitUntil: 'networkidle2',
timeout: 30000,
});
result.proxyConnected = true;
// Extract IP from amiunique.org page
const amiData = await page.evaluate(() => {
const allText = document.body.innerText;
const ipMatch = allText.match(/\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b/);
return {
ip: ipMatch ? ipMatch[1] : null,
pageLoaded: !!document.body,
};
});
if (amiData.ip) {
result.proxyIp = amiData.ip;
console.log(`[PuppeteerPreflight] Detected IP via amiunique.org: ${amiData.ip}`);
}
result.antidetectReady = true;
console.log(`[PuppeteerPreflight] amiunique.org fallback succeeded`);
} catch (amiErr: any) {
console.log(`[PuppeteerPreflight] amiunique.org fallback also failed: ${amiErr.message}`);
// Continue with Dutchie test anyway
result.proxyConnected = true;
result.antidetectReady = true;
}
}
// =========================================================================
// STEP 2: Test Dutchie API access (the real test)
// =========================================================================
const embedUrl = `https://dutchie.com/embedded-menu/${TEST_CNAME}?menuType=rec`;
console.log(`[PuppeteerPreflight] Establishing session at ${embedUrl}...`);
await page.goto(embedUrl, {
waitUntil: 'networkidle2',
timeout: 30000,
});
// Make GraphQL request from browser context
const graphqlResult = await page.evaluate(
async (platformId: string, hash: string) => {
try {
const variables = {
includeEnterpriseSpecials: false,
productsFilter: {
dispensaryId: platformId,
pricingType: 'rec',
Status: 'Active', // CRITICAL: Must be 'Active' per CLAUDE.md
types: [],
useCache: true,
isDefaultSort: true,
sortBy: 'popularSortIdx',
sortDirection: 1,
bypassOnlineThresholds: true,
isKioskMenu: false,
removeProductsBelowOptionThresholds: false,
},
page: 0,
perPage: 10, // Just need a few to prove it works
};
const extensions = {
persistedQuery: {
version: 1,
sha256Hash: hash,
},
};
const qs = new URLSearchParams({
operationName: 'FilteredProducts',
variables: JSON.stringify(variables),
extensions: JSON.stringify(extensions),
});
const url = `https://dutchie.com/api-3/graphql?${qs.toString()}`;
const sessionId = 'preflight-' + Date.now();
const response = await fetch(url, {
method: 'GET',
headers: {
Accept: 'application/json',
'content-type': 'application/json',
'x-dutchie-session': sessionId,
'apollographql-client-name': 'Marketplace (production)',
},
credentials: 'include',
});
if (!response.ok) {
return { error: `HTTP ${response.status}`, products: 0 };
}
const json = await response.json();
if (json.errors) {
return { error: JSON.stringify(json.errors).slice(0, 200), products: 0 };
}
const products = json?.data?.filteredProducts?.products || [];
return { error: null, products: products.length };
} catch (err: any) {
return { error: err.message || 'Unknown error', products: 0 };
}
},
TEST_PLATFORM_ID,
FILTERED_PRODUCTS_HASH
);
result.responseTimeMs = Date.now() - startTime;
if (graphqlResult.error) {
result.error = `GraphQL error: ${graphqlResult.error}`;
console.log(`[PuppeteerPreflight] FAILED - ${result.error}`);
} else if (graphqlResult.products === 0) {
result.error = 'GraphQL returned 0 products';
console.log(`[PuppeteerPreflight] FAILED - No products returned`);
} else {
result.passed = true;
result.productsReturned = graphqlResult.products;
console.log(
`[PuppeteerPreflight] PASSED - Got ${graphqlResult.products} products in ${result.responseTimeMs}ms`
);
if (result.proxyIp) {
console.log(`[PuppeteerPreflight] Browser IP via proxy: ${result.proxyIp}`);
}
}
} catch (err: any) {
result.error = `Browser error: ${err.message || 'Unknown error'}`;
console.log(`[PuppeteerPreflight] FAILED - ${result.error}`);
} finally {
if (browser) {
await browser.close().catch(() => {});
}
}
return result;
}
/**
* Run Puppeteer preflight with retry
* Retries once on failure to handle transient issues
*
* @param crawlRotator - CrawlRotator instance to get proxy from pool
* @param maxRetries - Number of retry attempts (default 1)
*/
export async function runPuppeteerPreflightWithRetry(
crawlRotator?: CrawlRotator,
maxRetries: number = 1
): Promise<PuppeteerPreflightResult> {
let lastResult: PuppeteerPreflightResult | null = null;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
if (attempt > 0) {
console.log(`[PuppeteerPreflight] Retry attempt ${attempt}/${maxRetries}...`);
await new Promise((r) => setTimeout(r, 5000)); // Wait 5s between retries
}
lastResult = await runPuppeteerPreflight(crawlRotator);
if (lastResult.passed) {
return lastResult;
}
}
return lastResult!;
}

View File

@@ -1,30 +1,566 @@
/**
* System API Routes (Stub)
* System API Routes
*
* The full system routes depend on SyncOrchestrator which was moved to _deprecated.
* This stub provides empty routers to maintain backward compatibility.
* Provides REST API endpoints for system monitoring and control:
* - /api/system/sync/* - Sync orchestrator
* - /api/system/dlq/* - Dead-letter queue
* - /api/system/integrity/* - Integrity checks
* - /api/system/fix/* - Auto-fix routines
* - /api/system/alerts/* - System alerts
* - /metrics - Prometheus metrics
*
* Full implementation available at: src/_deprecated/system/routes/index.ts
* Phase 5: Full Production Sync + Monitoring
*/
import { Router, Request, Response } from 'express';
import { Pool } from 'pg';
import { MetricsService } from '../services';
import {
SyncOrchestrator,
MetricsService,
DLQService,
AlertService,
IntegrityService,
AutoFixService,
} from '../services';
export function createSystemRouter(_pool: Pool): Router {
export function createSystemRouter(pool: Pool): Router {
const router = Router();
// Stub - full sync/dlq/integrity/fix/alerts routes moved to _deprecated
router.get('/status', (_req: Request, res: Response) => {
res.json({
message: 'System routes temporarily disabled - see _deprecated/system/routes',
status: 'stub',
});
// Initialize services
const metrics = new MetricsService(pool);
const dlq = new DLQService(pool);
const alerts = new AlertService(pool);
const integrity = new IntegrityService(pool, alerts);
const autoFix = new AutoFixService(pool, alerts);
const orchestrator = new SyncOrchestrator(pool, metrics, dlq, alerts);
// ============================================================
// SYNC ORCHESTRATOR ENDPOINTS
// ============================================================
/**
* GET /api/system/sync/status
* Get current sync status
*/
router.get('/sync/status', async (_req: Request, res: Response) => {
try {
const status = await orchestrator.getStatus();
res.json(status);
} catch (error) {
console.error('[System] Sync status error:', error);
res.status(500).json({ error: 'Failed to get sync status' });
}
});
/**
* POST /api/system/sync/run
* Trigger a sync run
*/
router.post('/sync/run', async (req: Request, res: Response) => {
try {
const triggeredBy = req.body.triggeredBy || 'api';
const result = await orchestrator.runSync();
res.json({
success: true,
triggeredBy,
metrics: result,
});
} catch (error) {
console.error('[System] Sync run error:', error);
res.status(500).json({
success: false,
error: error instanceof Error ? error.message : 'Sync run failed',
});
}
});
/**
* GET /api/system/sync/queue-depth
* Get queue depth information
*/
router.get('/sync/queue-depth', async (_req: Request, res: Response) => {
try {
const depth = await orchestrator.getQueueDepth();
res.json(depth);
} catch (error) {
console.error('[System] Queue depth error:', error);
res.status(500).json({ error: 'Failed to get queue depth' });
}
});
/**
* GET /api/system/sync/health
* Get sync health status
*/
router.get('/sync/health', async (_req: Request, res: Response) => {
try {
const health = await orchestrator.getHealth();
res.status(health.healthy ? 200 : 503).json(health);
} catch (error) {
console.error('[System] Health check error:', error);
res.status(500).json({ healthy: false, error: 'Health check failed' });
}
});
/**
* POST /api/system/sync/pause
* Pause the orchestrator
*/
router.post('/sync/pause', async (req: Request, res: Response) => {
try {
const reason = req.body.reason || 'Manual pause';
await orchestrator.pause(reason);
res.json({ success: true, message: 'Orchestrator paused' });
} catch (error) {
console.error('[System] Pause error:', error);
res.status(500).json({ error: 'Failed to pause orchestrator' });
}
});
/**
* POST /api/system/sync/resume
* Resume the orchestrator
*/
router.post('/sync/resume', async (_req: Request, res: Response) => {
try {
await orchestrator.resume();
res.json({ success: true, message: 'Orchestrator resumed' });
} catch (error) {
console.error('[System] Resume error:', error);
res.status(500).json({ error: 'Failed to resume orchestrator' });
}
});
// ============================================================
// DLQ ENDPOINTS
// ============================================================
/**
* GET /api/system/dlq
* List DLQ payloads
*/
router.get('/dlq', async (req: Request, res: Response) => {
try {
const options = {
status: req.query.status as string,
errorType: req.query.errorType as string,
dispensaryId: req.query.dispensaryId ? parseInt(req.query.dispensaryId as string) : undefined,
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
};
const result = await dlq.listPayloads(options);
res.json(result);
} catch (error) {
console.error('[System] DLQ list error:', error);
res.status(500).json({ error: 'Failed to list DLQ payloads' });
}
});
/**
* GET /api/system/dlq/stats
* Get DLQ statistics
*/
router.get('/dlq/stats', async (_req: Request, res: Response) => {
try {
const stats = await dlq.getStats();
res.json(stats);
} catch (error) {
console.error('[System] DLQ stats error:', error);
res.status(500).json({ error: 'Failed to get DLQ stats' });
}
});
/**
* GET /api/system/dlq/summary
* Get DLQ summary by error type
*/
router.get('/dlq/summary', async (_req: Request, res: Response) => {
try {
const summary = await dlq.getSummary();
res.json(summary);
} catch (error) {
console.error('[System] DLQ summary error:', error);
res.status(500).json({ error: 'Failed to get DLQ summary' });
}
});
/**
* GET /api/system/dlq/:id
* Get a specific DLQ payload
*/
router.get('/dlq/:id', async (req: Request, res: Response) => {
try {
const payload = await dlq.getPayload(req.params.id);
if (!payload) {
return res.status(404).json({ error: 'Payload not found' });
}
res.json(payload);
} catch (error) {
console.error('[System] DLQ get error:', error);
res.status(500).json({ error: 'Failed to get DLQ payload' });
}
});
/**
* POST /api/system/dlq/:id/retry
* Retry a DLQ payload
*/
router.post('/dlq/:id/retry', async (req: Request, res: Response) => {
try {
const result = await dlq.retryPayload(req.params.id);
if (result.success) {
res.json(result);
} else {
res.status(400).json(result);
}
} catch (error) {
console.error('[System] DLQ retry error:', error);
res.status(500).json({ error: 'Failed to retry payload' });
}
});
/**
* POST /api/system/dlq/:id/abandon
* Abandon a DLQ payload
*/
router.post('/dlq/:id/abandon', async (req: Request, res: Response) => {
try {
const reason = req.body.reason || 'Manually abandoned';
const abandonedBy = req.body.abandonedBy || 'api';
const success = await dlq.abandonPayload(req.params.id, reason, abandonedBy);
res.json({ success });
} catch (error) {
console.error('[System] DLQ abandon error:', error);
res.status(500).json({ error: 'Failed to abandon payload' });
}
});
/**
* POST /api/system/dlq/bulk-retry
* Bulk retry payloads by error type
*/
router.post('/dlq/bulk-retry', async (req: Request, res: Response) => {
try {
const { errorType } = req.body;
if (!errorType) {
return res.status(400).json({ error: 'errorType is required' });
}
const result = await dlq.bulkRetryByErrorType(errorType);
res.json(result);
} catch (error) {
console.error('[System] DLQ bulk retry error:', error);
res.status(500).json({ error: 'Failed to bulk retry' });
}
});
// ============================================================
// INTEGRITY CHECK ENDPOINTS
// ============================================================
/**
* POST /api/system/integrity/run
* Run all integrity checks
*/
router.post('/integrity/run', async (req: Request, res: Response) => {
try {
const triggeredBy = req.body.triggeredBy || 'api';
const result = await integrity.runAllChecks(triggeredBy);
res.json(result);
} catch (error) {
console.error('[System] Integrity run error:', error);
res.status(500).json({ error: 'Failed to run integrity checks' });
}
});
/**
* GET /api/system/integrity/runs
* Get recent integrity check runs
*/
router.get('/integrity/runs', async (req: Request, res: Response) => {
try {
const limit = req.query.limit ? parseInt(req.query.limit as string) : 10;
const runs = await integrity.getRecentRuns(limit);
res.json(runs);
} catch (error) {
console.error('[System] Integrity runs error:', error);
res.status(500).json({ error: 'Failed to get integrity runs' });
}
});
/**
* GET /api/system/integrity/runs/:runId
* Get results for a specific integrity run
*/
router.get('/integrity/runs/:runId', async (req: Request, res: Response) => {
try {
const results = await integrity.getRunResults(req.params.runId);
res.json(results);
} catch (error) {
console.error('[System] Integrity run results error:', error);
res.status(500).json({ error: 'Failed to get run results' });
}
});
// ============================================================
// AUTO-FIX ENDPOINTS
// ============================================================
/**
* GET /api/system/fix/routines
* Get available fix routines
*/
router.get('/fix/routines', (_req: Request, res: Response) => {
try {
const routines = autoFix.getAvailableRoutines();
res.json(routines);
} catch (error) {
console.error('[System] Get routines error:', error);
res.status(500).json({ error: 'Failed to get routines' });
}
});
/**
* POST /api/system/fix/:routine
* Run a fix routine
*/
router.post('/fix/:routine', async (req: Request, res: Response) => {
try {
const routineName = req.params.routine;
const dryRun = req.body.dryRun === true;
const triggeredBy = req.body.triggeredBy || 'api';
const result = await autoFix.runRoutine(routineName as any, triggeredBy, { dryRun });
res.json(result);
} catch (error) {
console.error('[System] Fix routine error:', error);
res.status(500).json({ error: 'Failed to run fix routine' });
}
});
/**
* GET /api/system/fix/runs
* Get recent fix runs
*/
router.get('/fix/runs', async (req: Request, res: Response) => {
try {
const limit = req.query.limit ? parseInt(req.query.limit as string) : 20;
const runs = await autoFix.getRecentRuns(limit);
res.json(runs);
} catch (error) {
console.error('[System] Fix runs error:', error);
res.status(500).json({ error: 'Failed to get fix runs' });
}
});
// ============================================================
// ALERTS ENDPOINTS
// ============================================================
/**
* GET /api/system/alerts
* List alerts
*/
router.get('/alerts', async (req: Request, res: Response) => {
try {
const options = {
status: req.query.status as any,
severity: req.query.severity as any,
type: req.query.type as string,
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
};
const result = await alerts.listAlerts(options);
res.json(result);
} catch (error) {
console.error('[System] Alerts list error:', error);
res.status(500).json({ error: 'Failed to list alerts' });
}
});
/**
* GET /api/system/alerts/active
* Get active alerts
*/
router.get('/alerts/active', async (_req: Request, res: Response) => {
try {
const activeAlerts = await alerts.getActiveAlerts();
res.json(activeAlerts);
} catch (error) {
console.error('[System] Active alerts error:', error);
res.status(500).json({ error: 'Failed to get active alerts' });
}
});
/**
* GET /api/system/alerts/summary
* Get alert summary
*/
router.get('/alerts/summary', async (_req: Request, res: Response) => {
try {
const summary = await alerts.getSummary();
res.json(summary);
} catch (error) {
console.error('[System] Alerts summary error:', error);
res.status(500).json({ error: 'Failed to get alerts summary' });
}
});
/**
* POST /api/system/alerts/:id/acknowledge
* Acknowledge an alert
*/
router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
try {
const alertId = parseInt(req.params.id);
const acknowledgedBy = req.body.acknowledgedBy || 'api';
const success = await alerts.acknowledgeAlert(alertId, acknowledgedBy);
res.json({ success });
} catch (error) {
console.error('[System] Acknowledge alert error:', error);
res.status(500).json({ error: 'Failed to acknowledge alert' });
}
});
/**
* POST /api/system/alerts/:id/resolve
* Resolve an alert
*/
router.post('/alerts/:id/resolve', async (req: Request, res: Response) => {
try {
const alertId = parseInt(req.params.id);
const resolvedBy = req.body.resolvedBy || 'api';
const success = await alerts.resolveAlert(alertId, resolvedBy);
res.json({ success });
} catch (error) {
console.error('[System] Resolve alert error:', error);
res.status(500).json({ error: 'Failed to resolve alert' });
}
});
/**
* POST /api/system/alerts/bulk-acknowledge
* Bulk acknowledge alerts
*/
router.post('/alerts/bulk-acknowledge', async (req: Request, res: Response) => {
try {
const { ids, acknowledgedBy } = req.body;
if (!ids || !Array.isArray(ids)) {
return res.status(400).json({ error: 'ids array is required' });
}
const count = await alerts.bulkAcknowledge(ids, acknowledgedBy || 'api');
res.json({ acknowledged: count });
} catch (error) {
console.error('[System] Bulk acknowledge error:', error);
res.status(500).json({ error: 'Failed to bulk acknowledge' });
}
});
// ============================================================
// METRICS ENDPOINTS
// ============================================================
/**
* GET /api/system/metrics
* Get all current metrics
*/
router.get('/metrics', async (_req: Request, res: Response) => {
try {
const allMetrics = await metrics.getAllMetrics();
res.json(allMetrics);
} catch (error) {
console.error('[System] Metrics error:', error);
res.status(500).json({ error: 'Failed to get metrics' });
}
});
/**
* GET /api/system/metrics/:name
* Get a specific metric
*/
router.get('/metrics/:name', async (req: Request, res: Response) => {
try {
const metric = await metrics.getMetric(req.params.name);
if (!metric) {
return res.status(404).json({ error: 'Metric not found' });
}
res.json(metric);
} catch (error) {
console.error('[System] Metric error:', error);
res.status(500).json({ error: 'Failed to get metric' });
}
});
/**
* GET /api/system/metrics/:name/history
* Get metric time series
*/
router.get('/metrics/:name/history', async (req: Request, res: Response) => {
try {
const hours = req.query.hours ? parseInt(req.query.hours as string) : 24;
const history = await metrics.getMetricHistory(req.params.name, hours);
res.json(history);
} catch (error) {
console.error('[System] Metric history error:', error);
res.status(500).json({ error: 'Failed to get metric history' });
}
});
/**
* GET /api/system/errors
* Get error summary
*/
router.get('/errors', async (_req: Request, res: Response) => {
try {
const summary = await metrics.getErrorSummary();
res.json(summary);
} catch (error) {
console.error('[System] Error summary error:', error);
res.status(500).json({ error: 'Failed to get error summary' });
}
});
/**
* GET /api/system/errors/recent
* Get recent errors
*/
router.get('/errors/recent', async (req: Request, res: Response) => {
try {
const limit = req.query.limit ? parseInt(req.query.limit as string) : 50;
const errorType = req.query.type as string;
const errors = await metrics.getRecentErrors(limit, errorType);
res.json(errors);
} catch (error) {
console.error('[System] Recent errors error:', error);
res.status(500).json({ error: 'Failed to get recent errors' });
}
});
/**
* POST /api/system/errors/acknowledge
* Acknowledge errors
*/
router.post('/errors/acknowledge', async (req: Request, res: Response) => {
try {
const { ids, acknowledgedBy } = req.body;
if (!ids || !Array.isArray(ids)) {
return res.status(400).json({ error: 'ids array is required' });
}
const count = await metrics.acknowledgeErrors(ids, acknowledgedBy || 'api');
res.json({ acknowledged: count });
} catch (error) {
console.error('[System] Acknowledge errors error:', error);
res.status(500).json({ error: 'Failed to acknowledge errors' });
}
});
return router;
}
/**
* Create Prometheus metrics endpoint (standalone)
*/
export function createPrometheusRouter(pool: Pool): Router {
const router = Router();
const metrics = new MetricsService(pool);

View File

@@ -4,7 +4,7 @@
* Phase 5: Full Production Sync + Monitoring
*/
// SyncOrchestrator moved to _deprecated (depends on hydration module)
export { SyncOrchestrator, type SyncStatus, type QueueDepth, type SyncRunMetrics, type OrchestratorStatus } from './sync-orchestrator';
export { MetricsService, ERROR_TYPES, type Metric, type MetricTimeSeries, type ErrorBucket, type ErrorType } from './metrics';
export { DLQService, type DLQPayload, type DLQStats } from './dlq';
export { AlertService, type SystemAlert, type AlertSummary, type AlertSeverity, type AlertStatus } from './alerts';

View File

@@ -4,9 +4,8 @@
* Exports all task handlers for the task worker.
*/
export { handleProductDiscovery } from './product-discovery';
export { handleProductRefresh } from './product-refresh';
export { handleProductDiscovery } from './product-discovery';
export { handleStoreDiscovery } from './store-discovery';
export { handleEntryPointDiscovery } from './entry-point-discovery';
export { handleAnalyticsRefresh } from './analytics-refresh';
export { handleWhoami } from './whoami';

View File

@@ -1,80 +0,0 @@
/**
* WhoAmI Handler
* Tests proxy connectivity and anti-detect by fetching public IP
* Reports: proxy IP, fingerprint info, and connection status
*/
import { TaskContext, TaskResult } from '../task-worker';
import { execSync } from 'child_process';
export async function handleWhoami(ctx: TaskContext): Promise<TaskResult> {
const { pool, crawlRotator } = ctx;
console.log('[WhoAmI] Testing proxy and anti-detect...');
try {
// Use the preflight check which tests proxy + anti-detect
if (crawlRotator) {
const preflight = await crawlRotator.preflight();
if (!preflight.passed) {
return {
success: false,
error: preflight.error || 'Preflight check failed',
proxyAvailable: preflight.proxyAvailable,
proxyConnected: preflight.proxyConnected,
antidetectReady: preflight.antidetectReady,
};
}
console.log(`[WhoAmI] Proxy IP: ${preflight.proxyIp}, Response: ${preflight.responseTimeMs}ms`);
console.log(`[WhoAmI] Fingerprint: ${preflight.fingerprint?.browserName}/${preflight.fingerprint?.deviceCategory}`);
return {
success: true,
proxyIp: preflight.proxyIp,
responseTimeMs: preflight.responseTimeMs,
fingerprint: preflight.fingerprint,
proxyAvailable: preflight.proxyAvailable,
proxyConnected: preflight.proxyConnected,
antidetectReady: preflight.antidetectReady,
};
}
// Fallback: Direct proxy test without CrawlRotator
const proxyResult = await pool.query(`
SELECT host, port, username, password
FROM proxies
WHERE is_active = true
LIMIT 1
`);
if (proxyResult.rows.length === 0) {
return { success: false, error: 'No active proxy configured' };
}
const p = proxyResult.rows[0];
const proxyUrl = p.username
? `http://${p.username}:${p.password}@${p.host}:${p.port}`
: `http://${p.host}:${p.port}`;
console.log(`[WhoAmI] Using proxy: ${p.host}:${p.port}`);
// Fetch IP via proxy
const cmd = `curl -s --proxy '${proxyUrl}' 'https://api.ipify.org?format=json'`;
const output = execSync(cmd, { timeout: 30000 }).toString().trim();
const data = JSON.parse(output);
console.log(`[WhoAmI] Proxy IP: ${data.ip}`);
return {
success: true,
proxyIp: data.ip,
proxyHost: p.host,
proxyPort: p.port,
};
} catch (error: any) {
console.error('[WhoAmI] Error:', error.message);
return { success: false, error: error.message };
}
}

View File

@@ -17,8 +17,8 @@ export {
export { TaskWorker, TaskContext, TaskResult } from './task-worker';
export {
handleProductDiscovery,
handleProductRefresh,
handleProductDiscovery,
handleStoreDiscovery,
handleEntryPointDiscovery,
handleAnalyticsRefresh,

View File

@@ -24,16 +24,14 @@ async function tableExists(tableName: string): Promise<boolean> {
// Per TASK_WORKFLOW_2024-12-10.md: Task roles
// payload_fetch: Hits Dutchie API, saves raw payload to filesystem
// product_discovery: Main product crawl handler
// product_refresh: Legacy role (deprecated but kept for compatibility)
// product_refresh: Reads local payload, normalizes, upserts to DB
export type TaskRole =
| 'store_discovery'
| 'entry_point_discovery'
| 'product_discovery'
| 'payload_fetch' // Fetches from API, saves to disk
| 'product_refresh' // DEPRECATED: Use product_discovery instead
| 'analytics_refresh'
| 'whoami'; // Tests proxy + anti-detect connectivity
| 'payload_fetch' // NEW: Fetches from API, saves to disk
| 'product_refresh' // CHANGED: Now reads from local payload
| 'analytics_refresh';
export type TaskStatus =
| 'pending'
@@ -52,7 +50,6 @@ export interface WorkerTask {
platform: string | null;
status: TaskStatus;
priority: number;
method: 'curl' | 'http' | null; // Transport method: curl=axios/proxy, http=Puppeteer/browser
scheduled_for: Date | null;
worker_id: string | null;
claimed_at: Date | null;
@@ -154,33 +151,23 @@ class TaskService {
* Claim a task atomically for a worker
* If role is null, claims ANY available task (role-agnostic worker)
* Returns null if task pool is paused.
*
* @param role - Task role to claim, or null for any task
* @param workerId - Worker ID claiming the task
* @param curlPassed - Whether worker passed curl preflight (default true for backward compat)
* @param httpPassed - Whether worker passed http/Puppeteer preflight (default false)
*/
async claimTask(
role: TaskRole | null,
workerId: string,
curlPassed: boolean = true,
httpPassed: boolean = false
): Promise<WorkerTask | null> {
async claimTask(role: TaskRole | null, workerId: string): Promise<WorkerTask | null> {
// Check if task pool is paused - don't claim any tasks
if (isTaskPoolPaused()) {
return null;
}
if (role) {
// Role-specific claiming - use the SQL function with preflight capabilities
// Role-specific claiming - use the SQL function
const result = await pool.query(
`SELECT * FROM claim_task($1, $2, $3, $4)`,
[role, workerId, curlPassed, httpPassed]
`SELECT * FROM claim_task($1, $2)`,
[role, workerId]
);
return (result.rows[0] as WorkerTask) || null;
}
// Role-agnostic claiming - claim ANY pending task matching worker capabilities
// Role-agnostic claiming - claim ANY pending task
const result = await pool.query(`
UPDATE worker_tasks
SET
@@ -191,12 +178,6 @@ class TaskService {
SELECT id FROM worker_tasks
WHERE status = 'pending'
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
-- Method compatibility: worker must have passed the required preflight
AND (
method IS NULL -- No preference, any worker can claim
OR (method = 'curl' AND $2 = TRUE)
OR (method = 'http' AND $3 = TRUE)
)
-- Exclude stores that already have an active task
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
SELECT dispensary_id FROM worker_tasks
@@ -208,7 +189,7 @@ class TaskService {
FOR UPDATE SKIP LOCKED
)
RETURNING *
`, [workerId, curlPassed, httpPassed]);
`, [workerId]);
return (result.rows[0] as WorkerTask) || null;
}
@@ -249,24 +230,6 @@ class TaskService {
);
}
/**
* Release a claimed task back to pending (e.g., when preflight fails)
* This allows another worker to pick it up.
*/
async releaseTask(taskId: number): Promise<void> {
await pool.query(
`UPDATE worker_tasks
SET status = 'pending',
worker_id = NULL,
claimed_at = NULL,
started_at = NULL,
updated_at = NOW()
WHERE id = $1 AND status IN ('claimed', 'running')`,
[taskId]
);
console.log(`[TaskService] Task ${taskId} released back to pending`);
}
/**
* Mark a task as failed, with auto-retry if under max_retries
* Returns true if task was re-queued for retry, false if permanently failed

View File

@@ -51,10 +51,6 @@ import os from 'os';
import { CrawlRotator } from '../services/crawl-rotator';
import { setCrawlRotator } from '../platforms/dutchie';
// Dual-transport preflight system
import { runCurlPreflight, CurlPreflightResult } from '../services/curl-preflight';
import { runPuppeteerPreflightWithRetry, PuppeteerPreflightResult } from '../services/puppeteer-preflight';
// Task handlers by role
// Per TASK_WORKFLOW_2024-12-10.md: payload_fetch and product_refresh are now separate
import { handlePayloadFetch } from './handlers/payload-fetch';
@@ -63,59 +59,16 @@ import { handleProductDiscovery } from './handlers/product-discovery';
import { handleStoreDiscovery } from './handlers/store-discovery';
import { handleEntryPointDiscovery } from './handlers/entry-point-discovery';
import { handleAnalyticsRefresh } from './handlers/analytics-refresh';
import { handleWhoami } from './handlers/whoami';
const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '5000');
const HEARTBEAT_INTERVAL_MS = parseInt(process.env.HEARTBEAT_INTERVAL_MS || '30000');
const API_BASE_URL = process.env.API_BASE_URL || 'http://localhost:3010';
// =============================================================================
// CONCURRENT TASK PROCESSING SETTINGS
// =============================================================================
// Workers can process multiple tasks simultaneously using async I/O.
// This improves throughput for I/O-bound tasks (network calls, DB queries).
//
// Resource thresholds trigger "backoff" - the worker stops claiming new tasks
// but continues processing existing ones until resources return to normal.
//
// See: docs/WORKER_TASK_ARCHITECTURE.md#concurrent-task-processing
// =============================================================================
// Maximum number of tasks this worker will run concurrently
// Tune based on workload: I/O-bound tasks benefit from higher concurrency
const MAX_CONCURRENT_TASKS = parseInt(process.env.MAX_CONCURRENT_TASKS || '3');
// When heap memory usage exceeds this threshold (as decimal 0.0-1.0), stop claiming new tasks
// Default 85% - gives headroom before OOM
const MEMORY_BACKOFF_THRESHOLD = parseFloat(process.env.MEMORY_BACKOFF_THRESHOLD || '0.85');
// Parse max heap size from NODE_OPTIONS (--max-old-space-size=1500)
// This is used as the denominator for memory percentage calculation
// V8's heapTotal is dynamic and stays small when idle, causing false high percentages
function getMaxHeapSizeMb(): number {
const nodeOptions = process.env.NODE_OPTIONS || '';
const match = nodeOptions.match(/--max-old-space-size=(\d+)/);
if (match) {
return parseInt(match[1], 10);
}
// Fallback: use 512MB if not specified
return 512;
}
const MAX_HEAP_SIZE_MB = getMaxHeapSizeMb();
// When CPU usage exceeds this threshold (as decimal 0.0-1.0), stop claiming new tasks
// Default 90% - allows some burst capacity
const CPU_BACKOFF_THRESHOLD = parseFloat(process.env.CPU_BACKOFF_THRESHOLD || '0.90');
// How long to wait (ms) when in backoff state before rechecking resources
const BACKOFF_DURATION_MS = parseInt(process.env.BACKOFF_DURATION_MS || '10000');
export interface TaskContext {
pool: Pool;
workerId: string;
task: WorkerTask;
heartbeat: () => Promise<void>;
crawlRotator?: CrawlRotator;
}
export interface TaskResult {
@@ -130,38 +83,17 @@ export interface TaskResult {
type TaskHandler = (ctx: TaskContext) => Promise<TaskResult>;
// Per TASK_WORKFLOW_2024-12-10.md: Handler registry
// payload_fetch: Fetches from Dutchie API, saves to disk
// payload_fetch: Fetches from Dutchie API, saves to disk, chains to product_refresh
// product_refresh: Reads local payload, normalizes, upserts to DB
// product_discovery: Main handler for product crawling
const TASK_HANDLERS: Record<TaskRole, TaskHandler> = {
payload_fetch: handlePayloadFetch, // API fetch -> disk
product_refresh: handleProductRefresh, // disk -> DB
payload_fetch: handlePayloadFetch, // NEW: API fetch -> disk
product_refresh: handleProductRefresh, // CHANGED: disk -> DB
product_discovery: handleProductDiscovery,
store_discovery: handleStoreDiscovery,
entry_point_discovery: handleEntryPointDiscovery,
analytics_refresh: handleAnalyticsRefresh,
whoami: handleWhoami, // Tests proxy + anti-detect
};
/**
* Resource usage stats reported to the registry and used for backoff decisions.
* These values are included in worker heartbeats and displayed in the UI.
*/
interface ResourceStats {
/** Current heap memory usage as decimal (0.0 to 1.0) */
memoryPercent: number;
/** Current heap used in MB */
memoryMb: number;
/** Total heap available in MB */
memoryTotalMb: number;
/** CPU usage percentage since last check (0 to 100) */
cpuPercent: number;
/** True if worker is currently in backoff state */
isBackingOff: boolean;
/** Reason for backoff (e.g., "Memory at 87.3% (threshold: 85%)") */
backoffReason: string | null;
}
export class TaskWorker {
private pool: Pool;
private workerId: string;
@@ -170,125 +102,14 @@ export class TaskWorker {
private isRunning: boolean = false;
private heartbeatInterval: NodeJS.Timeout | null = null;
private registryHeartbeatInterval: NodeJS.Timeout | null = null;
private currentTask: WorkerTask | null = null;
private crawlRotator: CrawlRotator;
// ==========================================================================
// CONCURRENT TASK TRACKING
// ==========================================================================
// activeTasks: Map of task ID -> task object for all currently running tasks
// taskPromises: Map of task ID -> Promise for cleanup when task completes
// maxConcurrentTasks: How many tasks this worker will run in parallel
// ==========================================================================
private activeTasks: Map<number, WorkerTask> = new Map();
private taskPromises: Map<number, Promise<void>> = new Map();
private maxConcurrentTasks: number = MAX_CONCURRENT_TASKS;
// ==========================================================================
// RESOURCE MONITORING FOR BACKOFF
// ==========================================================================
// CPU tracking uses differential measurement - we track last values and
// calculate percentage based on elapsed time since last check.
// ==========================================================================
private lastCpuUsage: { user: number; system: number } = { user: 0, system: 0 };
private lastCpuCheck: number = Date.now();
private isBackingOff: boolean = false;
private backoffReason: string | null = null;
// ==========================================================================
// DUAL-TRANSPORT PREFLIGHT STATUS
// ==========================================================================
// Workers run BOTH preflights on startup:
// - curl: axios/proxy transport - fast, for simple API calls
// - http: Puppeteer/browser transport - anti-detect, for Dutchie GraphQL
//
// Task claiming checks method compatibility - worker must have passed
// the preflight for the task's required method.
// ==========================================================================
private preflightCurlPassed: boolean = false;
private preflightHttpPassed: boolean = false;
private preflightCurlResult: CurlPreflightResult | null = null;
private preflightHttpResult: PuppeteerPreflightResult | null = null;
constructor(role: TaskRole | null = null, workerId?: string) {
this.pool = getPool();
this.role = role;
this.workerId = workerId || `worker-${uuidv4().slice(0, 8)}`;
this.crawlRotator = new CrawlRotator(this.pool);
// Initialize CPU tracking
const cpuUsage = process.cpuUsage();
this.lastCpuUsage = { user: cpuUsage.user, system: cpuUsage.system };
this.lastCpuCheck = Date.now();
}
/**
* Get current resource usage
* Memory percentage is calculated against MAX_HEAP_SIZE_MB (from --max-old-space-size)
* NOT against V8's dynamic heapTotal which stays small when idle
*/
private getResourceStats(): ResourceStats {
const memUsage = process.memoryUsage();
const heapUsedMb = memUsage.heapUsed / 1024 / 1024;
// Use MAX_HEAP_SIZE_MB as ceiling, not dynamic heapTotal
// V8's heapTotal stays small when idle (e.g., 36MB) causing false 95%+ readings
// With --max-old-space-size=1500, we should calculate against 1500MB
const memoryPercent = heapUsedMb / MAX_HEAP_SIZE_MB;
// Calculate CPU usage since last check
const cpuUsage = process.cpuUsage();
const now = Date.now();
const elapsed = now - this.lastCpuCheck;
let cpuPercent = 0;
if (elapsed > 0) {
const userDiff = (cpuUsage.user - this.lastCpuUsage.user) / 1000; // microseconds to ms
const systemDiff = (cpuUsage.system - this.lastCpuUsage.system) / 1000;
cpuPercent = ((userDiff + systemDiff) / elapsed) * 100;
}
// Update last values
this.lastCpuUsage = { user: cpuUsage.user, system: cpuUsage.system };
this.lastCpuCheck = now;
return {
memoryPercent,
memoryMb: Math.round(heapUsedMb),
memoryTotalMb: MAX_HEAP_SIZE_MB, // Use max-old-space-size, not dynamic heapTotal
cpuPercent: Math.min(100, cpuPercent), // Cap at 100%
isBackingOff: this.isBackingOff,
backoffReason: this.backoffReason,
};
}
/**
* Check if we should back off from taking new tasks
*/
private shouldBackOff(): { backoff: boolean; reason: string | null } {
const stats = this.getResourceStats();
if (stats.memoryPercent > MEMORY_BACKOFF_THRESHOLD) {
return { backoff: true, reason: `Memory at ${(stats.memoryPercent * 100).toFixed(1)}% (threshold: ${MEMORY_BACKOFF_THRESHOLD * 100}%)` };
}
if (stats.cpuPercent > CPU_BACKOFF_THRESHOLD * 100) {
return { backoff: true, reason: `CPU at ${stats.cpuPercent.toFixed(1)}% (threshold: ${CPU_BACKOFF_THRESHOLD * 100}%)` };
}
return { backoff: false, reason: null };
}
/**
* Get count of currently running tasks
*/
get activeTaskCount(): number {
return this.activeTasks.size;
}
/**
* Check if we can accept more tasks
*/
private canAcceptMoreTasks(): boolean {
return this.activeTasks.size < this.maxConcurrentTasks;
}
/**
@@ -371,99 +192,6 @@ export class TaskWorker {
}
}
/**
* Run dual-transport preflights on startup
* Tests both curl (axios/proxy) and http (Puppeteer/browser) transport methods.
* Results are reported to worker_registry and used for task claiming.
*
* NOTE: All current tasks require 'http' method, so http preflight must pass
* for the worker to claim any tasks. Curl preflight is for future use.
*/
private async runDualPreflights(): Promise<void> {
console.log(`[TaskWorker] Running dual-transport preflights...`);
// Run both preflights in parallel for efficiency
const [curlResult, httpResult] = await Promise.all([
runCurlPreflight(this.crawlRotator).catch((err): CurlPreflightResult => ({
method: 'curl',
passed: false,
proxyAvailable: false,
proxyConnected: false,
antidetectReady: false,
proxyIp: null,
fingerprint: null,
error: `Preflight error: ${err.message}`,
responseTimeMs: null,
})),
runPuppeteerPreflightWithRetry(this.crawlRotator, 1).catch((err): PuppeteerPreflightResult => ({
method: 'http',
passed: false,
proxyAvailable: false,
proxyConnected: false,
antidetectReady: false,
proxyIp: null,
fingerprint: null,
error: `Preflight error: ${err.message}`,
responseTimeMs: null,
productsReturned: 0,
})),
]);
// Store results
this.preflightCurlResult = curlResult;
this.preflightHttpResult = httpResult;
this.preflightCurlPassed = curlResult.passed;
this.preflightHttpPassed = httpResult.passed;
// Log results
console.log(`[TaskWorker] CURL preflight: ${curlResult.passed ? 'PASSED' : 'FAILED'}${curlResult.error ? ` - ${curlResult.error}` : ''}`);
console.log(`[TaskWorker] HTTP preflight: ${httpResult.passed ? 'PASSED' : 'FAILED'}${httpResult.error ? ` - ${httpResult.error}` : ''}`);
if (httpResult.passed && httpResult.productsReturned) {
console.log(`[TaskWorker] HTTP preflight returned ${httpResult.productsReturned} products from test store`);
}
// Report to worker_registry via API
await this.reportPreflightStatus();
// Since all tasks require 'http', warn if http preflight failed
if (!this.preflightHttpPassed) {
console.warn(`[TaskWorker] WARNING: HTTP preflight failed - this worker cannot claim any tasks!`);
console.warn(`[TaskWorker] Error: ${httpResult.error}`);
}
}
/**
* Report preflight status to worker_registry
*/
private async reportPreflightStatus(): Promise<void> {
try {
// Update worker_registry directly via SQL (more reliable than API)
await this.pool.query(`
SELECT update_worker_preflight($1, 'curl', $2, $3, $4)
`, [
this.workerId,
this.preflightCurlPassed ? 'passed' : 'failed',
this.preflightCurlResult?.responseTimeMs || null,
this.preflightCurlResult?.error || null,
]);
await this.pool.query(`
SELECT update_worker_preflight($1, 'http', $2, $3, $4)
`, [
this.workerId,
this.preflightHttpPassed ? 'passed' : 'failed',
this.preflightHttpResult?.responseTimeMs || null,
this.preflightHttpResult?.error || null,
]);
console.log(`[TaskWorker] Preflight status reported to worker_registry`);
} catch (err: any) {
// Non-fatal - worker can still function
console.warn(`[TaskWorker] Could not report preflight status: ${err.message}`);
}
}
/**
* Register worker with the registry (get friendly name)
*/
@@ -524,32 +252,21 @@ export class TaskWorker {
const memUsage = process.memoryUsage();
const cpuUsage = process.cpuUsage();
const proxyLocation = this.crawlRotator.getProxyLocation();
const resourceStats = this.getResourceStats();
// Get array of active task IDs
const activeTaskIds = Array.from(this.activeTasks.keys());
await fetch(`${API_BASE_URL}/api/worker-registry/heartbeat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
worker_id: this.workerId,
current_task_id: activeTaskIds[0] || null, // Primary task for backwards compat
current_task_ids: activeTaskIds, // All active tasks
active_task_count: this.activeTasks.size,
max_concurrent_tasks: this.maxConcurrentTasks,
status: this.activeTasks.size > 0 ? 'active' : 'idle',
current_task_id: this.currentTask?.id || null,
status: this.currentTask ? 'active' : 'idle',
resources: {
memory_mb: Math.round(memUsage.heapUsed / 1024 / 1024),
memory_total_mb: Math.round(memUsage.heapTotal / 1024 / 1024),
memory_rss_mb: Math.round(memUsage.rss / 1024 / 1024),
memory_percent: Math.round(resourceStats.memoryPercent * 100),
cpu_user_ms: Math.round(cpuUsage.user / 1000),
cpu_system_ms: Math.round(cpuUsage.system / 1000),
cpu_percent: Math.round(resourceStats.cpuPercent),
proxy_location: proxyLocation,
is_backing_off: this.isBackingOff,
backoff_reason: this.backoffReason,
}
})
});
@@ -607,119 +324,24 @@ export class TaskWorker {
// Register with the API to get a friendly name
await this.register();
// Run dual-transport preflights
await this.runDualPreflights();
// Start registry heartbeat
this.startRegistryHeartbeat();
const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)';
const preflightMsg = `curl=${this.preflightCurlPassed ? '✓' : '✗'} http=${this.preflightHttpPassed ? '✓' : '✗'}`;
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (${preflightMsg}, max ${this.maxConcurrentTasks} concurrent tasks)`);
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg}`);
while (this.isRunning) {
try {
await this.mainLoop();
await this.processNextTask();
} catch (error: any) {
console.error(`[TaskWorker] Loop error:`, error.message);
await this.sleep(POLL_INTERVAL_MS);
}
}
// Wait for any remaining tasks to complete
if (this.taskPromises.size > 0) {
console.log(`[TaskWorker] Waiting for ${this.taskPromises.size} active tasks to complete...`);
await Promise.allSettled(this.taskPromises.values());
}
console.log(`[TaskWorker] Worker ${this.workerId} stopped`);
}
/**
* Main loop - tries to fill up to maxConcurrentTasks
*/
private async mainLoop(): Promise<void> {
// Check resource usage and backoff if needed
const { backoff, reason } = this.shouldBackOff();
if (backoff) {
if (!this.isBackingOff) {
console.log(`[TaskWorker] ${this.friendlyName} backing off: ${reason}`);
}
this.isBackingOff = true;
this.backoffReason = reason;
await this.sleep(BACKOFF_DURATION_MS);
return;
}
// Clear backoff state
if (this.isBackingOff) {
console.log(`[TaskWorker] ${this.friendlyName} resuming normal operation`);
this.isBackingOff = false;
this.backoffReason = null;
}
// Check for decommission signal
const shouldDecommission = await this.checkDecommission();
if (shouldDecommission) {
console.log(`[TaskWorker] ${this.friendlyName} received decommission signal - waiting for ${this.activeTasks.size} tasks to complete`);
// Stop accepting new tasks, wait for current to finish
this.isRunning = false;
return;
}
// Try to claim more tasks if we have capacity
if (this.canAcceptMoreTasks()) {
// Pass preflight capabilities to only claim compatible tasks
const task = await taskService.claimTask(
this.role,
this.workerId,
this.preflightCurlPassed,
this.preflightHttpPassed
);
if (task) {
console.log(`[TaskWorker] ${this.friendlyName} claimed task ${task.id} (${task.role}) [${this.activeTasks.size + 1}/${this.maxConcurrentTasks}]`);
// =================================================================
// PREFLIGHT CHECK - CRITICAL: Worker MUST pass before task execution
// Verifies: 1) Proxy available 2) Proxy connected 3) Anti-detect ready
// =================================================================
const preflight = await this.crawlRotator.preflight();
if (!preflight.passed) {
console.log(`[TaskWorker] ${this.friendlyName} PREFLIGHT FAILED for task ${task.id}: ${preflight.error}`);
console.log(`[TaskWorker] Releasing task ${task.id} back to pending - worker cannot proceed without proxy/anti-detect`);
// Release task back to pending so another worker can pick it up
await taskService.releaseTask(task.id);
// Wait before trying again - give proxies time to recover
await this.sleep(30000); // 30 second wait on preflight failure
return;
}
console.log(`[TaskWorker] ${this.friendlyName} preflight PASSED for task ${task.id} (proxy: ${preflight.proxyIp}, ${preflight.responseTimeMs}ms)`);
this.activeTasks.set(task.id, task);
// Start task in background (don't await)
const taskPromise = this.executeTask(task);
this.taskPromises.set(task.id, taskPromise);
// Clean up when done
taskPromise.finally(() => {
this.activeTasks.delete(task.id);
this.taskPromises.delete(task.id);
});
// Immediately try to claim more tasks (don't wait for poll interval)
return;
}
}
// No task claimed or at capacity - wait before next poll
await this.sleep(POLL_INTERVAL_MS);
}
/**
* Stop the worker
*/
@@ -732,10 +354,23 @@ export class TaskWorker {
}
/**
* Execute a single task (runs concurrently with other tasks)
* Process the next available task
*/
private async executeTask(task: WorkerTask): Promise<void> {
console.log(`[TaskWorker] ${this.friendlyName} starting task ${task.id} (${task.role}) for dispensary ${task.dispensary_id || 'N/A'}`);
private async processNextTask(): Promise<void> {
// Try to claim a task
const task = await taskService.claimTask(this.role, this.workerId);
if (!task) {
// No tasks available, wait and retry
await this.sleep(POLL_INTERVAL_MS);
return;
}
this.currentTask = task;
console.log(`[TaskWorker] Claimed task ${task.id} (${task.role}) for dispensary ${task.dispensary_id || 'N/A'}`);
// Start heartbeat
this.startHeartbeat(task.id);
try {
// Mark as running
@@ -755,7 +390,6 @@ export class TaskWorker {
heartbeat: async () => {
await taskService.heartbeat(task.id);
},
crawlRotator: this.crawlRotator,
};
// Execute the task
@@ -765,7 +399,7 @@ export class TaskWorker {
// Mark as completed
await taskService.completeTask(task.id, result);
await this.reportTaskCompletion(true);
console.log(`[TaskWorker] ${this.friendlyName} completed task ${task.id} [${this.activeTasks.size}/${this.maxConcurrentTasks} active]`);
console.log(`[TaskWorker] ${this.friendlyName} completed task ${task.id}`);
// Chain next task if applicable
const chainedTask = await taskService.chainNextTask({
@@ -787,35 +421,9 @@ export class TaskWorker {
await taskService.failTask(task.id, error.message);
await this.reportTaskCompletion(false);
console.error(`[TaskWorker] ${this.friendlyName} task ${task.id} error:`, error.message);
}
// Note: cleanup (removing from activeTasks) is handled in mainLoop's finally block
}
/**
* Check if this worker has been flagged for decommission
* Returns true if worker should stop after current task
*/
private async checkDecommission(): Promise<boolean> {
try {
// Check worker_registry for decommission flag
const result = await this.pool.query(
`SELECT decommission_requested, decommission_reason
FROM worker_registry
WHERE worker_id = $1`,
[this.workerId]
);
if (result.rows.length > 0 && result.rows[0].decommission_requested) {
const reason = result.rows[0].decommission_reason || 'No reason provided';
console.log(`[TaskWorker] Decommission requested: ${reason}`);
return true;
}
return false;
} catch (error: any) {
// If we can't check, continue running
console.warn(`[TaskWorker] Could not check decommission status: ${error.message}`);
return false;
} finally {
this.stopHeartbeat();
this.currentTask = null;
}
}
@@ -852,29 +460,12 @@ export class TaskWorker {
/**
* Get worker info
*/
getInfo(): {
workerId: string;
role: TaskRole | null;
isRunning: boolean;
activeTaskIds: number[];
activeTaskCount: number;
maxConcurrentTasks: number;
isBackingOff: boolean;
backoffReason: string | null;
preflightCurlPassed: boolean;
preflightHttpPassed: boolean;
} {
getInfo(): { workerId: string; role: TaskRole | null; isRunning: boolean; currentTaskId: number | null } {
return {
workerId: this.workerId,
role: this.role,
isRunning: this.isRunning,
activeTaskIds: Array.from(this.activeTasks.keys()),
activeTaskCount: this.activeTasks.size,
maxConcurrentTasks: this.maxConcurrentTasks,
isBackingOff: this.isBackingOff,
backoffReason: this.backoffReason,
preflightCurlPassed: this.preflightCurlPassed,
preflightHttpPassed: this.preflightHttpPassed,
currentTaskId: this.currentTask?.id || null,
};
}
}
@@ -891,8 +482,8 @@ async function main(): Promise<void> {
'store_discovery',
'entry_point_discovery',
'product_discovery',
'payload_fetch', // Fetches from API, saves to disk
'product_refresh', // Reads from disk, processes to DB
'payload_fetch', // NEW: Fetches from API, saves to disk
'product_refresh', // CHANGED: Reads from disk, processes to DB
'analytics_refresh',
];

View File

@@ -1,180 +0,0 @@
/**
* Stealth Browser Payload Capture - Direct GraphQL Injection
*
* Uses the browser session to make GraphQL requests that look organic.
* Adds proper headers matching what Dutchie's frontend sends.
*/
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const fs = require('fs');
puppeteer.use(StealthPlugin());
async function capturePayload(config) {
const {
dispensaryId = null,
platformId,
cName,
outputPath = `/tmp/payload_${cName}_${Date.now()}.json`,
} = config;
const browser = await puppeteer.launch({
headless: 'new',
args: ['--no-sandbox', '--disable-setuid-sandbox']
});
const page = await browser.newPage();
// Establish session by visiting the embedded menu
const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
console.log(`[Capture] Establishing session at ${embedUrl}...`);
await page.goto(embedUrl, {
waitUntil: 'networkidle2',
timeout: 60000
});
console.log('[Capture] Session established, fetching ALL products...');
// Fetch all products using GET requests with proper headers
const result = await page.evaluate(async (platformId, cName) => {
const allProducts = [];
const logs = [];
let pageNum = 0;
const perPage = 100;
let totalCount = 0;
const sessionId = 'browser-session-' + Date.now();
try {
while (pageNum < 30) { // Max 30 pages = 3000 products
const variables = {
includeEnterpriseSpecials: false,
productsFilter: {
dispensaryId: platformId,
pricingType: 'rec',
Status: 'Active', // 'Active' for in-stock products per CLAUDE.md
types: [],
useCache: true,
isDefaultSort: true,
sortBy: 'popularSortIdx',
sortDirection: 1,
bypassOnlineThresholds: true,
isKioskMenu: false,
removeProductsBelowOptionThresholds: false,
},
page: pageNum,
perPage: perPage,
};
const extensions = {
persistedQuery: {
version: 1,
sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'
}
};
// Build GET URL like the browser does
const qs = new URLSearchParams({
operationName: 'FilteredProducts',
variables: JSON.stringify(variables),
extensions: JSON.stringify(extensions)
});
const url = `https://dutchie.com/api-3/graphql?${qs.toString()}`;
const response = await fetch(url, {
method: 'GET',
headers: {
'Accept': 'application/json',
'content-type': 'application/json',
'x-dutchie-session': sessionId,
'apollographql-client-name': 'Marketplace (production)',
},
credentials: 'include'
});
logs.push(`Page ${pageNum}: HTTP ${response.status}`);
if (!response.ok) {
const text = await response.text();
logs.push(`HTTP error: ${response.status} - ${text.slice(0, 200)}`);
break;
}
const json = await response.json();
if (json.errors) {
logs.push(`GraphQL error: ${JSON.stringify(json.errors).slice(0, 200)}`);
break;
}
const data = json?.data?.filteredProducts;
if (!data || !data.products) {
logs.push('No products in response');
break;
}
const products = data.products;
allProducts.push(...products);
if (pageNum === 0) {
totalCount = data.queryInfo?.totalCount || 0;
logs.push(`Total reported: ${totalCount}`);
}
logs.push(`Got ${products.length} products (total: ${allProducts.length}/${totalCount})`);
if (allProducts.length >= totalCount || products.length < perPage) {
break;
}
pageNum++;
// Small delay between pages to be polite
await new Promise(r => setTimeout(r, 200));
}
} catch (err) {
logs.push(`Error: ${err.message}`);
}
return { products: allProducts, totalCount, logs };
}, platformId, cName);
await browser.close();
// Print logs from browser context
result.logs.forEach(log => console.log(`[Browser] ${log}`));
console.log(`[Capture] Got ${result.products.length} products (API reported ${result.totalCount})`);
const payload = {
dispensaryId: dispensaryId,
platformId: platformId,
cName,
fetchedAt: new Date().toISOString(),
productCount: result.products.length,
products: result.products,
};
fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
console.log(`\n=== Capture Complete ===`);
console.log(`Total products: ${result.products.length}`);
console.log(`Saved to: ${outputPath}`);
console.log(`File size: ${(fs.statSync(outputPath).size / 1024).toFixed(1)} KB`);
return payload;
}
// Run
(async () => {
const payload = await capturePayload({
cName: 'AZ-Deeply-Rooted',
platformId: '6405ef617056e8014d79101b',
});
if (payload.products.length > 0) {
const sample = payload.products[0];
console.log(`\nSample: ${sample.Name || sample.name} - ${sample.brand?.name || sample.brandName}`);
}
})().catch(console.error);

View File

@@ -14,5 +14,5 @@
"allowSyntheticDefaultImports": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist", "src/**/*.test.ts", "src/**/__tests__/**", "src/_deprecated/**"]
"exclude": ["node_modules", "dist", "src/**/*.test.ts", "src/**/__tests__/**"]
}

View File

@@ -2,7 +2,7 @@
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>CannaIQ - Cannabis Menu Intelligence Platform</title>
<meta name="description" content="CannaIQ provides real-time cannabis dispensary menu data, product tracking, and analytics for dispensaries across Arizona." />

View File

@@ -1,5 +0,0 @@
<svg viewBox="0 0 32 32" xmlns="http://www.w3.org/2000/svg">
<rect width="32" height="32" rx="6" fill="#059669"/>
<path d="M16 6C12.5 6 9.5 7.5 7.5 10L16 16L24.5 10C22.5 7.5 19.5 6 16 6Z" fill="white"/>
<path d="M7.5 10C6 12 5 14.5 5 17C5 22.5 10 26 16 26C22 26 27 22.5 27 17C27 14.5 26 12 24.5 10L16 16L7.5 10Z" fill="white" fill-opacity="0.7"/>
</svg>

Before

Width:  |  Height:  |  Size: 360 B

View File

@@ -47,6 +47,7 @@ import CrossStateCompare from './pages/CrossStateCompare';
import StateDetail from './pages/StateDetail';
import { Discovery } from './pages/Discovery';
import { WorkersDashboard } from './pages/WorkersDashboard';
import { JobQueue } from './pages/JobQueue';
import TasksDashboard from './pages/TasksDashboard';
import { ScraperOverviewDashboard } from './pages/ScraperOverviewDashboard';
import { SeoOrchestrator } from './pages/admin/seo/SeoOrchestrator';
@@ -124,6 +125,8 @@ export default function App() {
<Route path="/discovery" element={<PrivateRoute><Discovery /></PrivateRoute>} />
{/* Workers Dashboard */}
<Route path="/workers" element={<PrivateRoute><WorkersDashboard /></PrivateRoute>} />
{/* Job Queue Management */}
<Route path="/job-queue" element={<PrivateRoute><JobQueue /></PrivateRoute>} />
{/* Task Queue Dashboard */}
<Route path="/tasks" element={<PrivateRoute><TasksDashboard /></PrivateRoute>} />
{/* Scraper Overview Dashboard (new primary) */}

View File

@@ -1,7 +1,8 @@
import { ReactNode, useEffect, useState, useRef } from 'react';
import { useNavigate, useLocation, Link } from 'react-router-dom';
import { ReactNode, useEffect, useState } from 'react';
import { useNavigate, useLocation } from 'react-router-dom';
import { useAuthStore } from '../store/authStore';
import { api } from '../lib/api';
import { StateSelector } from './StateSelector';
import {
LayoutDashboard,
Building2,
@@ -47,8 +48,8 @@ interface NavLinkProps {
function NavLink({ to, icon, label, isActive }: NavLinkProps) {
return (
<Link
to={to}
<a
href={to}
className={`flex items-center gap-3 px-3 py-2 rounded-lg text-sm font-medium transition-colors ${
isActive
? 'bg-emerald-50 text-emerald-700'
@@ -57,7 +58,7 @@ function NavLink({ to, icon, label, isActive }: NavLinkProps) {
>
<span className={`flex-shrink-0 ${isActive ? 'text-emerald-600' : 'text-gray-400'}`}>{icon}</span>
<span>{label}</span>
</Link>
</a>
);
}
@@ -85,8 +86,6 @@ export function Layout({ children }: LayoutProps) {
const { user, logout } = useAuthStore();
const [versionInfo, setVersionInfo] = useState<VersionInfo | null>(null);
const [sidebarOpen, setSidebarOpen] = useState(false);
const navRef = useRef<HTMLElement>(null);
const scrollPositionRef = useRef<number>(0);
useEffect(() => {
const fetchVersion = async () => {
@@ -112,34 +111,16 @@ export function Layout({ children }: LayoutProps) {
return location.pathname.startsWith(path);
};
// Save scroll position before route change
useEffect(() => {
const nav = navRef.current;
if (nav) {
const handleScroll = () => {
scrollPositionRef.current = nav.scrollTop;
};
nav.addEventListener('scroll', handleScroll);
return () => nav.removeEventListener('scroll', handleScroll);
}
}, []);
// Restore scroll position after route change and close mobile sidebar
// Close sidebar on route change (mobile)
useEffect(() => {
setSidebarOpen(false);
// Restore scroll position after render
requestAnimationFrame(() => {
if (navRef.current) {
navRef.current.scrollTop = scrollPositionRef.current;
}
});
}, [location.pathname]);
const sidebarContent = (
<>
{/* Logo/Brand */}
<div className="px-6 py-5 border-b border-gray-200">
<Link to="/dashboard" className="flex items-center gap-3 hover:opacity-80 transition-opacity">
<div className="flex items-center gap-3">
<div className="w-8 h-8 bg-emerald-600 rounded-lg flex items-center justify-center">
<svg viewBox="0 0 24 24" className="w-5 h-5 text-white" fill="currentColor">
<path d="M12 2C8.5 2 5.5 3.5 3.5 6L12 12L20.5 6C18.5 3.5 15.5 2 12 2Z" />
@@ -154,13 +135,17 @@ export function Layout({ children }: LayoutProps) {
</p>
)}
</div>
</Link>
</div>
<p className="text-xs text-gray-500 mt-2 truncate">{user?.email}</p>
</div>
{/* State Selector */}
<div className="px-4 py-3 border-b border-gray-200 bg-gray-50">
<StateSelector showLabel={false} />
</div>
{/* Navigation */}
<nav ref={navRef} className="flex-1 px-3 py-4 space-y-6 overflow-y-auto">
<nav className="flex-1 px-3 py-4 space-y-6 overflow-y-auto">
<NavSection title="Main">
<NavLink to="/dashboard" icon={<LayoutDashboard className="w-4 h-4" />} label="Dashboard" isActive={isActive('/dashboard', true)} />
<NavLink to="/dispensaries" icon={<Building2 className="w-4 h-4" />} label="Dispensaries" isActive={isActive('/dispensaries')} />
@@ -179,7 +164,8 @@ export function Layout({ children }: LayoutProps) {
<NavLink to="/admin/orchestrator" icon={<Activity className="w-4 h-4" />} label="Orchestrator" isActive={isActive('/admin/orchestrator')} />
<NavLink to="/users" icon={<UserCog className="w-4 h-4" />} label="Users" isActive={isActive('/users')} />
<NavLink to="/workers" icon={<Users className="w-4 h-4" />} label="Workers" isActive={isActive('/workers')} />
<NavLink to="/tasks" icon={<ListChecks className="w-4 h-4" />} label="Tasks" isActive={isActive('/tasks')} />
<NavLink to="/job-queue" icon={<ListOrdered className="w-4 h-4" />} label="Job Queue" isActive={isActive('/job-queue')} />
<NavLink to="/tasks" icon={<ListChecks className="w-4 h-4" />} label="Task Queue" isActive={isActive('/tasks')} />
<NavLink to="/admin/seo" icon={<FileText className="w-4 h-4" />} label="SEO Pages" isActive={isActive('/admin/seo')} />
<NavLink to="/proxies" icon={<Shield className="w-4 h-4" />} label="Proxies" isActive={isActive('/proxies')} />
<NavLink to="/api-permissions" icon={<Key className="w-4 h-4" />} label="API Keys" isActive={isActive('/api-permissions')} />
@@ -228,7 +214,7 @@ export function Layout({ children }: LayoutProps) {
<button onClick={() => setSidebarOpen(true)} className="p-2 -ml-2 rounded-lg hover:bg-gray-100">
<Menu className="w-5 h-5 text-gray-600" />
</button>
<Link to="/dashboard" className="flex items-center gap-2 hover:opacity-80 transition-opacity">
<div className="flex items-center gap-2">
<div className="w-6 h-6 bg-emerald-600 rounded flex items-center justify-center">
<svg viewBox="0 0 24 24" className="w-4 h-4 text-white" fill="currentColor">
<path d="M12 2C8.5 2 5.5 3.5 3.5 6L12 12L20.5 6C18.5 3.5 15.5 2 12 2Z" />
@@ -236,7 +222,7 @@ export function Layout({ children }: LayoutProps) {
</svg>
</div>
<span className="font-semibold text-gray-900">CannaIQ</span>
</Link>
</div>
</div>
{/* Page content */}

View File

@@ -1,138 +0,0 @@
import { useState, useEffect, useRef } from 'react';
import { api } from '../lib/api';
import { Shield, X, Loader2 } from 'lucide-react';
interface PasswordConfirmModalProps {
isOpen: boolean;
onClose: () => void;
onConfirm: () => void;
title: string;
description: string;
}
export function PasswordConfirmModal({
isOpen,
onClose,
onConfirm,
title,
description,
}: PasswordConfirmModalProps) {
const [password, setPassword] = useState('');
const [error, setError] = useState('');
const [loading, setLoading] = useState(false);
const inputRef = useRef<HTMLInputElement>(null);
useEffect(() => {
if (isOpen) {
setPassword('');
setError('');
// Focus the input when modal opens
setTimeout(() => inputRef.current?.focus(), 100);
}
}, [isOpen]);
const handleSubmit = async (e: React.FormEvent) => {
e.preventDefault();
if (!password.trim()) {
setError('Password is required');
return;
}
setLoading(true);
setError('');
try {
const result = await api.verifyPassword(password);
if (result.verified) {
onConfirm();
onClose();
} else {
setError('Invalid password');
}
} catch (err: any) {
setError(err.message || 'Verification failed');
} finally {
setLoading(false);
}
};
if (!isOpen) return null;
return (
<div className="fixed inset-0 z-50 flex items-center justify-center">
{/* Backdrop */}
<div
className="absolute inset-0 bg-black bg-opacity-50"
onClick={onClose}
/>
{/* Modal */}
<div className="relative bg-white rounded-lg shadow-xl max-w-md w-full mx-4">
{/* Header */}
<div className="flex items-center justify-between px-6 py-4 border-b border-gray-200">
<div className="flex items-center gap-3">
<div className="p-2 bg-amber-100 rounded-lg">
<Shield className="w-5 h-5 text-amber-600" />
</div>
<h3 className="text-lg font-semibold text-gray-900">{title}</h3>
</div>
<button
onClick={onClose}
className="p-1 hover:bg-gray-100 rounded-lg transition-colors"
>
<X className="w-5 h-5 text-gray-500" />
</button>
</div>
{/* Body */}
<form onSubmit={handleSubmit}>
<div className="px-6 py-4">
<p className="text-gray-600 mb-4">{description}</p>
<div className="space-y-2">
<label
htmlFor="password"
className="block text-sm font-medium text-gray-700"
>
Enter your password to continue
</label>
<input
ref={inputRef}
type="password"
id="password"
value={password}
onChange={(e) => setPassword(e.target.value)}
className="w-full px-4 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-emerald-500 focus:border-emerald-500"
placeholder="Password"
disabled={loading}
/>
{error && (
<p className="text-sm text-red-600">{error}</p>
)}
</div>
</div>
{/* Footer */}
<div className="flex justify-end gap-3 px-6 py-4 border-t border-gray-200 bg-gray-50 rounded-b-lg">
<button
type="button"
onClick={onClose}
disabled={loading}
className="px-4 py-2 text-gray-700 hover:bg-gray-100 rounded-lg transition-colors"
>
Cancel
</button>
<button
type="submit"
disabled={loading}
className="px-4 py-2 bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 transition-colors disabled:opacity-50 flex items-center gap-2"
>
{loading && <Loader2 className="w-4 h-4 animate-spin" />}
Confirm
</button>
</div>
</form>
</div>
</div>
);
}

View File

@@ -84,13 +84,6 @@ class ApiClient {
});
}
async verifyPassword(password: string) {
return this.request<{ verified: boolean; error?: string }>('/api/auth/verify-password', {
method: 'POST',
body: JSON.stringify({ password }),
});
}
async getMe() {
return this.request<{ user: any }>('/api/auth/me');
}
@@ -2957,25 +2950,6 @@ class ApiClient {
{ method: 'POST' }
);
}
// K8s Worker Control
async getK8sWorkers() {
return this.request<{
success: boolean;
available: boolean;
replicas: number;
readyReplicas: number;
availableReplicas?: number;
error?: string;
}>('/api/k8s/workers');
}
async scaleK8sWorkers(replicas: number) {
return this.request<{ success: boolean; replicas: number; message?: string; error?: string }>(
'/api/k8s/workers/scale',
{ method: 'POST', body: JSON.stringify({ replicas }) }
);
}
}
export const api = new ApiClient(API_URL);

File diff suppressed because it is too large Load Diff

View File

@@ -275,7 +275,7 @@ export default function NationalDashboard() {
<>
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
<MetricCard
title="Regions (US + CA)"
title="States"
value={summary.activeStates}
icon={Globe}
/>

View File

@@ -12,15 +12,11 @@ import {
Search,
ChevronDown,
ChevronUp,
ChevronLeft,
ChevronRight,
Gauge,
Users,
Power,
Play,
Square,
Plus,
X,
Calendar,
Trash2,
} from 'lucide-react';
interface Task {
@@ -70,313 +66,6 @@ interface TaskCounts {
stale: number;
}
interface Store {
id: number;
name: string;
state_code: string;
crawl_enabled: boolean;
}
interface CreateTaskModalProps {
isOpen: boolean;
onClose: () => void;
onTaskCreated: () => void;
}
const TASK_ROLES = [
{ id: 'product_refresh', name: 'Product Resync', description: 'Re-crawl products for price/stock changes' },
{ id: 'product_discovery', name: 'Product Discovery', description: 'Initial crawl for new dispensaries' },
{ id: 'store_discovery', name: 'Store Discovery', description: 'Discover new dispensary locations' },
{ id: 'entry_point_discovery', name: 'Entry Point Discovery', description: 'Resolve platform IDs from menu URLs' },
{ id: 'analytics_refresh', name: 'Analytics Refresh', description: 'Refresh materialized views' },
];
function CreateTaskModal({ isOpen, onClose, onTaskCreated }: CreateTaskModalProps) {
const [role, setRole] = useState('product_refresh');
const [priority, setPriority] = useState(10);
const [scheduleType, setScheduleType] = useState<'now' | 'scheduled'>('now');
const [scheduledFor, setScheduledFor] = useState('');
const [stores, setStores] = useState<Store[]>([]);
const [storeSearch, setStoreSearch] = useState('');
const [selectedStores, setSelectedStores] = useState<Store[]>([]);
const [loading, setLoading] = useState(false);
const [storesLoading, setStoresLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
useEffect(() => {
if (isOpen) {
fetchStores();
}
}, [isOpen]);
const fetchStores = async () => {
setStoresLoading(true);
try {
const res = await api.get('/api/stores?limit=500');
setStores(res.data.stores || res.data || []);
} catch (err) {
console.error('Failed to fetch stores:', err);
} finally {
setStoresLoading(false);
}
};
const filteredStores = stores.filter(s =>
s.name.toLowerCase().includes(storeSearch.toLowerCase()) ||
s.state_code?.toLowerCase().includes(storeSearch.toLowerCase())
);
const toggleStore = (store: Store) => {
if (selectedStores.find(s => s.id === store.id)) {
setSelectedStores(selectedStores.filter(s => s.id !== store.id));
} else {
setSelectedStores([...selectedStores, store]);
}
};
const selectAll = () => setSelectedStores(filteredStores);
const clearAll = () => setSelectedStores([]);
const handleSubmit = async () => {
setLoading(true);
setError(null);
try {
const scheduledDate = scheduleType === 'scheduled' && scheduledFor
? new Date(scheduledFor).toISOString()
: undefined;
if (role === 'store_discovery' || role === 'analytics_refresh') {
await api.post('/api/tasks', {
role,
priority,
scheduled_for: scheduledDate,
platform: 'dutchie',
});
} else if (selectedStores.length === 0) {
setError('Please select at least one store');
setLoading(false);
return;
} else {
for (const store of selectedStores) {
await api.post('/api/tasks', {
role,
dispensary_id: store.id,
priority,
scheduled_for: scheduledDate,
platform: 'dutchie',
});
}
}
onTaskCreated();
onClose();
setSelectedStores([]);
setPriority(10);
setScheduleType('now');
setScheduledFor('');
} catch (err: any) {
setError(err.response?.data?.error || err.message || 'Failed to create task');
} finally {
setLoading(false);
}
};
if (!isOpen) return null;
const needsStore = role !== 'store_discovery' && role !== 'analytics_refresh';
return (
<div className="fixed inset-0 z-50 overflow-y-auto">
<div className="flex min-h-full items-center justify-center p-4">
<div className="fixed inset-0 bg-black/50" onClick={onClose} />
<div className="relative bg-white rounded-xl shadow-xl max-w-2xl w-full max-h-[90vh] overflow-hidden">
<div className="px-6 py-4 border-b border-gray-200 flex items-center justify-between">
<h2 className="text-lg font-semibold text-gray-900">Create New Task</h2>
<button onClick={onClose} className="p-1 hover:bg-gray-100 rounded">
<X className="w-5 h-5 text-gray-500" />
</button>
</div>
<div className="px-6 py-4 space-y-6 overflow-y-auto max-h-[calc(90vh-140px)]">
{error && (
<div className="bg-red-50 border border-red-200 rounded-lg p-3 text-red-700 text-sm">
{error}
</div>
)}
<div>
<label className="block text-sm font-medium text-gray-700 mb-2">Task Role</label>
<div className="grid grid-cols-1 gap-2">
{TASK_ROLES.map(r => (
<button
key={r.id}
onClick={() => setRole(r.id)}
className={`flex items-start gap-3 p-3 rounded-lg border text-left transition-colors ${
role === r.id
? 'border-emerald-500 bg-emerald-50'
: 'border-gray-200 hover:border-gray-300'
}`}
>
<div className={`w-4 h-4 rounded-full border-2 mt-0.5 flex-shrink-0 ${
role === r.id ? 'border-emerald-500 bg-emerald-500' : 'border-gray-300'
}`}>
{role === r.id && (
<div className="w-full h-full flex items-center justify-center">
<div className="w-1.5 h-1.5 bg-white rounded-full" />
</div>
)}
</div>
<div>
<p className="font-medium text-gray-900">{r.name}</p>
<p className="text-xs text-gray-500">{r.description}</p>
</div>
</button>
))}
</div>
</div>
{needsStore && (
<div>
<label className="block text-sm font-medium text-gray-700 mb-2">
Select Stores ({selectedStores.length} selected)
</label>
<div className="border border-gray-200 rounded-lg overflow-hidden">
<div className="p-2 border-b border-gray-200 bg-gray-50">
<div className="relative">
<Search className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-gray-400" />
<input
type="text"
value={storeSearch}
onChange={(e) => setStoreSearch(e.target.value)}
placeholder="Search stores..."
className="w-full pl-9 pr-3 py-2 text-sm border border-gray-200 rounded"
/>
</div>
<div className="flex gap-2 mt-2">
<button onClick={selectAll} className="text-xs text-emerald-600 hover:underline">
Select all ({filteredStores.length})
</button>
<span className="text-gray-300">|</span>
<button onClick={clearAll} className="text-xs text-gray-500 hover:underline">
Clear
</button>
</div>
</div>
<div className="max-h-48 overflow-y-auto">
{storesLoading ? (
<div className="p-4 text-center text-gray-500">
<RefreshCw className="w-5 h-5 animate-spin mx-auto mb-1" />
Loading stores...
</div>
) : filteredStores.length === 0 ? (
<div className="p-4 text-center text-gray-500">No stores found</div>
) : (
filteredStores.map(store => (
<label key={store.id} className="flex items-center gap-3 px-3 py-2 hover:bg-gray-50 cursor-pointer">
<input
type="checkbox"
checked={!!selectedStores.find(s => s.id === store.id)}
onChange={() => toggleStore(store)}
className="w-4 h-4 text-emerald-600 rounded"
/>
<div className="flex-1 min-w-0">
<p className="text-sm text-gray-900 truncate">{store.name}</p>
<p className="text-xs text-gray-500">{store.state_code}</p>
</div>
{!store.crawl_enabled && (
<span className="text-xs text-orange-600 bg-orange-50 px-1.5 py-0.5 rounded">disabled</span>
)}
</label>
))
)}
</div>
</div>
</div>
)}
<div>
<label className="block text-sm font-medium text-gray-700 mb-2">Priority: {priority}</label>
<input
type="range"
min="0"
max="100"
value={priority}
onChange={(e) => setPriority(parseInt(e.target.value))}
className="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer"
/>
<div className="flex justify-between text-xs text-gray-500 mt-1">
<span>0 (Low)</span>
<span>10 (Normal)</span>
<span>50 (High)</span>
<span>100 (Urgent)</span>
</div>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-2">Schedule</label>
<div className="flex gap-4">
<label className="flex items-center gap-2 cursor-pointer">
<input
type="radio"
name="schedule"
checked={scheduleType === 'now'}
onChange={() => setScheduleType('now')}
className="w-4 h-4 text-emerald-600"
/>
<span className="text-sm text-gray-700">Run immediately</span>
</label>
<label className="flex items-center gap-2 cursor-pointer">
<input
type="radio"
name="schedule"
checked={scheduleType === 'scheduled'}
onChange={() => setScheduleType('scheduled')}
className="w-4 h-4 text-emerald-600"
/>
<span className="text-sm text-gray-700">Schedule for later</span>
</label>
</div>
{scheduleType === 'scheduled' && (
<div className="mt-3 relative">
<Calendar className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-gray-400" />
<input
type="datetime-local"
value={scheduledFor}
onChange={(e) => setScheduledFor(e.target.value)}
className="w-full pl-9 pr-3 py-2 text-sm border border-gray-200 rounded"
/>
</div>
)}
</div>
</div>
<div className="px-6 py-4 border-t border-gray-200 bg-gray-50 flex items-center justify-between">
<div className="text-sm text-gray-500">
{needsStore ? (
selectedStores.length > 0 ? `Will create ${selectedStores.length} task${selectedStores.length > 1 ? 's' : ''}` : 'Select stores to create tasks'
) : 'Will create 1 task'}
</div>
<div className="flex gap-3">
<button onClick={onClose} className="px-4 py-2 text-sm text-gray-700 hover:bg-gray-100 rounded-lg">
Cancel
</button>
<button
onClick={handleSubmit}
disabled={loading || (needsStore && selectedStores.length === 0)}
className="px-4 py-2 text-sm bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
>
{loading && <RefreshCw className="w-4 h-4 animate-spin" />}
Create Task{selectedStores.length > 1 ? 's' : ''}
</button>
</div>
</div>
</div>
</div>
</div>
);
}
const ROLES = [
'store_discovery',
'entry_point_discovery',
@@ -450,11 +139,7 @@ export default function TasksDashboard() {
const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
const [poolPaused, setPoolPaused] = useState(false);
const [showCreateModal, setShowCreateModal] = useState(false);
// Pagination
const [page, setPage] = useState(0);
const tasksPerPage = 25;
const [poolLoading, setPoolLoading] = useState(false);
// Filters
const [roleFilter, setRoleFilter] = useState<string>('');
@@ -488,14 +173,20 @@ export default function TasksDashboard() {
}
};
const handleDeleteTask = async (taskId: number) => {
if (!confirm('Delete this task?')) return;
const togglePool = async () => {
setPoolLoading(true);
try {
await api.delete(`/api/tasks/${taskId}`);
fetchData();
if (poolPaused) {
await api.resumeTaskPool();
setPoolPaused(false);
} else {
await api.pauseTaskPool();
setPoolPaused(true);
}
} catch (err: any) {
console.error('Delete error:', err);
alert(err.response?.data?.error || 'Failed to delete task');
setError(err.message || 'Failed to toggle pool');
} finally {
setPoolLoading(false);
}
};
@@ -518,10 +209,6 @@ export default function TasksDashboard() {
return true;
});
// Pagination
const paginatedTasks = filteredTasks.slice(page * tasksPerPage, (page + 1) * tasksPerPage);
const totalPages = Math.ceil(filteredTasks.length / tasksPerPage);
const totalActive = (counts?.claimed || 0) + (counts?.running || 0);
const totalPending = counts?.pending || 0;
@@ -538,37 +225,42 @@ export default function TasksDashboard() {
return (
<Layout>
<div className="space-y-6">
{/* Sticky Header */}
<div className="sticky top-0 z-10 bg-white pb-4 -mx-6 px-6 pt-2 border-b border-gray-200 shadow-sm">
<div className="flex flex-col sm:flex-row sm:items-center sm:justify-between gap-4">
<div>
<h1 className="text-2xl font-bold text-gray-900 flex items-center gap-2">
<ListChecks className="w-7 h-7 text-emerald-600" />
Task Queue
</h1>
<p className="text-gray-500 mt-1">
{totalActive} active, {totalPending} pending tasks
</p>
</div>
{/* Header */}
<div className="flex flex-col sm:flex-row sm:items-center sm:justify-between gap-4">
<div>
<h1 className="text-2xl font-bold text-gray-900 flex items-center gap-2">
<ListChecks className="w-7 h-7 text-emerald-600" />
Task Queue
</h1>
<p className="text-gray-500 mt-1">
{totalActive} active, {totalPending} pending tasks
</p>
</div>
<div className="flex items-center gap-4">
{/* Create Task Button */}
<button
onClick={() => setShowCreateModal(true)}
className="flex items-center gap-2 px-4 py-2 bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 transition-colors"
>
<Plus className="w-4 h-4" />
Create Task
</button>
{/* Pool status indicator */}
{poolPaused && (
<span className="inline-flex items-center gap-1.5 px-3 py-1.5 rounded-full text-sm font-medium bg-yellow-100 text-yellow-800">
<Square className="w-4 h-4" />
Pool Paused
</span>
<div className="flex items-center gap-4">
{/* Pool Toggle */}
<button
onClick={togglePool}
disabled={poolLoading}
className={`flex items-center gap-2 px-4 py-2 rounded-lg font-medium transition-colors ${
poolPaused
? 'bg-emerald-100 text-emerald-700 hover:bg-emerald-200'
: 'bg-red-100 text-red-700 hover:bg-red-200'
}`}
>
{poolPaused ? (
<>
<Play className={`w-5 h-5 ${poolLoading ? 'animate-pulse' : ''}`} />
Start Pool
</>
) : (
<>
<Square className={`w-5 h-5 ${poolLoading ? 'animate-pulse' : ''}`} />
Stop Pool
</>
)}
<span className="text-sm text-gray-400">Auto-refreshes every 15s</span>
</div>
</button>
<span className="text-sm text-gray-400">Auto-refreshes every 15s</span>
</div>
</div>
@@ -576,13 +268,6 @@ export default function TasksDashboard() {
<div className="p-4 bg-red-50 text-red-700 rounded-lg">{error}</div>
)}
{/* Create Task Modal */}
<CreateTaskModal
isOpen={showCreateModal}
onClose={() => setShowCreateModal(false)}
onTaskCreated={fetchData}
/>
{/* Status Summary Cards */}
<div className="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-6 gap-4">
{Object.entries(counts || {}).map(([status, count]) => (
@@ -785,19 +470,17 @@ export default function TasksDashboard() {
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">
Error
</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase w-16">
</th>
</tr>
</thead>
<tbody className="divide-y divide-gray-200">
{paginatedTasks.length === 0 ? (
{filteredTasks.length === 0 ? (
<tr>
<td colSpan={9} className="px-4 py-8 text-center text-gray-500">
<td colSpan={8} className="px-4 py-8 text-center text-gray-500">
No tasks found
</td>
</tr>
) : (
paginatedTasks.map((task) => (
filteredTasks.map((task) => (
<tr key={task.id} className="hover:bg-gray-50">
<td className="px-4 py-3 text-sm font-mono text-gray-600">#{task.id}</td>
<td className="px-4 py-3 text-sm text-gray-900">
@@ -828,47 +511,12 @@ export default function TasksDashboard() {
<td className="px-4 py-3 text-sm text-red-600 max-w-xs truncate">
{task.error_message || '-'}
</td>
<td className="px-4 py-3">
{(task.status === 'failed' || task.status === 'completed' || task.status === 'pending') && (
<button
onClick={() => handleDeleteTask(task.id)}
className="p-1 text-gray-400 hover:text-red-500 hover:bg-red-50 rounded transition-colors"
title="Delete task"
>
<Trash2 className="w-4 h-4" />
</button>
)}
</td>
</tr>
))
)}
</tbody>
</table>
</div>
{/* Pagination */}
<div className="px-4 py-3 border-t border-gray-200 bg-gray-50 flex items-center justify-between">
<div className="text-sm text-gray-500">
Showing {page * tasksPerPage + 1} - {Math.min((page + 1) * tasksPerPage, filteredTasks.length)} of {filteredTasks.length} tasks
</div>
<div className="flex items-center gap-2">
<button
onClick={() => setPage(p => Math.max(0, p - 1))}
disabled={page === 0}
className="px-3 py-1 text-sm border border-gray-200 rounded hover:bg-gray-100 disabled:opacity-50 disabled:cursor-not-allowed"
>
<ChevronLeft className="w-4 h-4" />
</button>
<span className="text-sm text-gray-600">Page {page + 1} of {totalPages || 1}</span>
<button
onClick={() => setPage(p => p + 1)}
disabled={page >= totalPages - 1}
className="px-3 py-1 text-sm border border-gray-200 rounded hover:bg-gray-100 disabled:opacity-50 disabled:cursor-not-allowed"
>
<ChevronRight className="w-4 h-4" />
</button>
</div>
</div>
</div>
</div>
</Layout>

View File

@@ -18,11 +18,9 @@ import {
Server,
MapPin,
Trash2,
PowerOff,
Undo2,
Plus,
MemoryStick,
AlertTriangle,
Minus,
Loader2,
} from 'lucide-react';
// Worker from registry
@@ -41,36 +39,16 @@ interface Worker {
tasks_completed: number;
tasks_failed: number;
current_task_id: number | null;
current_task_ids?: number[]; // Multiple concurrent tasks
active_task_count?: number;
max_concurrent_tasks?: number;
health_status: string;
seconds_since_heartbeat: number;
decommission_requested?: boolean;
decommission_reason?: string;
// Dual-transport preflight status
preflight_curl_status?: 'pending' | 'passed' | 'failed' | 'skipped';
preflight_http_status?: 'pending' | 'passed' | 'failed' | 'skipped';
preflight_curl_at?: string;
preflight_http_at?: string;
preflight_curl_error?: string;
preflight_http_error?: string;
preflight_curl_ms?: number;
preflight_http_ms?: number;
can_curl?: boolean;
can_http?: boolean;
metadata: {
cpu?: number;
memory?: number;
memoryTotal?: number;
memory_mb?: number;
memory_total_mb?: number;
memory_percent?: number; // NEW: memory as percentage
cpu_user_ms?: number;
cpu_system_ms?: number;
cpu_percent?: number; // NEW: CPU percentage
is_backing_off?: boolean; // NEW: resource backoff state
backoff_reason?: string; // NEW: why backing off
proxy_location?: {
city?: string;
state?: string;
@@ -94,6 +72,14 @@ interface Task {
worker_id: string | null;
}
// K8s replica info (added 2024-12-10)
interface K8sReplicas {
current: number;
desired: number;
available: number;
updated: number;
}
function formatRelativeTime(dateStr: string | null): string {
if (!dateStr) return '-';
const date = new Date(dateStr);
@@ -234,320 +220,69 @@ function HealthBadge({ status, healthStatus }: { status: string; healthStatus: s
);
}
// Format CPU time for display
function formatCpuTime(ms: number): string {
if (ms < 1000) return `${ms}ms`;
if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`;
return `${(ms / 60000).toFixed(1)}m`;
}
// Resource usage badge showing memory%, CPU%, and backoff status
function ResourceBadge({ worker }: { worker: Worker }) {
const memPercent = worker.metadata?.memory_percent;
const cpuPercent = worker.metadata?.cpu_percent;
const isBackingOff = worker.metadata?.is_backing_off;
const backoffReason = worker.metadata?.backoff_reason;
if (isBackingOff) {
return (
<div className="flex items-center gap-1.5" title={backoffReason || 'Backing off due to resource pressure'}>
<AlertTriangle className="w-4 h-4 text-amber-500 animate-pulse" />
<span className="text-xs text-amber-600 font-medium">Backing off</span>
</div>
);
}
// No data yet
if (memPercent === undefined && cpuPercent === undefined) {
return <span className="text-gray-400 text-xs">-</span>;
}
// Color based on usage level
const getColor = (pct: number) => {
if (pct >= 90) return 'text-red-600';
if (pct >= 75) return 'text-amber-600';
if (pct >= 50) return 'text-yellow-600';
return 'text-emerald-600';
};
return (
<div className="flex flex-col gap-0.5 text-xs">
{memPercent !== undefined && (
<div className="flex items-center gap-1" title={`Memory: ${worker.metadata?.memory_mb || 0}MB / ${worker.metadata?.memory_total_mb || 0}MB`}>
<MemoryStick className={`w-3 h-3 ${getColor(memPercent)}`} />
<span className={getColor(memPercent)}>{memPercent}%</span>
</div>
)}
{cpuPercent !== undefined && (
<div className="flex items-center gap-1">
<Cpu className={`w-3 h-3 ${getColor(cpuPercent)}`} />
<span className={getColor(cpuPercent)}>{cpuPercent}%</span>
</div>
)}
</div>
);
}
// Transport capability badge showing curl/http preflight status
function TransportBadge({ worker }: { worker: Worker }) {
const curlStatus = worker.preflight_curl_status || 'pending';
const httpStatus = worker.preflight_http_status || 'pending';
const getStatusConfig = (status: string, label: string, ms?: number, error?: string) => {
switch (status) {
case 'passed':
return {
bg: 'bg-emerald-100',
text: 'text-emerald-700',
icon: <CheckCircle className="w-3 h-3" />,
tooltip: ms ? `${label}: Passed (${ms}ms)` : `${label}: Passed`,
};
case 'failed':
return {
bg: 'bg-red-100',
text: 'text-red-700',
icon: <XCircle className="w-3 h-3" />,
tooltip: error ? `${label}: Failed - ${error}` : `${label}: Failed`,
};
case 'skipped':
return {
bg: 'bg-gray-100',
text: 'text-gray-500',
icon: <Clock className="w-3 h-3" />,
tooltip: `${label}: Skipped`,
};
default:
return {
bg: 'bg-yellow-100',
text: 'text-yellow-700',
icon: <Clock className="w-3 h-3 animate-pulse" />,
tooltip: `${label}: Pending`,
};
}
};
const curlConfig = getStatusConfig(curlStatus, 'CURL', worker.preflight_curl_ms, worker.preflight_curl_error);
const httpConfig = getStatusConfig(httpStatus, 'HTTP', worker.preflight_http_ms, worker.preflight_http_error);
return (
<div className="flex flex-col gap-1">
<div
className={`inline-flex items-center gap-1 px-1.5 py-0.5 rounded text-xs font-medium ${curlConfig.bg} ${curlConfig.text}`}
title={curlConfig.tooltip}
>
{curlConfig.icon}
<span>curl</span>
</div>
<div
className={`inline-flex items-center gap-1 px-1.5 py-0.5 rounded text-xs font-medium ${httpConfig.bg} ${httpConfig.text}`}
title={httpConfig.tooltip}
>
{httpConfig.icon}
<span>http</span>
</div>
</div>
);
}
// Task count badge showing active/max concurrent tasks
function TaskCountBadge({ worker, tasks }: { worker: Worker; tasks: Task[] }) {
const activeCount = worker.active_task_count ?? (worker.current_task_id ? 1 : 0);
const maxCount = worker.max_concurrent_tasks ?? 1;
const taskIds = worker.current_task_ids ?? (worker.current_task_id ? [worker.current_task_id] : []);
if (activeCount === 0) {
return <span className="text-gray-400 text-sm">Idle</span>;
}
// Get task names for tooltip
const taskNames = taskIds.map(id => {
const task = tasks.find(t => t.id === id);
return task ? `#${id}: ${task.role}${task.dispensary_name ? ` (${task.dispensary_name})` : ''}` : `#${id}`;
}).join('\n');
return (
<div className="flex items-center gap-2" title={taskNames}>
<span className="text-sm font-medium text-blue-600">
{activeCount}/{maxCount} tasks
</span>
{taskIds.length === 1 && (
<span className="text-xs text-gray-500">#{taskIds[0]}</span>
)}
</div>
);
}
// Pod visualization - shows pod as hub with worker nodes radiating out
function PodVisualization({
podName,
workers,
isSelected = false,
onSelect
}: {
podName: string;
workers: Worker[];
isSelected?: boolean;
onSelect?: () => void;
}) {
const busyCount = workers.filter(w => w.current_task_id !== null).length;
const allBusy = busyCount === workers.length;
const allIdle = busyCount === 0;
// Aggregate resource stats for the pod
const totalMemoryMb = workers.reduce((sum, w) => sum + (w.metadata?.memory_mb || 0), 0);
const totalCpuUserMs = workers.reduce((sum, w) => sum + (w.metadata?.cpu_user_ms || 0), 0);
const totalCpuSystemMs = workers.reduce((sum, w) => sum + (w.metadata?.cpu_system_ms || 0), 0);
const totalCompleted = workers.reduce((sum, w) => sum + w.tasks_completed, 0);
const totalFailed = workers.reduce((sum, w) => sum + w.tasks_failed, 0);
// Pod color based on worker status
const podColor = allBusy ? 'bg-blue-500' : allIdle ? 'bg-emerald-500' : 'bg-yellow-500';
const podBorder = allBusy ? 'border-blue-400' : allIdle ? 'border-emerald-400' : 'border-yellow-400';
const podGlow = allBusy ? 'shadow-blue-200' : allIdle ? 'shadow-emerald-200' : 'shadow-yellow-200';
// Selection ring
const selectionRing = isSelected ? 'ring-4 ring-purple-400 ring-offset-2' : '';
// Build pod tooltip
const podTooltip = [
`Pod: ${podName}`,
`Workers: ${busyCount}/${workers.length} busy`,
`Memory: ${totalMemoryMb} MB (RSS)`,
`CPU: ${formatCpuTime(totalCpuUserMs)} user, ${formatCpuTime(totalCpuSystemMs)} system`,
`Tasks: ${totalCompleted} completed, ${totalFailed} failed`,
'Click to select',
].join('\n');
return (
<div className="flex flex-col items-center p-4">
{/* Pod hub */}
<div className="relative">
{/* Center pod circle */}
<div
className={`w-20 h-20 rounded-full ${podColor} border-4 ${podBorder} shadow-lg ${podGlow} ${selectionRing} flex items-center justify-center text-white font-bold text-xs text-center leading-tight z-10 relative cursor-pointer hover:scale-105 transition-all`}
title={podTooltip}
onClick={onSelect}
>
<span className="px-1">{podName}</span>
</div>
{/* Worker nodes radiating out */}
{workers.map((worker, index) => {
const angle = (index * 360) / workers.length - 90; // Start from top
const radians = (angle * Math.PI) / 180;
const radius = 55; // Distance from center
const x = Math.cos(radians) * radius;
const y = Math.sin(radians) * radius;
const isBusy = worker.current_task_id !== null;
const isDecommissioning = worker.decommission_requested;
const isBackingOff = worker.metadata?.is_backing_off;
// Color priority: decommissioning > backing off > busy > idle
const workerColor = isDecommissioning ? 'bg-orange-500' : isBackingOff ? 'bg-yellow-500' : isBusy ? 'bg-blue-500' : 'bg-emerald-500';
const workerBorder = isDecommissioning ? 'border-orange-300' : isBackingOff ? 'border-yellow-300' : isBusy ? 'border-blue-300' : 'border-emerald-300';
// Line from center to worker
const lineLength = radius - 10;
const lineX = Math.cos(radians) * (lineLength / 2 + 10);
const lineY = Math.sin(radians) * (lineLength / 2 + 10);
return (
<div key={worker.id}>
{/* Connection line */}
<div
className={`absolute w-0.5 ${isDecommissioning ? 'bg-orange-300' : isBackingOff ? 'bg-yellow-300' : isBusy ? 'bg-blue-300' : 'bg-emerald-300'}`}
style={{
height: `${lineLength}px`,
left: '50%',
top: '50%',
transform: `translate(-50%, -50%) translate(${lineX}px, ${lineY}px) rotate(${angle + 90}deg)`,
transformOrigin: 'center',
}}
/>
{/* Worker node */}
<div
className={`absolute w-6 h-6 rounded-full ${workerColor} border-2 ${workerBorder} flex items-center justify-center text-white text-xs font-bold cursor-pointer hover:scale-110 transition-transform`}
style={{
left: '50%',
top: '50%',
transform: `translate(-50%, -50%) translate(${x}px, ${y}px)`,
}}
title={`${worker.friendly_name}\nStatus: ${isDecommissioning ? 'Stopping after current task' : isBackingOff ? `Backing off: ${worker.metadata?.backoff_reason || 'resource pressure'}` : isBusy ? `Working on task #${worker.current_task_id}` : 'Ready - waiting for tasks'}\nMemory: ${worker.metadata?.memory_mb || 0} MB (${worker.metadata?.memory_percent || 0}%)\nCPU: ${formatCpuTime(worker.metadata?.cpu_user_ms || 0)} user, ${formatCpuTime(worker.metadata?.cpu_system_ms || 0)} sys\nCompleted: ${worker.tasks_completed} | Failed: ${worker.tasks_failed}\nLast heartbeat: ${new Date(worker.last_heartbeat_at).toLocaleTimeString()}`}
>
{index + 1}
</div>
</div>
);
})}
</div>
{/* Pod stats */}
<div className="mt-12 text-center">
<p className="text-xs text-gray-500">
{busyCount}/{workers.length} busy
</p>
{isSelected && (
<p className="text-xs text-purple-600 font-medium mt-1">Selected</p>
)}
</div>
</div>
);
}
// Group workers by pod
function groupWorkersByPod(workers: Worker[]): Map<string, Worker[]> {
const pods = new Map<string, Worker[]>();
for (const worker of workers) {
const podName = worker.pod_name || 'Unknown';
if (!pods.has(podName)) {
pods.set(podName, []);
}
pods.get(podName)!.push(worker);
}
return pods;
}
// Format estimated time remaining
function formatEstimatedTime(hours: number): string {
if (hours < 1) {
return `${Math.round(hours * 60)} minutes`;
}
if (hours < 24) {
return `${hours.toFixed(1)} hours`;
}
const days = hours / 24;
if (days < 7) {
return `${days.toFixed(1)} days`;
}
return `${(days / 7).toFixed(1)} weeks`;
}
export function WorkersDashboard() {
const [workers, setWorkers] = useState<Worker[]>([]);
const [tasks, setTasks] = useState<Task[]>([]);
const [pendingTaskCount, setPendingTaskCount] = useState<number>(0);
const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
// Pod selection state
const [selectedPod, setSelectedPod] = useState<string | null>(null);
// K8s scaling state (added 2024-12-10)
const [k8sReplicas, setK8sReplicas] = useState<K8sReplicas | null>(null);
const [k8sError, setK8sError] = useState<string | null>(null);
const [scaling, setScaling] = useState(false);
const [targetReplicas, setTargetReplicas] = useState<number | null>(null);
// Pagination
const [page, setPage] = useState(0);
const workersPerPage = 15;
// Fetch K8s replica count (added 2024-12-10)
const fetchK8sReplicas = useCallback(async () => {
try {
const res = await api.get('/api/workers/k8s/replicas');
if (res.data.success && res.data.replicas) {
setK8sReplicas(res.data.replicas);
if (targetReplicas === null) {
setTargetReplicas(res.data.replicas.desired);
}
setK8sError(null);
}
} catch (err: any) {
// K8s not available (local dev or no RBAC)
setK8sError(err.response?.data?.error || 'K8s not available');
setK8sReplicas(null);
}
}, [targetReplicas]);
// Scale workers (added 2024-12-10)
const handleScale = useCallback(async (replicas: number) => {
if (replicas < 0 || replicas > 20) return;
setScaling(true);
try {
const res = await api.post('/api/workers/k8s/scale', { replicas });
if (res.data.success) {
setTargetReplicas(replicas);
// Refresh after a short delay to see the change
setTimeout(fetchK8sReplicas, 1000);
}
} catch (err: any) {
console.error('Scale error:', err);
setK8sError(err.response?.data?.error || 'Failed to scale');
} finally {
setScaling(false);
}
}, [fetchK8sReplicas]);
const fetchData = useCallback(async () => {
try {
// Fetch workers from registry, running tasks, and task counts
const [workersRes, tasksRes, countsRes] = await Promise.all([
api.get('/api/worker-registry/workers'),
api.get('/api/tasks?status=running&limit=100'),
api.get('/api/tasks/counts'),
]);
// Fetch workers from registry
const workersRes = await api.get('/api/worker-registry/workers');
// Fetch running tasks to get current task details
const tasksRes = await api.get('/api/tasks?status=running&limit=100');
setWorkers(workersRes.data.workers || []);
setTasks(tasksRes.data.tasks || []);
setPendingTaskCount(countsRes.data?.pending || 0);
setError(null);
} catch (err: any) {
console.error('Fetch error:', err);
@@ -568,51 +303,16 @@ export function WorkersDashboard() {
}
};
// Decommission a worker (graceful shutdown after current task)
const handleDecommissionWorker = async (workerId: string, friendlyName: string) => {
if (!confirm(`Decommission ${friendlyName}? Worker will stop after completing its current task.`)) return;
try {
const res = await api.post(`/api/worker-registry/workers/${workerId}/decommission`, {
reason: 'Manual decommission from admin UI'
});
if (res.data.success) {
fetchData();
}
} catch (err: any) {
console.error('Decommission error:', err);
alert(err.response?.data?.error || 'Failed to decommission worker');
}
};
// Cancel decommission
const handleCancelDecommission = async (workerId: string) => {
try {
await api.post(`/api/worker-registry/workers/${workerId}/cancel-decommission`);
fetchData();
} catch (err: any) {
console.error('Cancel decommission error:', err);
}
};
// Add a worker by scaling up the K8s deployment
const handleAddWorker = async () => {
try {
const res = await api.post('/api/workers/k8s/scale-up');
if (res.data.success) {
// Refresh after a short delay to see the new worker
setTimeout(fetchData, 2000);
}
} catch (err: any) {
console.error('Add worker error:', err);
alert(err.response?.data?.error || 'Failed to add worker. K8s scaling may not be available.');
}
};
useEffect(() => {
fetchData();
fetchK8sReplicas(); // Added 2024-12-10
const interval = setInterval(fetchData, 5000);
return () => clearInterval(interval);
}, [fetchData]);
const k8sInterval = setInterval(fetchK8sReplicas, 10000); // K8s refresh every 10s
return () => {
clearInterval(interval);
clearInterval(k8sInterval);
};
}, [fetchData, fetchK8sReplicas]);
// Paginated workers
const paginatedWorkers = workers.slice(
@@ -652,9 +352,15 @@ export function WorkersDashboard() {
<h1 className="text-2xl font-bold text-gray-900">Workers</h1>
<p className="text-gray-500 mt-1">
{workers.length} registered workers ({busyWorkers.length} busy, {idleWorkers.length} idle)
<span className="text-xs text-gray-400 ml-2">(auto-refresh 5s)</span>
</p>
</div>
<button
onClick={() => fetchData()}
className="flex items-center gap-2 px-4 py-2 bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 transition-colors"
>
<RefreshCw className="w-4 h-4" />
Refresh
</button>
</div>
{error && (
@@ -663,6 +369,68 @@ export function WorkersDashboard() {
</div>
)}
{/* K8s Scaling Card (added 2024-12-10) */}
{k8sReplicas && (
<div className="bg-white rounded-lg border border-gray-200 p-4">
<div className="flex items-center justify-between">
<div className="flex items-center gap-3">
<div className="w-10 h-10 bg-purple-100 rounded-lg flex items-center justify-center">
<Server className="w-5 h-5 text-purple-600" />
</div>
<div>
<p className="text-sm text-gray-500">K8s Worker Pods</p>
<p className="text-xl font-semibold">
{k8sReplicas.current} / {k8sReplicas.desired}
{k8sReplicas.current !== k8sReplicas.desired && (
<span className="text-sm font-normal text-yellow-600 ml-2">scaling...</span>
)}
</p>
</div>
</div>
<div className="flex items-center gap-2">
<button
onClick={() => handleScale((targetReplicas || k8sReplicas.desired) - 1)}
disabled={scaling || (targetReplicas || k8sReplicas.desired) <= 0}
className="w-8 h-8 flex items-center justify-center bg-gray-100 text-gray-700 rounded-lg hover:bg-gray-200 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
title="Scale down"
>
<Minus className="w-4 h-4" />
</button>
<input
type="number"
min="0"
max="20"
value={targetReplicas ?? k8sReplicas.desired}
onChange={(e) => setTargetReplicas(Math.max(0, Math.min(20, parseInt(e.target.value) || 0)))}
onBlur={() => {
if (targetReplicas !== null && targetReplicas !== k8sReplicas.desired) {
handleScale(targetReplicas);
}
}}
onKeyDown={(e) => {
if (e.key === 'Enter' && targetReplicas !== null && targetReplicas !== k8sReplicas.desired) {
handleScale(targetReplicas);
}
}}
className="w-16 text-center border border-gray-300 rounded-lg px-2 py-1 text-lg font-semibold"
/>
<button
onClick={() => handleScale((targetReplicas || k8sReplicas.desired) + 1)}
disabled={scaling || (targetReplicas || k8sReplicas.desired) >= 20}
className="w-8 h-8 flex items-center justify-center bg-gray-100 text-gray-700 rounded-lg hover:bg-gray-200 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
title="Scale up"
>
<Plus className="w-4 h-4" />
</button>
{scaling && <Loader2 className="w-4 h-4 text-purple-600 animate-spin ml-2" />}
</div>
</div>
{k8sError && (
<p className="text-xs text-red-500 mt-2">{k8sError}</p>
)}
</div>
)}
{/* Stats Cards */}
<div className="grid grid-cols-5 gap-4">
<div className="bg-white rounded-lg border border-gray-200 p-4">
@@ -722,197 +490,6 @@ export function WorkersDashboard() {
</div>
</div>
{/* Estimated Completion Time Card */}
{pendingTaskCount > 0 && activeWorkers.length > 0 && (() => {
// Calculate average task rate across all workers
const totalHoursUp = activeWorkers.reduce((sum, w) => {
if (!w.started_at) return sum;
const start = new Date(w.started_at);
const now = new Date();
return sum + (now.getTime() - start.getTime()) / (1000 * 60 * 60);
}, 0);
const totalTasksDone = totalCompleted + totalFailed;
const avgTasksPerHour = totalHoursUp > 0.1 ? totalTasksDone / totalHoursUp : 0;
const estimatedHours = avgTasksPerHour > 0 ? pendingTaskCount / avgTasksPerHour : null;
return (
<div className="bg-gradient-to-r from-amber-50 to-orange-50 rounded-lg border border-amber-200 p-4">
<div className="flex items-center justify-between">
<div className="flex items-center gap-3">
<div className="w-10 h-10 bg-amber-100 rounded-lg flex items-center justify-center">
<Clock className="w-5 h-5 text-amber-600" />
</div>
<div>
<p className="text-sm text-amber-700 font-medium">Estimated Time to Complete Queue</p>
<p className="text-2xl font-bold text-amber-900">
{estimatedHours !== null ? formatEstimatedTime(estimatedHours) : 'Calculating...'}
</p>
</div>
</div>
<div className="text-right text-sm text-amber-700">
<p><span className="font-semibold">{pendingTaskCount}</span> pending tasks</p>
<p><span className="font-semibold">{activeWorkers.length}</span> active workers</p>
{avgTasksPerHour > 0 && (
<p className="text-xs text-amber-600 mt-1">
~{avgTasksPerHour.toFixed(1)} tasks/hour
</p>
)}
</div>
</div>
</div>
);
})()}
{/* Worker Pods Visualization */}
<div className="bg-white rounded-lg border border-gray-200 overflow-hidden">
<div className="px-4 py-3 border-b border-gray-200 bg-gray-50">
<div className="flex items-center justify-between">
<div>
<h3 className="text-sm font-semibold text-gray-900 flex items-center gap-2">
<Zap className="w-4 h-4 text-emerald-500" />
Worker Pods ({Array.from(groupWorkersByPod(workers)).length} pods, {activeWorkers.length} workers)
</h3>
<p className="text-xs text-gray-500 mt-0.5">
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-emerald-500"></span> ready</span>
<span className="mx-2">|</span>
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-blue-500"></span> busy</span>
<span className="mx-2">|</span>
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-yellow-500"></span> backing off</span>
<span className="mx-2">|</span>
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-orange-500"></span> stopping</span>
</p>
</div>
<div className="text-sm text-gray-500">
{busyWorkers.length} busy, {activeWorkers.length - busyWorkers.length} idle
{selectedPod && (
<button
onClick={() => setSelectedPod(null)}
className="ml-3 text-xs text-purple-600 hover:text-purple-800 underline"
>
Clear selection
</button>
)}
</div>
</div>
</div>
{workers.length === 0 ? (
<div className="px-4 py-12 text-center text-gray-500">
<Users className="w-12 h-12 mx-auto mb-3 text-gray-300" />
<p className="font-medium">No worker pods running</p>
<p className="text-xs mt-1">Start pods to process tasks from the queue</p>
</div>
) : (
<div className="p-6">
<div className="flex flex-wrap justify-center gap-8">
{Array.from(groupWorkersByPod(workers)).map(([podName, podWorkers]) => (
<PodVisualization
key={podName}
podName={podName}
workers={podWorkers}
isSelected={selectedPod === podName}
onSelect={() => setSelectedPod(selectedPod === podName ? null : podName)}
/>
))}
</div>
{/* Selected Pod Control Panel */}
{selectedPod && (() => {
const podWorkers = groupWorkersByPod(workers).get(selectedPod) || [];
const busyInPod = podWorkers.filter(w => w.current_task_id !== null).length;
const idleInPod = podWorkers.filter(w => w.current_task_id === null && !w.decommission_requested).length;
const stoppingInPod = podWorkers.filter(w => w.decommission_requested).length;
return (
<div className="mt-6 border-t border-gray-200 pt-6">
<div className="bg-purple-50 rounded-lg border border-purple-200 p-4">
<div className="flex items-center justify-between mb-4">
<div className="flex items-center gap-3">
<div className="w-10 h-10 bg-purple-100 rounded-lg flex items-center justify-center">
<Server className="w-5 h-5 text-purple-600" />
</div>
<div>
<h4 className="font-semibold text-purple-900">{selectedPod}</h4>
<p className="text-xs text-purple-600">
{podWorkers.length} workers: {busyInPod} busy, {idleInPod} idle{stoppingInPod > 0 && `, ${stoppingInPod} stopping`}
</p>
</div>
</div>
</div>
{/* Worker list in selected pod */}
<div className="space-y-2">
{podWorkers.map((worker) => {
const isBusy = worker.current_task_id !== null;
const isDecommissioning = worker.decommission_requested;
return (
<div key={worker.id} className="flex items-center justify-between bg-white rounded-lg px-3 py-2 border border-purple-100">
<div className="flex items-center gap-3">
<div className={`w-8 h-8 rounded-full flex items-center justify-center text-white text-sm font-bold ${
isDecommissioning ? 'bg-orange-500' :
isBusy ? 'bg-blue-500' : 'bg-emerald-500'
}`}>
{worker.friendly_name?.charAt(0) || '?'}
</div>
<div>
<p className="text-sm font-medium text-gray-900">{worker.friendly_name}</p>
<p className="text-xs text-gray-500">
{isDecommissioning ? (
<span className="text-orange-600">Stopping after current task...</span>
) : isBusy ? (
<span className="text-blue-600">Working on task #{worker.current_task_id}</span>
) : (
<span className="text-emerald-600">Idle - ready for tasks</span>
)}
</p>
</div>
</div>
<div className="flex items-center gap-2">
{isDecommissioning ? (
<button
onClick={() => handleCancelDecommission(worker.worker_id)}
className="flex items-center gap-1.5 px-3 py-1.5 text-sm bg-white border border-gray-300 text-gray-700 rounded-lg hover:bg-gray-50 transition-colors"
title="Cancel decommission"
>
<Undo2 className="w-4 h-4" />
Cancel
</button>
) : (
<button
onClick={() => handleDecommissionWorker(worker.worker_id, worker.friendly_name)}
className="flex items-center gap-1.5 px-3 py-1.5 text-sm bg-orange-100 text-orange-700 rounded-lg hover:bg-orange-200 transition-colors"
title={isBusy ? 'Worker will stop after completing current task' : 'Remove idle worker'}
>
<PowerOff className="w-4 h-4" />
{isBusy ? 'Stop after task' : 'Remove'}
</button>
)}
</div>
</div>
);
})}
</div>
{/* Add Worker button */}
<div className="mt-4 pt-4 border-t border-purple-200">
<button
onClick={handleAddWorker}
className="flex items-center gap-1.5 px-3 py-2 text-sm bg-emerald-100 text-emerald-700 rounded-lg hover:bg-emerald-200 transition-colors"
>
<Plus className="w-4 h-4" />
Add Worker
</button>
</div>
</div>
</div>
);
})()}
</div>
)}
</div>
{/* Workers Table */}
<div className="bg-white rounded-lg border border-gray-200 overflow-hidden">
<div className="px-4 py-3 border-b border-gray-200 bg-gray-50 flex items-center justify-between">
@@ -955,11 +532,10 @@ export function WorkersDashboard() {
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Worker</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Role</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Status</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Transport</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Resources</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Tasks</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Exit Location</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Current Task</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Duration</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Throughput</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Utilization</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Heartbeat</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase"></th>
</tr>
@@ -974,29 +550,16 @@ export function WorkersDashboard() {
<tr key={worker.id} className="hover:bg-gray-50">
<td className="px-4 py-3">
<div className="flex items-center gap-3">
<div className={`w-10 h-10 rounded-full flex items-center justify-center text-white font-bold text-sm relative ${
worker.decommission_requested ? 'bg-orange-500' :
<div className={`w-10 h-10 rounded-full flex items-center justify-center text-white font-bold text-sm ${
worker.health_status === 'offline' ? 'bg-gray-400' :
worker.health_status === 'stale' ? 'bg-yellow-500' :
worker.health_status === 'busy' ? 'bg-blue-500' :
'bg-emerald-500'
}`}>
{worker.friendly_name?.charAt(0) || '?'}
{worker.decommission_requested && (
<div className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 rounded-full flex items-center justify-center">
<PowerOff className="w-2.5 h-2.5 text-white" />
</div>
)}
</div>
<div>
<p className="font-medium text-gray-900 flex items-center gap-1.5">
{worker.friendly_name}
{worker.decommission_requested && (
<span className="text-xs text-orange-600 bg-orange-100 px-1.5 py-0.5 rounded" title={worker.decommission_reason || 'Pending decommission'}>
stopping
</span>
)}
</p>
<p className="font-medium text-gray-900">{worker.friendly_name}</p>
<p className="text-xs text-gray-400 font-mono">{worker.worker_id.slice(0, 20)}...</p>
</div>
</div>
@@ -1008,13 +571,45 @@ export function WorkersDashboard() {
<HealthBadge status={worker.status} healthStatus={worker.health_status} />
</td>
<td className="px-4 py-3">
<TransportBadge worker={worker} />
{(() => {
const loc = worker.metadata?.proxy_location;
if (!loc) {
return <span className="text-gray-400 text-sm">-</span>;
}
const parts = [loc.city, loc.state, loc.country].filter(Boolean);
if (parts.length === 0) {
return loc.isRotating ? (
<span className="text-xs text-purple-600 font-medium" title="Rotating proxy - exit location varies per request">
Rotating
</span>
) : (
<span className="text-gray-400 text-sm">Unknown</span>
);
}
return (
<div className="flex items-center gap-1.5" title={loc.timezone || ''}>
<MapPin className="w-3 h-3 text-gray-400" />
<span className="text-sm text-gray-700">
{parts.join(', ')}
</span>
{loc.isRotating && (
<span className="text-xs text-purple-500" title="Rotating proxy">*</span>
)}
</div>
);
})()}
</td>
<td className="px-4 py-3">
<ResourceBadge worker={worker} />
</td>
<td className="px-4 py-3">
<TaskCountBadge worker={worker} tasks={tasks} />
{worker.current_task_id ? (
<div>
<span className="text-sm text-gray-900">Task #{worker.current_task_id}</span>
{currentTask?.dispensary_name && (
<p className="text-xs text-gray-500">{currentTask.dispensary_name}</p>
)}
</div>
) : (
<span className="text-gray-400 text-sm">Idle</span>
)}
</td>
<td className="px-4 py-3">
{currentTask?.started_at ? (

View File

@@ -1,36 +0,0 @@
# RBAC configuration for scraper pod to control worker scaling
# Allows the scraper to read and scale the scraper-worker statefulset
apiVersion: v1
kind: ServiceAccount
metadata:
name: scraper-sa
namespace: dispensary-scraper
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: worker-scaler
namespace: dispensary-scraper
rules:
# Allow reading deployment and statefulset status
- apiGroups: ["apps"]
resources: ["deployments", "statefulsets"]
verbs: ["get", "list"]
# Allow scaling deployments and statefulsets
- apiGroups: ["apps"]
resources: ["deployments/scale", "statefulsets/scale"]
verbs: ["get", "patch", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: scraper-worker-scaler
namespace: dispensary-scraper
subjects:
- kind: ServiceAccount
name: scraper-sa
namespace: dispensary-scraper
roleRef:
kind: Role
name: worker-scaler
apiGroup: rbac.authorization.k8s.io

View File

@@ -40,16 +40,12 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: API_BASE_URL
value: "http://scraper"
- name: NODE_OPTIONS
value: "--max-old-space-size=1500"
resources:
requests:
memory: "1Gi"
memory: "256Mi"
cpu: "100m"
limits:
memory: "2Gi"
memory: "512Mi"
cpu: "500m"
livenessProbe:
exec:

View File

@@ -25,7 +25,6 @@ spec:
labels:
app: scraper
spec:
serviceAccountName: scraper-sa
imagePullSecrets:
- name: regcred
containers:

View File

@@ -1,18 +0,0 @@
# Woodpecker Agent Docker Compose
# Path: /opt/woodpecker/docker-compose.yml
# Deploy: cd /opt/woodpecker && docker compose up -d
version: '3.8'
services:
woodpecker-agent:
image: woodpeckerci/woodpecker-agent:latest
container_name: woodpecker-agent
restart: always
volumes:
- /var/run/docker.sock:/var/run/docker.sock
environment:
- WOODPECKER_SERVER=localhost:9000
- WOODPECKER_AGENT_SECRET=${WOODPECKER_AGENT_SECRET}
- WOODPECKER_MAX_WORKFLOWS=5
- WOODPECKER_HEALTHCHECK=true
- WOODPECKER_LOG_LEVEL=info

View File

@@ -6,19 +6,6 @@ kind: Namespace
metadata:
name: woodpecker
---
# PVC for npm cache - shared across CI jobs
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: npm-cache
namespace: woodpecker
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: 5Gi
---
apiVersion: v1
kind: Secret
metadata:
@@ -65,9 +52,6 @@ spec:
value: "woodpecker"
- name: WOODPECKER_BACKEND_K8S_VOLUME_SIZE
value: "10G"
# Allow CI steps to mount the npm-cache PVC
- name: WOODPECKER_BACKEND_K8S_VOLUMES
value: "npm-cache:/npm-cache"
resources:
limits:
memory: "512Mi"