Compare commits
66 Commits
feat/task-
...
fix/ci-and
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
50654be910 | ||
|
|
cdab71a1ee | ||
|
|
a35976b9e9 | ||
|
|
f2864bd2ad | ||
|
|
3f958fbff3 | ||
|
|
c84ef0396b | ||
|
|
6cd1f55119 | ||
|
|
e918234928 | ||
|
|
888a608485 | ||
|
|
b5c3b05246 | ||
|
|
fdce5e0302 | ||
|
|
4679b245de | ||
|
|
a837070f54 | ||
|
|
5a929e9803 | ||
|
|
52b0fad410 | ||
|
|
9944031eea | ||
|
|
2babaa7136 | ||
|
|
90567511dd | ||
|
|
beb16ad0cb | ||
|
|
fc7fc5ea85 | ||
|
|
ab8956b14b | ||
|
|
1d9c90641f | ||
|
|
6126b907f2 | ||
|
|
cc93d2d483 | ||
|
|
7642c17ec0 | ||
|
|
cb60dcf352 | ||
|
|
5ffe05d519 | ||
|
|
8e2f07c941 | ||
|
|
0b6e615075 | ||
|
|
be251c6fb3 | ||
|
|
efb1e89e33 | ||
|
|
529c447413 | ||
|
|
1eaf95c06b | ||
|
|
138ed17d8b | ||
|
|
a880c41d89 | ||
|
|
2a9ae61dce | ||
|
|
1f21911fa1 | ||
|
|
6f0a58f5d2 | ||
|
|
8206dce821 | ||
|
|
ced1afaa8a | ||
|
|
d6c602c567 | ||
|
|
a252a7fefd | ||
|
|
83b06c21cc | ||
|
|
f5214da54c | ||
|
|
e3d4dd0127 | ||
|
|
d0ee0d72f5 | ||
|
|
521f0550cd | ||
|
|
8a09691e91 | ||
|
|
459ad7d9c9 | ||
|
|
d102d27731 | ||
|
|
01810c40a1 | ||
|
|
b7d33e1cbf | ||
|
|
5b34b5a78c | ||
|
|
c091d2316b | ||
|
|
e8862b8a8b | ||
|
|
1b46ab699d | ||
|
|
ac1995f63f | ||
|
|
de93669652 | ||
|
|
dffc124920 | ||
|
|
932ceb0287 | ||
|
|
824d48fd85 | ||
|
|
47fdab0382 | ||
|
|
ed7ddc6375 | ||
|
|
cf06f4a8c0 | ||
|
|
61e915968f | ||
|
|
a4338669a9 |
@@ -1,6 +1,3 @@
|
||||
when:
|
||||
- event: [push, pull_request]
|
||||
|
||||
steps:
|
||||
# ===========================================
|
||||
# PR VALIDATION: Parallel type checks (PRs only)
|
||||
@@ -72,6 +69,7 @@ steps:
|
||||
|
||||
# ===========================================
|
||||
# MASTER DEPLOY: Parallel Docker builds
|
||||
# NOTE: cache_from/cache_to removed due to plugin bug splitting on commas
|
||||
# ===========================================
|
||||
docker-backend:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
@@ -163,7 +161,7 @@ steps:
|
||||
event: push
|
||||
|
||||
# ===========================================
|
||||
# STAGE 3: Deploy (after Docker builds)
|
||||
# STAGE 3: Deploy and Run Migrations
|
||||
# ===========================================
|
||||
deploy:
|
||||
image: bitnami/kubectl:latest
|
||||
@@ -174,12 +172,17 @@ steps:
|
||||
- mkdir -p ~/.kube
|
||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
||||
- chmod 600 ~/.kube/config
|
||||
# Deploy backend first
|
||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
||||
# Note: Migrations run automatically at startup via auto-migrate
|
||||
# Deploy remaining services
|
||||
# Resilience: ensure workers are scaled up if at 0
|
||||
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
|
||||
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
||||
depends_on:
|
||||
- docker-backend
|
||||
191
.woodpecker/ci.yml
Normal file
191
.woodpecker/ci.yml
Normal file
@@ -0,0 +1,191 @@
|
||||
steps:
|
||||
# ===========================================
|
||||
# PR VALIDATION: Only typecheck changed projects
|
||||
# ===========================================
|
||||
typecheck-backend:
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- npm config set cache /npm-cache/backend --global
|
||||
- cd backend
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit
|
||||
volumes:
|
||||
- npm-cache:/npm-cache
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
path:
|
||||
include: ['backend/**']
|
||||
|
||||
typecheck-cannaiq:
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- npm config set cache /npm-cache/cannaiq --global
|
||||
- cd cannaiq
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit
|
||||
volumes:
|
||||
- npm-cache:/npm-cache
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
path:
|
||||
include: ['cannaiq/**']
|
||||
|
||||
# findadispo/findagram typechecks skipped - they have || true anyway
|
||||
|
||||
# ===========================================
|
||||
# AUTO-MERGE: Merge PR after all checks pass
|
||||
# ===========================================
|
||||
auto-merge:
|
||||
image: alpine:latest
|
||||
environment:
|
||||
GITEA_TOKEN:
|
||||
from_secret: gitea_token
|
||||
commands:
|
||||
- apk add --no-cache curl
|
||||
- |
|
||||
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
|
||||
curl -s -X POST \
|
||||
-H "Authorization: token $GITEA_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"Do":"merge"}' \
|
||||
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
||||
depends_on:
|
||||
- typecheck-backend
|
||||
- typecheck-cannaiq
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
# ===========================================
|
||||
# MASTER DEPLOY: Parallel Docker builds
|
||||
# ===========================================
|
||||
docker-backend:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/dispensary-scraper
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: backend/Dockerfile
|
||||
context: backend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache
|
||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max
|
||||
build_args:
|
||||
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
|
||||
APP_GIT_SHA: ${CI_COMMIT_SHA}
|
||||
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
|
||||
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-cannaiq:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/cannaiq-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: cannaiq/Dockerfile
|
||||
context: cannaiq
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache
|
||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-findadispo:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/findadispo-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: findadispo/frontend/Dockerfile
|
||||
context: findadispo/frontend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache
|
||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-findagram:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/findagram-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: findagram/frontend/Dockerfile
|
||||
context: findagram/frontend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache
|
||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
# ===========================================
|
||||
# STAGE 3: Deploy and Run Migrations
|
||||
# ===========================================
|
||||
deploy:
|
||||
image: bitnami/kubectl:latest
|
||||
environment:
|
||||
KUBECONFIG_CONTENT:
|
||||
from_secret: kubeconfig_data
|
||||
commands:
|
||||
- mkdir -p ~/.kube
|
||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
||||
- chmod 600 ~/.kube/config
|
||||
# Deploy backend first
|
||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
||||
# Note: Migrations run automatically at startup via auto-migrate
|
||||
# Deploy remaining services
|
||||
# Resilience: ensure workers are scaled up if at 0
|
||||
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
|
||||
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
||||
depends_on:
|
||||
- docker-backend
|
||||
- docker-cannaiq
|
||||
- docker-findadispo
|
||||
- docker-findagram
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
@@ -25,8 +25,9 @@ ENV APP_GIT_SHA=${APP_GIT_SHA}
|
||||
ENV APP_BUILD_TIME=${APP_BUILD_TIME}
|
||||
ENV CONTAINER_IMAGE_TAG=${CONTAINER_IMAGE_TAG}
|
||||
|
||||
# Install Chromium dependencies
|
||||
# Install Chromium dependencies and curl for HTTP requests
|
||||
RUN apt-get update && apt-get install -y \
|
||||
curl \
|
||||
chromium \
|
||||
fonts-liberation \
|
||||
libnss3 \
|
||||
|
||||
218
backend/docs/CODEBASE_MAP.md
Normal file
218
backend/docs/CODEBASE_MAP.md
Normal file
@@ -0,0 +1,218 @@
|
||||
# CannaiQ Backend Codebase Map
|
||||
|
||||
**Last Updated:** 2025-12-12
|
||||
**Purpose:** Help Claude and developers understand which code is current vs deprecated
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference: What to Use
|
||||
|
||||
### For Crawling/Scraping
|
||||
| Task | Use This | NOT This |
|
||||
|------|----------|----------|
|
||||
| Fetch products | `src/tasks/handlers/payload-fetch.ts` | `src/hydration/*` |
|
||||
| Process products | `src/tasks/handlers/product-refresh.ts` | `src/scraper-v2/*` |
|
||||
| GraphQL client | `src/platforms/dutchie/client.ts` | `src/dutchie-az/services/graphql-client.ts` |
|
||||
| Worker system | `src/tasks/task-worker.ts` | `src/dutchie-az/services/worker.ts` |
|
||||
|
||||
### For Database
|
||||
| Task | Use This | NOT This |
|
||||
|------|----------|----------|
|
||||
| Get DB pool | `src/db/pool.ts` | `src/dutchie-az/db/connection.ts` |
|
||||
| Run migrations | `src/db/migrate.ts` (CLI only) | Never import at runtime |
|
||||
| Query products | `store_products` table | `products`, `dutchie_products` |
|
||||
| Query stores | `dispensaries` table | `stores` table |
|
||||
|
||||
### For Discovery
|
||||
| Task | Use This |
|
||||
|------|----------|
|
||||
| Discover stores | `src/discovery/*.ts` |
|
||||
| Run discovery | `npx tsx src/scripts/run-discovery.ts` |
|
||||
|
||||
---
|
||||
|
||||
## Directory Status
|
||||
|
||||
### ACTIVE DIRECTORIES (Use These)
|
||||
|
||||
```
|
||||
src/
|
||||
├── auth/ # JWT/session auth, middleware
|
||||
├── db/ # Database pool, migrations
|
||||
├── discovery/ # Dutchie store discovery pipeline
|
||||
├── middleware/ # Express middleware
|
||||
├── multi-state/ # Multi-state query support
|
||||
├── platforms/ # Platform-specific clients (Dutchie, Jane, etc)
|
||||
│ └── dutchie/ # THE Dutchie client - use this one
|
||||
├── routes/ # Express API routes
|
||||
├── services/ # Core services (logger, scheduler, etc)
|
||||
├── tasks/ # Task system (workers, handlers, scheduler)
|
||||
│ └── handlers/ # Task handlers (payload_fetch, product_refresh, etc)
|
||||
├── types/ # TypeScript types
|
||||
└── utils/ # Utilities (storage, image processing)
|
||||
```
|
||||
|
||||
### DEPRECATED DIRECTORIES (DO NOT USE)
|
||||
|
||||
```
|
||||
src/
|
||||
├── hydration/ # DEPRECATED - Old pipeline approach
|
||||
├── scraper-v2/ # DEPRECATED - Old scraper engine
|
||||
├── canonical-hydration/# DEPRECATED - Merged into tasks/handlers
|
||||
├── dutchie-az/ # PARTIAL - Some parts deprecated, some active
|
||||
│ ├── db/ # DEPRECATED - Use src/db/pool.ts
|
||||
│ └── services/ # PARTIAL - worker.ts still runs, graphql-client.ts deprecated
|
||||
├── portals/ # FUTURE - Not yet implemented
|
||||
├── seo/ # PARTIAL - Settings work, templates WIP
|
||||
└── system/ # DEPRECATED - Old orchestration system
|
||||
```
|
||||
|
||||
### DEPRECATED FILES (DO NOT USE)
|
||||
|
||||
```
|
||||
src/dutchie-az/db/connection.ts # Use src/db/pool.ts instead
|
||||
src/dutchie-az/services/graphql-client.ts # Use src/platforms/dutchie/client.ts
|
||||
src/hydration/*.ts # Entire directory deprecated
|
||||
src/scraper-v2/*.ts # Entire directory deprecated
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Key Files Reference
|
||||
|
||||
### Entry Points
|
||||
| File | Purpose | Status |
|
||||
|------|---------|--------|
|
||||
| `src/index.ts` | Main Express server | ACTIVE |
|
||||
| `src/dutchie-az/services/worker.ts` | Worker process entry | ACTIVE |
|
||||
| `src/tasks/task-worker.ts` | Task worker (new system) | ACTIVE |
|
||||
|
||||
### Dutchie Integration
|
||||
| File | Purpose | Status |
|
||||
|------|---------|--------|
|
||||
| `src/platforms/dutchie/client.ts` | GraphQL client, hashes, curl | **PRIMARY** |
|
||||
| `src/platforms/dutchie/queries.ts` | High-level query functions | ACTIVE |
|
||||
| `src/platforms/dutchie/index.ts` | Re-exports | ACTIVE |
|
||||
|
||||
### Task Handlers
|
||||
| File | Purpose | Status |
|
||||
|------|---------|--------|
|
||||
| `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** |
|
||||
| `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** |
|
||||
| `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE |
|
||||
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs | ACTIVE |
|
||||
| `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE |
|
||||
|
||||
### Database
|
||||
| File | Purpose | Status |
|
||||
|------|---------|--------|
|
||||
| `src/db/pool.ts` | Canonical DB pool | **PRIMARY** |
|
||||
| `src/db/migrate.ts` | Migration runner (CLI only) | CLI ONLY |
|
||||
| `src/db/auto-migrate.ts` | Auto-run migrations on startup | ACTIVE |
|
||||
|
||||
### Configuration
|
||||
| File | Purpose | Status |
|
||||
|------|---------|--------|
|
||||
| `.env` | Environment variables | ACTIVE |
|
||||
| `package.json` | Dependencies | ACTIVE |
|
||||
| `tsconfig.json` | TypeScript config | ACTIVE |
|
||||
|
||||
---
|
||||
|
||||
## GraphQL Hashes (CRITICAL)
|
||||
|
||||
The correct hashes are in `src/platforms/dutchie/client.ts`:
|
||||
|
||||
```typescript
|
||||
export const GRAPHQL_HASHES = {
|
||||
FilteredProducts: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
|
||||
GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
|
||||
ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
|
||||
GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
|
||||
};
|
||||
```
|
||||
|
||||
**ALWAYS** use `Status: 'Active'` for FilteredProducts (not `null` or `'All'`).
|
||||
|
||||
---
|
||||
|
||||
## Scripts Reference
|
||||
|
||||
### Useful Scripts (in `src/scripts/`)
|
||||
| Script | Purpose |
|
||||
|--------|---------|
|
||||
| `run-discovery.ts` | Run Dutchie discovery |
|
||||
| `crawl-single-store.ts` | Test crawl a single store |
|
||||
| `test-dutchie-graphql.ts` | Test GraphQL queries |
|
||||
|
||||
### One-Off Scripts (probably don't need)
|
||||
| Script | Purpose |
|
||||
|--------|---------|
|
||||
| `harmonize-az-dispensaries.ts` | One-time data cleanup |
|
||||
| `bootstrap-stores-for-dispensaries.ts` | One-time migration |
|
||||
| `backfill-*.ts` | Historical backfill scripts |
|
||||
|
||||
---
|
||||
|
||||
## API Routes
|
||||
|
||||
### Active Routes (in `src/routes/`)
|
||||
| Route File | Mount Point | Purpose |
|
||||
|------------|-------------|---------|
|
||||
| `auth.ts` | `/api/auth` | Login/logout/session |
|
||||
| `stores.ts` | `/api/stores` | Store CRUD |
|
||||
| `dashboard.ts` | `/api/dashboard` | Dashboard stats |
|
||||
| `workers.ts` | `/api/workers` | Worker monitoring |
|
||||
| `pipeline.ts` | `/api/pipeline` | Crawl triggers |
|
||||
| `discovery.ts` | `/api/discovery` | Discovery management |
|
||||
| `analytics.ts` | `/api/analytics` | Analytics queries |
|
||||
| `wordpress.ts` | `/api/v1/wordpress` | WordPress plugin API |
|
||||
|
||||
---
|
||||
|
||||
## Documentation Files
|
||||
|
||||
### Current Docs (in `backend/docs/`)
|
||||
| Doc | Purpose | Currency |
|
||||
|-----|---------|----------|
|
||||
| `TASK_WORKFLOW_2024-12-10.md` | Task system architecture | CURRENT |
|
||||
| `WORKER_TASK_ARCHITECTURE.md` | Worker/task design | CURRENT |
|
||||
| `CRAWL_PIPELINE.md` | Crawl pipeline overview | CURRENT |
|
||||
| `ORGANIC_SCRAPING_GUIDE.md` | Browser-based scraping | CURRENT |
|
||||
| `CODEBASE_MAP.md` | This file | CURRENT |
|
||||
| `ANALYTICS_V2_EXAMPLES.md` | Analytics API examples | CURRENT |
|
||||
| `BRAND_INTELLIGENCE_API.md` | Brand API docs | CURRENT |
|
||||
|
||||
### Root Docs
|
||||
| Doc | Purpose | Currency |
|
||||
|-----|---------|----------|
|
||||
| `CLAUDE.md` | Claude instructions | **PRIMARY** |
|
||||
| `README.md` | Project overview | NEEDS UPDATE |
|
||||
|
||||
---
|
||||
|
||||
## Common Mistakes to Avoid
|
||||
|
||||
1. **Don't use `src/hydration/`** - It's an old approach that was superseded by the task system
|
||||
|
||||
2. **Don't use `src/dutchie-az/db/connection.ts`** - Use `src/db/pool.ts` instead
|
||||
|
||||
3. **Don't import `src/db/migrate.ts` at runtime** - It will crash. Only use for CLI migrations.
|
||||
|
||||
4. **Don't query `stores` table** - It's empty. Use `dispensaries`.
|
||||
|
||||
5. **Don't query `products` table** - It's empty. Use `store_products`.
|
||||
|
||||
6. **Don't use wrong GraphQL hash** - Always get hash from `GRAPHQL_HASHES` in client.ts
|
||||
|
||||
7. **Don't use `Status: null`** - It returns 0 products. Use `Status: 'Active'`.
|
||||
|
||||
---
|
||||
|
||||
## When in Doubt
|
||||
|
||||
1. Check if the file is imported in `src/index.ts` - if not, it may be deprecated
|
||||
2. Check the last modified date - older files may be stale
|
||||
3. Look for `DEPRECATED` comments in the code
|
||||
4. Ask: "Is there a newer version of this in `src/tasks/` or `src/platforms/`?"
|
||||
5. Read the relevant doc in `docs/` before modifying code
|
||||
297
backend/docs/_archive/ORGANIC_SCRAPING_GUIDE.md
Normal file
297
backend/docs/_archive/ORGANIC_SCRAPING_GUIDE.md
Normal file
@@ -0,0 +1,297 @@
|
||||
# Organic Browser-Based Scraping Guide
|
||||
|
||||
**Last Updated:** 2025-12-12
|
||||
**Status:** Production-ready proof of concept
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
This document describes the "organic" browser-based approach to scraping Dutchie dispensary menus. Unlike direct curl/axios requests, this method uses a real browser session to make API calls, making requests appear natural and reducing detection risk.
|
||||
|
||||
---
|
||||
|
||||
## Why Organic Scraping?
|
||||
|
||||
| Approach | Detection Risk | Speed | Complexity |
|
||||
|----------|---------------|-------|------------|
|
||||
| Direct curl | Higher | Fast | Low |
|
||||
| curl-impersonate | Medium | Fast | Medium |
|
||||
| **Browser-based (organic)** | **Lowest** | Slower | Higher |
|
||||
|
||||
Direct curl requests can be fingerprinted via:
|
||||
- TLS fingerprint (cipher suites, extensions)
|
||||
- Header order and values
|
||||
- Missing cookies/session data
|
||||
- Request patterns
|
||||
|
||||
Browser-based requests inherit:
|
||||
- Real Chrome TLS fingerprint
|
||||
- Session cookies from page visit
|
||||
- Natural header order
|
||||
- JavaScript execution environment
|
||||
|
||||
---
|
||||
|
||||
## Implementation
|
||||
|
||||
### Dependencies
|
||||
|
||||
```bash
|
||||
npm install puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
|
||||
```
|
||||
|
||||
### Core Script: `test-intercept.js`
|
||||
|
||||
Located at: `backend/test-intercept.js`
|
||||
|
||||
```javascript
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
||||
const fs = require('fs');
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
async function capturePayload(config) {
|
||||
const { dispensaryId, platformId, cName, outputPath } = config;
|
||||
|
||||
const browser = await puppeteer.launch({
|
||||
headless: 'new',
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
|
||||
// STEP 1: Establish session by visiting the menu
|
||||
const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
|
||||
await page.goto(embedUrl, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
|
||||
// STEP 2: Fetch ALL products using GraphQL from browser context
|
||||
const result = await page.evaluate(async (platformId) => {
|
||||
const allProducts = [];
|
||||
let pageNum = 0;
|
||||
const perPage = 100;
|
||||
let totalCount = 0;
|
||||
const sessionId = 'browser-session-' + Date.now();
|
||||
|
||||
while (pageNum < 30) {
|
||||
const variables = {
|
||||
includeEnterpriseSpecials: false,
|
||||
productsFilter: {
|
||||
dispensaryId: platformId,
|
||||
pricingType: 'rec',
|
||||
Status: 'Active', // CRITICAL: Must be 'Active', not null
|
||||
types: [],
|
||||
useCache: true,
|
||||
isDefaultSort: true,
|
||||
sortBy: 'popularSortIdx',
|
||||
sortDirection: 1,
|
||||
bypassOnlineThresholds: true,
|
||||
isKioskMenu: false,
|
||||
removeProductsBelowOptionThresholds: false,
|
||||
},
|
||||
page: pageNum,
|
||||
perPage: perPage,
|
||||
};
|
||||
|
||||
const extensions = {
|
||||
persistedQuery: {
|
||||
version: 1,
|
||||
sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'
|
||||
}
|
||||
};
|
||||
|
||||
const qs = new URLSearchParams({
|
||||
operationName: 'FilteredProducts',
|
||||
variables: JSON.stringify(variables),
|
||||
extensions: JSON.stringify(extensions)
|
||||
});
|
||||
|
||||
const response = await fetch(`https://dutchie.com/api-3/graphql?${qs}`, {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
'Accept': 'application/json',
|
||||
'content-type': 'application/json',
|
||||
'x-dutchie-session': sessionId,
|
||||
'apollographql-client-name': 'Marketplace (production)',
|
||||
},
|
||||
credentials: 'include'
|
||||
});
|
||||
|
||||
const json = await response.json();
|
||||
const data = json?.data?.filteredProducts;
|
||||
if (!data?.products) break;
|
||||
|
||||
allProducts.push(...data.products);
|
||||
if (pageNum === 0) totalCount = data.queryInfo?.totalCount || 0;
|
||||
if (allProducts.length >= totalCount) break;
|
||||
|
||||
pageNum++;
|
||||
await new Promise(r => setTimeout(r, 200)); // Polite delay
|
||||
}
|
||||
|
||||
return { products: allProducts, totalCount };
|
||||
}, platformId);
|
||||
|
||||
await browser.close();
|
||||
|
||||
// STEP 3: Save payload
|
||||
const payload = {
|
||||
dispensaryId,
|
||||
platformId,
|
||||
cName,
|
||||
fetchedAt: new Date().toISOString(),
|
||||
productCount: result.products.length,
|
||||
products: result.products,
|
||||
};
|
||||
|
||||
fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
|
||||
return payload;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Critical Parameters
|
||||
|
||||
### GraphQL Hash (FilteredProducts)
|
||||
|
||||
```
|
||||
ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0
|
||||
```
|
||||
|
||||
**WARNING:** Using the wrong hash returns HTTP 400.
|
||||
|
||||
### Status Parameter
|
||||
|
||||
| Value | Result |
|
||||
|-------|--------|
|
||||
| `'Active'` | Returns in-stock products (1019 in test) |
|
||||
| `null` | Returns 0 products |
|
||||
| `'All'` | Returns HTTP 400 |
|
||||
|
||||
**ALWAYS use `Status: 'Active'`**
|
||||
|
||||
### Required Headers
|
||||
|
||||
```javascript
|
||||
{
|
||||
'Accept': 'application/json',
|
||||
'content-type': 'application/json',
|
||||
'x-dutchie-session': 'unique-session-id',
|
||||
'apollographql-client-name': 'Marketplace (production)',
|
||||
}
|
||||
```
|
||||
|
||||
### Endpoint
|
||||
|
||||
```
|
||||
https://dutchie.com/api-3/graphql
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Benchmarks
|
||||
|
||||
Test store: AZ-Deeply-Rooted (1019 products)
|
||||
|
||||
| Metric | Value |
|
||||
|--------|-------|
|
||||
| Total products | 1019 |
|
||||
| Time | 18.5 seconds |
|
||||
| Payload size | 11.8 MB |
|
||||
| Pages fetched | 11 (100 per page) |
|
||||
| Success rate | 100% |
|
||||
|
||||
---
|
||||
|
||||
## Payload Format
|
||||
|
||||
The output matches the existing `payload-fetch.ts` handler format:
|
||||
|
||||
```json
|
||||
{
|
||||
"dispensaryId": 123,
|
||||
"platformId": "6405ef617056e8014d79101b",
|
||||
"cName": "AZ-Deeply-Rooted",
|
||||
"fetchedAt": "2025-12-12T05:05:19.837Z",
|
||||
"productCount": 1019,
|
||||
"products": [
|
||||
{
|
||||
"id": "6927508db4851262f629a869",
|
||||
"Name": "Product Name",
|
||||
"brand": { "name": "Brand Name", ... },
|
||||
"type": "Flower",
|
||||
"THC": "25%",
|
||||
"Prices": [...],
|
||||
"Options": [...],
|
||||
...
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Integration Points
|
||||
|
||||
### As a Task Handler
|
||||
|
||||
The organic approach can be integrated as an alternative to curl-based fetching:
|
||||
|
||||
```typescript
|
||||
// In src/tasks/handlers/organic-payload-fetch.ts
|
||||
export async function handleOrganicPayloadFetch(ctx: TaskContext): Promise<TaskResult> {
|
||||
// Use puppeteer-based capture
|
||||
// Save to same payload storage
|
||||
// Queue product_refresh task
|
||||
}
|
||||
```
|
||||
|
||||
### Worker Configuration
|
||||
|
||||
Add to job_schedules:
|
||||
```sql
|
||||
INSERT INTO job_schedules (name, role, cron_expression)
|
||||
VALUES ('organic_product_crawl', 'organic_payload_fetch', '0 */6 * * *');
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### HTTP 400 Bad Request
|
||||
- Check hash is correct: `ee29c060...`
|
||||
- Verify Status is `'Active'` (string, not null)
|
||||
|
||||
### 0 Products Returned
|
||||
- Status was likely `null` or `'All'` - use `'Active'`
|
||||
- Check platformId is valid MongoDB ObjectId
|
||||
|
||||
### Session Not Established
|
||||
- Increase timeout on initial page.goto()
|
||||
- Check cName is valid (matches embedded-menu URL)
|
||||
|
||||
### Detection/Blocking
|
||||
- StealthPlugin should handle most cases
|
||||
- Add random delays between pages
|
||||
- Use headless: 'new' (not true/false)
|
||||
|
||||
---
|
||||
|
||||
## Files Reference
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `backend/test-intercept.js` | Proof of concept script |
|
||||
| `backend/src/platforms/dutchie/client.ts` | GraphQL hashes, curl implementation |
|
||||
| `backend/src/tasks/handlers/payload-fetch.ts` | Current curl-based handler |
|
||||
| `backend/src/utils/payload-storage.ts` | Payload save/load utilities |
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- `DUTCHIE_CRAWL_WORKFLOW.md` - Full crawl pipeline documentation
|
||||
- `TASK_WORKFLOW_2024-12-10.md` - Task system architecture
|
||||
- `CLAUDE.md` - Project rules and constraints
|
||||
25
backend/docs/_archive/README.md
Normal file
25
backend/docs/_archive/README.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# ARCHIVED DOCUMENTATION
|
||||
|
||||
**WARNING: These docs may be outdated or inaccurate.**
|
||||
|
||||
The code has evolved significantly. These docs are kept for historical reference only.
|
||||
|
||||
## What to Use Instead
|
||||
|
||||
**The single source of truth is:**
|
||||
- `CLAUDE.md` (root) - Essential rules and quick reference
|
||||
- `docs/CODEBASE_MAP.md` - Current file/directory reference
|
||||
|
||||
## Why Archive?
|
||||
|
||||
These docs were written during development iterations and may reference:
|
||||
- Old file paths that no longer exist
|
||||
- Deprecated approaches (hydration, scraper-v2)
|
||||
- APIs that have changed
|
||||
- Database schemas that evolved
|
||||
|
||||
## If You Need Details
|
||||
|
||||
1. First check CODEBASE_MAP.md for current file locations
|
||||
2. Then read the actual source code
|
||||
3. Only use archive docs as a last resort for historical context
|
||||
@@ -362,6 +362,148 @@ SET status = 'pending', retry_count = retry_count + 1
|
||||
WHERE status = 'failed' AND retry_count < max_retries;
|
||||
```
|
||||
|
||||
## Concurrent Task Processing (Added 2024-12)
|
||||
|
||||
Workers can now process multiple tasks concurrently within a single worker instance. This improves throughput by utilizing async I/O efficiently.
|
||||
|
||||
### Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Pod (K8s) │
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────┐ │
|
||||
│ │ TaskWorker │ │
|
||||
│ │ │ │
|
||||
│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │
|
||||
│ │ │ Task 1 │ │ Task 2 │ │ Task 3 │ (concurrent)│ │
|
||||
│ │ └─────────┘ └─────────┘ └─────────┘ │ │
|
||||
│ │ │ │
|
||||
│ │ Resource Monitor │ │
|
||||
│ │ ├── Memory: 65% (threshold: 85%) │ │
|
||||
│ │ ├── CPU: 45% (threshold: 90%) │ │
|
||||
│ │ └── Status: Normal │ │
|
||||
│ └─────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `MAX_CONCURRENT_TASKS` | 3 | Maximum tasks a worker will run concurrently |
|
||||
| `MEMORY_BACKOFF_THRESHOLD` | 0.85 | Back off when heap memory exceeds 85% |
|
||||
| `CPU_BACKOFF_THRESHOLD` | 0.90 | Back off when CPU exceeds 90% |
|
||||
| `BACKOFF_DURATION_MS` | 10000 | How long to wait when backing off (10s) |
|
||||
|
||||
### How It Works
|
||||
|
||||
1. **Main Loop**: Worker continuously tries to fill up to `MAX_CONCURRENT_TASKS`
|
||||
2. **Resource Monitoring**: Before claiming a new task, worker checks memory and CPU
|
||||
3. **Backoff**: If resources exceed thresholds, worker pauses and stops claiming new tasks
|
||||
4. **Concurrent Execution**: Tasks run in parallel using `Promise` - they don't block each other
|
||||
5. **Graceful Shutdown**: On SIGTERM/decommission, worker stops claiming but waits for active tasks
|
||||
|
||||
### Resource Monitoring
|
||||
|
||||
```typescript
|
||||
// ResourceStats interface
|
||||
interface ResourceStats {
|
||||
memoryPercent: number; // Current heap usage as decimal (0.0-1.0)
|
||||
memoryMb: number; // Current heap used in MB
|
||||
memoryTotalMb: number; // Total heap available in MB
|
||||
cpuPercent: number; // CPU usage as percentage (0-100)
|
||||
isBackingOff: boolean; // True if worker is in backoff state
|
||||
backoffReason: string; // Why the worker is backing off
|
||||
}
|
||||
```
|
||||
|
||||
### Heartbeat Data
|
||||
|
||||
Workers report the following in their heartbeat:
|
||||
|
||||
```json
|
||||
{
|
||||
"worker_id": "worker-abc123",
|
||||
"current_task_id": 456,
|
||||
"current_task_ids": [456, 457, 458],
|
||||
"active_task_count": 3,
|
||||
"max_concurrent_tasks": 3,
|
||||
"status": "active",
|
||||
"resources": {
|
||||
"memory_mb": 256,
|
||||
"memory_total_mb": 512,
|
||||
"memory_rss_mb": 320,
|
||||
"memory_percent": 50,
|
||||
"cpu_user_ms": 12500,
|
||||
"cpu_system_ms": 3200,
|
||||
"cpu_percent": 45,
|
||||
"is_backing_off": false,
|
||||
"backoff_reason": null
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Backoff Behavior
|
||||
|
||||
When resources exceed thresholds:
|
||||
|
||||
1. Worker logs the backoff reason:
|
||||
```
|
||||
[TaskWorker] MyWorker backing off: Memory at 87.3% (threshold: 85%)
|
||||
```
|
||||
|
||||
2. Worker stops claiming new tasks but continues existing tasks
|
||||
|
||||
3. After `BACKOFF_DURATION_MS`, worker rechecks resources
|
||||
|
||||
4. When resources return to normal:
|
||||
```
|
||||
[TaskWorker] MyWorker resuming normal operation
|
||||
```
|
||||
|
||||
### UI Display
|
||||
|
||||
The Workers Dashboard shows:
|
||||
|
||||
- **Tasks Column**: `2/3 tasks` (active/max concurrent)
|
||||
- **Resources Column**: Memory % and CPU % with color coding
|
||||
- Green: < 50%
|
||||
- Yellow: 50-74%
|
||||
- Amber: 75-89%
|
||||
- Red: 90%+
|
||||
- **Backing Off**: Orange warning badge when worker is in backoff state
|
||||
|
||||
### Task Count Badge Details
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ Worker: "MyWorker" │
|
||||
│ Tasks: 2/3 tasks #456, #457 │
|
||||
│ Resources: 🧠 65% 💻 45% │
|
||||
│ Status: ● Active │
|
||||
└─────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Best Practices
|
||||
|
||||
1. **Start Conservative**: Use `MAX_CONCURRENT_TASKS=3` initially
|
||||
2. **Monitor Resources**: Watch for frequent backoffs in logs
|
||||
3. **Tune Per Workload**: I/O-bound tasks benefit from higher concurrency
|
||||
4. **Scale Horizontally**: Add more pods rather than cranking concurrency too high
|
||||
|
||||
### Code References
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `src/tasks/task-worker.ts:68-71` | Concurrency environment variables |
|
||||
| `src/tasks/task-worker.ts:104-111` | ResourceStats interface |
|
||||
| `src/tasks/task-worker.ts:149-179` | getResourceStats() method |
|
||||
| `src/tasks/task-worker.ts:184-196` | shouldBackOff() method |
|
||||
| `src/tasks/task-worker.ts:462-516` | mainLoop() with concurrent claiming |
|
||||
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
|
||||
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
|
||||
|
||||
## Monitoring
|
||||
|
||||
### Logs
|
||||
27
backend/migrations/074_worker_commands.sql
Normal file
27
backend/migrations/074_worker_commands.sql
Normal file
@@ -0,0 +1,27 @@
|
||||
-- Migration: Worker Commands Table
|
||||
-- Purpose: Store commands for workers (decommission, etc.)
|
||||
-- Workers poll this table after each task to check for commands
|
||||
|
||||
CREATE TABLE IF NOT EXISTS worker_commands (
|
||||
id SERIAL PRIMARY KEY,
|
||||
worker_id TEXT NOT NULL,
|
||||
command TEXT NOT NULL, -- 'decommission', 'pause', 'resume'
|
||||
reason TEXT,
|
||||
issued_by TEXT,
|
||||
issued_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
acknowledged_at TIMESTAMPTZ,
|
||||
executed_at TIMESTAMPTZ,
|
||||
status TEXT DEFAULT 'pending' -- 'pending', 'acknowledged', 'executed', 'cancelled'
|
||||
);
|
||||
|
||||
-- Index for worker lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_commands_worker_id ON worker_commands(worker_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_commands_pending ON worker_commands(worker_id, status) WHERE status = 'pending';
|
||||
|
||||
-- Add decommission_requested column to worker_registry for quick checks
|
||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested BOOLEAN DEFAULT FALSE;
|
||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_reason TEXT;
|
||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested_at TIMESTAMPTZ;
|
||||
|
||||
-- Comment
|
||||
COMMENT ON TABLE worker_commands IS 'Commands issued to workers (decommission after task, pause, etc.)';
|
||||
27
backend/migrations/082_proxy_notification_trigger.sql
Normal file
27
backend/migrations/082_proxy_notification_trigger.sql
Normal file
@@ -0,0 +1,27 @@
|
||||
-- Migration: 082_proxy_notification_trigger
|
||||
-- Date: 2024-12-11
|
||||
-- Description: Add PostgreSQL NOTIFY trigger to alert workers when proxies are added
|
||||
|
||||
-- Create function to notify workers when active proxy is added/activated
|
||||
CREATE OR REPLACE FUNCTION notify_proxy_added()
|
||||
RETURNS TRIGGER AS $$
|
||||
BEGIN
|
||||
-- Only notify if proxy is active
|
||||
IF NEW.active = true THEN
|
||||
PERFORM pg_notify('proxy_added', NEW.id::text);
|
||||
END IF;
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Drop existing trigger if any
|
||||
DROP TRIGGER IF EXISTS proxy_added_trigger ON proxies;
|
||||
|
||||
-- Create trigger on insert and update of active column
|
||||
CREATE TRIGGER proxy_added_trigger
|
||||
AFTER INSERT OR UPDATE OF active ON proxies
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION notify_proxy_added();
|
||||
|
||||
COMMENT ON FUNCTION notify_proxy_added() IS
|
||||
'Sends PostgreSQL NOTIFY to proxy_added channel when an active proxy is added or activated. Workers LISTEN on this channel to wake up immediately.';
|
||||
88
backend/migrations/083_discovery_runs.sql
Normal file
88
backend/migrations/083_discovery_runs.sql
Normal file
@@ -0,0 +1,88 @@
|
||||
-- Migration 083: Discovery Run Tracking
|
||||
-- Tracks progress of store discovery runs step-by-step
|
||||
|
||||
-- Main discovery runs table
|
||||
CREATE TABLE IF NOT EXISTS discovery_runs (
|
||||
id SERIAL PRIMARY KEY,
|
||||
platform VARCHAR(50) NOT NULL DEFAULT 'dutchie',
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'running', -- running, completed, failed
|
||||
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
finished_at TIMESTAMPTZ,
|
||||
task_id INTEGER REFERENCES worker_task_queue(id),
|
||||
|
||||
-- Totals
|
||||
states_total INTEGER DEFAULT 0,
|
||||
states_completed INTEGER DEFAULT 0,
|
||||
locations_discovered INTEGER DEFAULT 0,
|
||||
locations_promoted INTEGER DEFAULT 0,
|
||||
new_store_ids INTEGER[] DEFAULT '{}',
|
||||
|
||||
-- Error info
|
||||
error_message TEXT,
|
||||
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Per-state progress within a run
|
||||
CREATE TABLE IF NOT EXISTS discovery_run_states (
|
||||
id SERIAL PRIMARY KEY,
|
||||
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
|
||||
state_code VARCHAR(2) NOT NULL,
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending, running, completed, failed
|
||||
started_at TIMESTAMPTZ,
|
||||
finished_at TIMESTAMPTZ,
|
||||
|
||||
-- Results
|
||||
cities_found INTEGER DEFAULT 0,
|
||||
locations_found INTEGER DEFAULT 0,
|
||||
locations_upserted INTEGER DEFAULT 0,
|
||||
new_dispensary_ids INTEGER[] DEFAULT '{}',
|
||||
|
||||
-- Error info
|
||||
error_message TEXT,
|
||||
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
|
||||
UNIQUE(run_id, state_code)
|
||||
);
|
||||
|
||||
-- Step-by-step log for detailed progress tracking
|
||||
CREATE TABLE IF NOT EXISTS discovery_run_steps (
|
||||
id SERIAL PRIMARY KEY,
|
||||
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
|
||||
state_code VARCHAR(2),
|
||||
step_name VARCHAR(100) NOT NULL,
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'started', -- started, completed, failed
|
||||
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
finished_at TIMESTAMPTZ,
|
||||
|
||||
-- Details (JSON for flexibility)
|
||||
details JSONB DEFAULT '{}',
|
||||
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Indexes for querying
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_status ON discovery_runs(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_platform ON discovery_runs(platform);
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_started_at ON discovery_runs(started_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_run_states_run_id ON discovery_run_states(run_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_run_steps_run_id ON discovery_run_steps(run_id);
|
||||
|
||||
-- View for latest run status per platform
|
||||
CREATE OR REPLACE VIEW v_latest_discovery_runs AS
|
||||
SELECT DISTINCT ON (platform)
|
||||
id,
|
||||
platform,
|
||||
status,
|
||||
started_at,
|
||||
finished_at,
|
||||
states_total,
|
||||
states_completed,
|
||||
locations_discovered,
|
||||
locations_promoted,
|
||||
array_length(new_store_ids, 1) as new_stores_count,
|
||||
error_message,
|
||||
EXTRACT(EPOCH FROM (COALESCE(finished_at, NOW()) - started_at)) as duration_seconds
|
||||
FROM discovery_runs
|
||||
ORDER BY platform, started_at DESC;
|
||||
253
backend/migrations/084_dual_transport_preflight.sql
Normal file
253
backend/migrations/084_dual_transport_preflight.sql
Normal file
@@ -0,0 +1,253 @@
|
||||
-- Migration 084: Dual Transport Preflight System
|
||||
-- Workers run both curl and http (Puppeteer) preflights on startup
|
||||
-- Tasks can require a specific transport method
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 1: Add preflight columns to worker_registry
|
||||
-- ===================================================================
|
||||
|
||||
-- Preflight status for curl/axios transport (proxy-based)
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_curl_status VARCHAR(20) DEFAULT 'pending';
|
||||
|
||||
-- Preflight status for http/Puppeteer transport (browser-based)
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_http_status VARCHAR(20) DEFAULT 'pending';
|
||||
|
||||
-- Timestamps for when each preflight completed
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_curl_at TIMESTAMPTZ;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_http_at TIMESTAMPTZ;
|
||||
|
||||
-- Error messages for failed preflights
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_curl_error TEXT;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_http_error TEXT;
|
||||
|
||||
-- Response time for successful preflights (ms)
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_curl_ms INTEGER;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_http_ms INTEGER;
|
||||
|
||||
-- Constraints for preflight status values
|
||||
ALTER TABLE worker_registry
|
||||
DROP CONSTRAINT IF EXISTS valid_preflight_curl_status;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD CONSTRAINT valid_preflight_curl_status
|
||||
CHECK (preflight_curl_status IN ('pending', 'passed', 'failed', 'skipped'));
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
DROP CONSTRAINT IF EXISTS valid_preflight_http_status;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD CONSTRAINT valid_preflight_http_status
|
||||
CHECK (preflight_http_status IN ('pending', 'passed', 'failed', 'skipped'));
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 2: Add method column to worker_tasks
|
||||
-- ===================================================================
|
||||
|
||||
-- Transport method requirement for the task
|
||||
-- NULL = no preference (any worker can claim)
|
||||
-- 'curl' = requires curl/axios transport (proxy-based, fast)
|
||||
-- 'http' = requires http/Puppeteer transport (browser-based, anti-detect)
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS method VARCHAR(10);
|
||||
|
||||
-- Constraint for valid method values
|
||||
ALTER TABLE worker_tasks
|
||||
DROP CONSTRAINT IF EXISTS valid_task_method;
|
||||
|
||||
ALTER TABLE worker_tasks
|
||||
ADD CONSTRAINT valid_task_method
|
||||
CHECK (method IS NULL OR method IN ('curl', 'http'));
|
||||
|
||||
-- Index for method-based task claiming
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_tasks_method
|
||||
ON worker_tasks(method)
|
||||
WHERE status = 'pending';
|
||||
|
||||
-- Set default method for all existing pending tasks to 'http'
|
||||
-- ALL current tasks require Puppeteer/browser-based transport
|
||||
UPDATE worker_tasks
|
||||
SET method = 'http'
|
||||
WHERE method IS NULL;
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 3: Update claim_task function for method compatibility
|
||||
-- ===================================================================
|
||||
|
||||
CREATE OR REPLACE FUNCTION claim_task(
|
||||
p_role VARCHAR(50),
|
||||
p_worker_id VARCHAR(100),
|
||||
p_curl_passed BOOLEAN DEFAULT TRUE,
|
||||
p_http_passed BOOLEAN DEFAULT FALSE
|
||||
) RETURNS worker_tasks AS $$
|
||||
DECLARE
|
||||
claimed_task worker_tasks;
|
||||
BEGIN
|
||||
UPDATE worker_tasks
|
||||
SET
|
||||
status = 'claimed',
|
||||
worker_id = p_worker_id,
|
||||
claimed_at = NOW(),
|
||||
updated_at = NOW()
|
||||
WHERE id = (
|
||||
SELECT id FROM worker_tasks
|
||||
WHERE role = p_role
|
||||
AND status = 'pending'
|
||||
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
|
||||
-- Method compatibility: worker must have passed the required preflight
|
||||
AND (
|
||||
method IS NULL -- No preference, any worker can claim
|
||||
OR (method = 'curl' AND p_curl_passed = TRUE)
|
||||
OR (method = 'http' AND p_http_passed = TRUE)
|
||||
)
|
||||
-- Exclude stores that already have an active task
|
||||
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
|
||||
SELECT dispensary_id FROM worker_tasks
|
||||
WHERE status IN ('claimed', 'running')
|
||||
AND dispensary_id IS NOT NULL
|
||||
))
|
||||
ORDER BY priority DESC, created_at ASC
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING * INTO claimed_task;
|
||||
|
||||
RETURN claimed_task;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 4: Update v_active_workers view
|
||||
-- ===================================================================
|
||||
|
||||
DROP VIEW IF EXISTS v_active_workers;
|
||||
|
||||
CREATE VIEW v_active_workers AS
|
||||
SELECT
|
||||
wr.id,
|
||||
wr.worker_id,
|
||||
wr.friendly_name,
|
||||
wr.role,
|
||||
wr.status,
|
||||
wr.pod_name,
|
||||
wr.hostname,
|
||||
wr.started_at,
|
||||
wr.last_heartbeat_at,
|
||||
wr.last_task_at,
|
||||
wr.tasks_completed,
|
||||
wr.tasks_failed,
|
||||
wr.current_task_id,
|
||||
-- Preflight status
|
||||
wr.preflight_curl_status,
|
||||
wr.preflight_http_status,
|
||||
wr.preflight_curl_at,
|
||||
wr.preflight_http_at,
|
||||
wr.preflight_curl_error,
|
||||
wr.preflight_http_error,
|
||||
wr.preflight_curl_ms,
|
||||
wr.preflight_http_ms,
|
||||
-- Computed fields
|
||||
EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
|
||||
CASE
|
||||
WHEN wr.status = 'offline' THEN 'offline'
|
||||
WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
|
||||
WHEN wr.current_task_id IS NOT NULL THEN 'busy'
|
||||
ELSE 'ready'
|
||||
END as health_status,
|
||||
-- Capability flags (can this worker handle curl/http tasks?)
|
||||
(wr.preflight_curl_status = 'passed') as can_curl,
|
||||
(wr.preflight_http_status = 'passed') as can_http
|
||||
FROM worker_registry wr
|
||||
WHERE wr.status != 'terminated'
|
||||
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 5: View for task queue with method info
|
||||
-- ===================================================================
|
||||
|
||||
DROP VIEW IF EXISTS v_task_history;
|
||||
|
||||
CREATE VIEW v_task_history AS
|
||||
SELECT
|
||||
t.id,
|
||||
t.role,
|
||||
t.dispensary_id,
|
||||
d.name as dispensary_name,
|
||||
t.platform,
|
||||
t.status,
|
||||
t.priority,
|
||||
t.method,
|
||||
t.worker_id,
|
||||
t.scheduled_for,
|
||||
t.claimed_at,
|
||||
t.started_at,
|
||||
t.completed_at,
|
||||
t.error_message,
|
||||
t.retry_count,
|
||||
t.created_at,
|
||||
EXTRACT(EPOCH FROM (t.completed_at - t.started_at)) as duration_sec
|
||||
FROM worker_tasks t
|
||||
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
|
||||
ORDER BY t.created_at DESC;
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 6: Helper function to update worker preflight status
|
||||
-- ===================================================================
|
||||
|
||||
CREATE OR REPLACE FUNCTION update_worker_preflight(
|
||||
p_worker_id VARCHAR(100),
|
||||
p_transport VARCHAR(10), -- 'curl' or 'http'
|
||||
p_status VARCHAR(20), -- 'passed', 'failed', 'skipped'
|
||||
p_response_ms INTEGER DEFAULT NULL,
|
||||
p_error TEXT DEFAULT NULL
|
||||
) RETURNS VOID AS $$
|
||||
BEGIN
|
||||
IF p_transport = 'curl' THEN
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
preflight_curl_status = p_status,
|
||||
preflight_curl_at = NOW(),
|
||||
preflight_curl_ms = p_response_ms,
|
||||
preflight_curl_error = p_error,
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
ELSIF p_transport = 'http' THEN
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
preflight_http_status = p_status,
|
||||
preflight_http_at = NOW(),
|
||||
preflight_http_ms = p_response_ms,
|
||||
preflight_http_error = p_error,
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
END IF;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ===================================================================
|
||||
-- Comments
|
||||
-- ===================================================================
|
||||
|
||||
COMMENT ON COLUMN worker_registry.preflight_curl_status IS 'Status of curl/axios preflight: pending, passed, failed, skipped';
|
||||
COMMENT ON COLUMN worker_registry.preflight_http_status IS 'Status of http/Puppeteer preflight: pending, passed, failed, skipped';
|
||||
COMMENT ON COLUMN worker_registry.preflight_curl_at IS 'When curl preflight completed';
|
||||
COMMENT ON COLUMN worker_registry.preflight_http_at IS 'When http preflight completed';
|
||||
COMMENT ON COLUMN worker_registry.preflight_curl_error IS 'Error message if curl preflight failed';
|
||||
COMMENT ON COLUMN worker_registry.preflight_http_error IS 'Error message if http preflight failed';
|
||||
COMMENT ON COLUMN worker_registry.preflight_curl_ms IS 'Response time of successful curl preflight (ms)';
|
||||
COMMENT ON COLUMN worker_registry.preflight_http_ms IS 'Response time of successful http preflight (ms)';
|
||||
|
||||
COMMENT ON COLUMN worker_tasks.method IS 'Transport method required: NULL=any, curl=proxy-based, http=browser-based';
|
||||
|
||||
COMMENT ON FUNCTION claim_task IS 'Atomically claim a task, respecting method requirements and per-store locking';
|
||||
COMMENT ON FUNCTION update_worker_preflight IS 'Update a workers preflight status for a given transport';
|
||||
286
backend/node_modules/.package-lock.json
generated
vendored
286
backend/node_modules/.package-lock.json
generated
vendored
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "dutchie-menus-backend",
|
||||
"version": "1.5.1",
|
||||
"version": "1.6.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
@@ -46,6 +46,97 @@
|
||||
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
|
||||
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
|
||||
},
|
||||
"node_modules/@jsep-plugin/assignment": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
|
||||
"integrity": "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==",
|
||||
"engines": {
|
||||
"node": ">= 10.16.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"jsep": "^0.4.0||^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@jsep-plugin/regex": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz",
|
||||
"integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==",
|
||||
"engines": {
|
||||
"node": ">= 10.16.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"jsep": "^0.4.0||^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@kubernetes/client-node": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-1.4.0.tgz",
|
||||
"integrity": "sha512-Zge3YvF7DJi264dU1b3wb/GmzR99JhUpqTvp+VGHfwZT+g7EOOYNScDJNZwXy9cszyIGPIs0VHr+kk8e95qqrA==",
|
||||
"dependencies": {
|
||||
"@types/js-yaml": "^4.0.1",
|
||||
"@types/node": "^24.0.0",
|
||||
"@types/node-fetch": "^2.6.13",
|
||||
"@types/stream-buffers": "^3.0.3",
|
||||
"form-data": "^4.0.0",
|
||||
"hpagent": "^1.2.0",
|
||||
"isomorphic-ws": "^5.0.0",
|
||||
"js-yaml": "^4.1.0",
|
||||
"jsonpath-plus": "^10.3.0",
|
||||
"node-fetch": "^2.7.0",
|
||||
"openid-client": "^6.1.3",
|
||||
"rfc4648": "^1.3.0",
|
||||
"socks-proxy-agent": "^8.0.4",
|
||||
"stream-buffers": "^3.0.2",
|
||||
"tar-fs": "^3.0.9",
|
||||
"ws": "^8.18.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@kubernetes/client-node/node_modules/@types/node": {
|
||||
"version": "24.10.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.3.tgz",
|
||||
"integrity": "sha512-gqkrWUsS8hcm0r44yn7/xZeV1ERva/nLgrLxFRUGb7aoNMIJfZJ3AC261zDQuOAKC7MiXai1WCpYc48jAHoShQ==",
|
||||
"dependencies": {
|
||||
"undici-types": "~7.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@kubernetes/client-node/node_modules/tar-fs": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
|
||||
"integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
|
||||
"dependencies": {
|
||||
"pump": "^3.0.0",
|
||||
"tar-stream": "^3.1.5"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"bare-fs": "^4.0.1",
|
||||
"bare-path": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@kubernetes/client-node/node_modules/undici-types": {
|
||||
"version": "7.16.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="
|
||||
},
|
||||
"node_modules/@kubernetes/client-node/node_modules/ws": {
|
||||
"version": "8.18.3",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
|
||||
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bufferutil": "^4.0.1",
|
||||
"utf-8-validate": ">=5.0.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bufferutil": {
|
||||
"optional": true
|
||||
},
|
||||
"utf-8-validate": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@mapbox/node-pre-gyp": {
|
||||
"version": "1.0.11",
|
||||
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
|
||||
@@ -251,6 +342,11 @@
|
||||
"integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/js-yaml": {
|
||||
"version": "4.0.9",
|
||||
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
|
||||
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
|
||||
},
|
||||
"node_modules/@types/jsonwebtoken": {
|
||||
"version": "9.0.10",
|
||||
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
|
||||
@@ -276,7 +372,6 @@
|
||||
"version": "20.19.25",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
|
||||
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
|
||||
"devOptional": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~6.21.0"
|
||||
}
|
||||
@@ -287,6 +382,15 @@
|
||||
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/node-fetch": {
|
||||
"version": "2.6.13",
|
||||
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
|
||||
"integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
|
||||
"dependencies": {
|
||||
"@types/node": "*",
|
||||
"form-data": "^4.0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/pg": {
|
||||
"version": "8.15.6",
|
||||
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
|
||||
@@ -340,6 +444,14 @@
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/stream-buffers": {
|
||||
"version": "3.0.8",
|
||||
"resolved": "https://registry.npmjs.org/@types/stream-buffers/-/stream-buffers-3.0.8.tgz",
|
||||
"integrity": "sha512-J+7VaHKNvlNPJPEJXX/fKa9DZtR/xPMwuIbe+yNOwp1YB+ApUOBv2aUpEoBJEi8nJgbgs1x8e73ttg0r1rSUdw==",
|
||||
"dependencies": {
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/uuid": {
|
||||
"version": "9.0.8",
|
||||
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
||||
@@ -520,6 +632,78 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-fs": {
|
||||
"version": "4.5.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
|
||||
"integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-events": "^2.5.4",
|
||||
"bare-path": "^3.0.0",
|
||||
"bare-stream": "^2.6.4",
|
||||
"bare-url": "^2.2.2",
|
||||
"fast-fifo": "^1.3.2"
|
||||
},
|
||||
"engines": {
|
||||
"bare": ">=1.16.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bare-buffer": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bare-buffer": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-os": {
|
||||
"version": "3.6.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
|
||||
"integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
|
||||
"optional": true,
|
||||
"engines": {
|
||||
"bare": ">=1.14.0"
|
||||
}
|
||||
},
|
||||
"node_modules/bare-path": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
|
||||
"integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-os": "^3.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/bare-stream": {
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
|
||||
"integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"streamx": "^2.21.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bare-buffer": "*",
|
||||
"bare-events": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bare-buffer": {
|
||||
"optional": true
|
||||
},
|
||||
"bare-events": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-url": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
|
||||
"integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-path": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/base64-js": {
|
||||
"version": "1.5.1",
|
||||
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
||||
@@ -2019,6 +2203,14 @@
|
||||
"node": ">=16.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/hpagent": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
|
||||
"integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/htmlparser2": {
|
||||
"version": "10.0.0",
|
||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
|
||||
@@ -2382,6 +2574,22 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/isomorphic-ws": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
|
||||
"integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
|
||||
"peerDependencies": {
|
||||
"ws": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/jose": {
|
||||
"version": "6.1.3",
|
||||
"resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
|
||||
"integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/panva"
|
||||
}
|
||||
},
|
||||
"node_modules/js-tokens": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
||||
@@ -2398,6 +2606,14 @@
|
||||
"js-yaml": "bin/js-yaml.js"
|
||||
}
|
||||
},
|
||||
"node_modules/jsep": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
|
||||
"integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
|
||||
"engines": {
|
||||
"node": ">= 10.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/json-parse-even-better-errors": {
|
||||
"version": "2.3.1",
|
||||
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
|
||||
@@ -2419,6 +2635,23 @@
|
||||
"graceful-fs": "^4.1.6"
|
||||
}
|
||||
},
|
||||
"node_modules/jsonpath-plus": {
|
||||
"version": "10.3.0",
|
||||
"resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
|
||||
"integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
|
||||
"dependencies": {
|
||||
"@jsep-plugin/assignment": "^1.3.0",
|
||||
"@jsep-plugin/regex": "^1.0.4",
|
||||
"jsep": "^1.4.0"
|
||||
},
|
||||
"bin": {
|
||||
"jsonpath": "bin/jsonpath-cli.js",
|
||||
"jsonpath-plus": "bin/jsonpath-cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/jsonwebtoken": {
|
||||
"version": "9.0.2",
|
||||
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
|
||||
@@ -2493,6 +2726,11 @@
|
||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
||||
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
|
||||
},
|
||||
"node_modules/lodash.clonedeep": {
|
||||
"version": "4.5.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
|
||||
"integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
|
||||
},
|
||||
"node_modules/lodash.defaults": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
||||
@@ -2942,6 +3180,14 @@
|
||||
"url": "https://github.com/fb55/nth-check?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/oauth4webapi": {
|
||||
"version": "3.8.3",
|
||||
"resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
|
||||
"integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/panva"
|
||||
}
|
||||
},
|
||||
"node_modules/object-assign": {
|
||||
"version": "4.1.1",
|
||||
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
||||
@@ -2980,6 +3226,18 @@
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"node_modules/openid-client": {
|
||||
"version": "6.8.1",
|
||||
"resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
|
||||
"integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
|
||||
"dependencies": {
|
||||
"jose": "^6.1.0",
|
||||
"oauth4webapi": "^3.8.2"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/panva"
|
||||
}
|
||||
},
|
||||
"node_modules/pac-proxy-agent": {
|
||||
"version": "7.2.0",
|
||||
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
||||
@@ -3883,6 +4141,11 @@
|
||||
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/rfc4648": {
|
||||
"version": "1.5.4",
|
||||
"resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
|
||||
"integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
|
||||
},
|
||||
"node_modules/rimraf": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
|
||||
@@ -4313,6 +4576,14 @@
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/stream-buffers": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
|
||||
"integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
|
||||
"engines": {
|
||||
"node": ">= 0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/streamx": {
|
||||
"version": "2.23.0",
|
||||
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
|
||||
@@ -4532,8 +4803,7 @@
|
||||
"node_modules/undici-types": {
|
||||
"version": "6.21.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
||||
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
|
||||
"devOptional": true
|
||||
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="
|
||||
},
|
||||
"node_modules/universalify": {
|
||||
"version": "2.0.1",
|
||||
@@ -4556,6 +4826,14 @@
|
||||
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
|
||||
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
|
||||
},
|
||||
"node_modules/user-agents": {
|
||||
"version": "1.1.669",
|
||||
"resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
|
||||
"integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
|
||||
"dependencies": {
|
||||
"lodash.clonedeep": "^4.5.0"
|
||||
}
|
||||
},
|
||||
"node_modules/util": {
|
||||
"version": "0.12.5",
|
||||
"resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
|
||||
|
||||
46
backend/src/_deprecated/DONT_USE.md
Normal file
46
backend/src/_deprecated/DONT_USE.md
Normal file
@@ -0,0 +1,46 @@
|
||||
# DEPRECATED CODE - DO NOT USE
|
||||
|
||||
**These directories contain OLD, ABANDONED code.**
|
||||
|
||||
## What's Here
|
||||
|
||||
| Directory | What It Was | Why Deprecated |
|
||||
|-----------|-------------|----------------|
|
||||
| `hydration/` | Old pipeline for processing crawl data | Replaced by `src/tasks/handlers/` |
|
||||
| `scraper-v2/` | Old Puppeteer-based scraper engine | Replaced by curl-based `src/platforms/dutchie/client.ts` |
|
||||
| `canonical-hydration/` | Intermediate step toward canonical schema | Merged into task handlers |
|
||||
|
||||
## What to Use Instead
|
||||
|
||||
| Old (DONT USE) | New (USE THIS) |
|
||||
|----------------|----------------|
|
||||
| `hydration/normalizers/dutchie.ts` | `src/tasks/handlers/product-refresh.ts` |
|
||||
| `hydration/producer.ts` | `src/tasks/handlers/payload-fetch.ts` |
|
||||
| `scraper-v2/engine.ts` | `src/platforms/dutchie/client.ts` |
|
||||
| `scraper-v2/scheduler.ts` | `src/services/task-scheduler.ts` |
|
||||
|
||||
## Why Keep This Code?
|
||||
|
||||
- Historical reference only
|
||||
- Some patterns may be useful for debugging
|
||||
- Will be deleted once confirmed not needed
|
||||
|
||||
## Claude Instructions
|
||||
|
||||
**IF YOU ARE CLAUDE:**
|
||||
|
||||
1. NEVER import from `src/_deprecated/`
|
||||
2. NEVER reference these files as examples
|
||||
3. NEVER try to "fix" or "update" code in here
|
||||
4. If you see imports from these directories, suggest replacing them
|
||||
|
||||
**Correct imports:**
|
||||
```typescript
|
||||
// GOOD
|
||||
import { executeGraphQL } from '../platforms/dutchie/client';
|
||||
import { pool } from '../db/pool';
|
||||
|
||||
// BAD - DO NOT USE
|
||||
import { something } from '../_deprecated/hydration/...';
|
||||
import { something } from '../_deprecated/scraper-v2/...';
|
||||
```
|
||||
584
backend/src/_deprecated/system/routes/index.ts
Normal file
584
backend/src/_deprecated/system/routes/index.ts
Normal file
@@ -0,0 +1,584 @@
|
||||
/**
|
||||
* System API Routes
|
||||
*
|
||||
* Provides REST API endpoints for system monitoring and control:
|
||||
* - /api/system/sync/* - Sync orchestrator
|
||||
* - /api/system/dlq/* - Dead-letter queue
|
||||
* - /api/system/integrity/* - Integrity checks
|
||||
* - /api/system/fix/* - Auto-fix routines
|
||||
* - /api/system/alerts/* - System alerts
|
||||
* - /metrics - Prometheus metrics
|
||||
*
|
||||
* Phase 5: Full Production Sync + Monitoring
|
||||
*/
|
||||
|
||||
import { Router, Request, Response } from 'express';
|
||||
import { Pool } from 'pg';
|
||||
import {
|
||||
SyncOrchestrator,
|
||||
MetricsService,
|
||||
DLQService,
|
||||
AlertService,
|
||||
IntegrityService,
|
||||
AutoFixService,
|
||||
} from '../services';
|
||||
|
||||
export function createSystemRouter(pool: Pool): Router {
|
||||
const router = Router();
|
||||
|
||||
// Initialize services
|
||||
const metrics = new MetricsService(pool);
|
||||
const dlq = new DLQService(pool);
|
||||
const alerts = new AlertService(pool);
|
||||
const integrity = new IntegrityService(pool, alerts);
|
||||
const autoFix = new AutoFixService(pool, alerts);
|
||||
const orchestrator = new SyncOrchestrator(pool, metrics, dlq, alerts);
|
||||
|
||||
// ============================================================
|
||||
// SYNC ORCHESTRATOR ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* GET /api/system/sync/status
|
||||
* Get current sync status
|
||||
*/
|
||||
router.get('/sync/status', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const status = await orchestrator.getStatus();
|
||||
res.json(status);
|
||||
} catch (error) {
|
||||
console.error('[System] Sync status error:', error);
|
||||
res.status(500).json({ error: 'Failed to get sync status' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/sync/run
|
||||
* Trigger a sync run
|
||||
*/
|
||||
router.post('/sync/run', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const triggeredBy = req.body.triggeredBy || 'api';
|
||||
const result = await orchestrator.runSync();
|
||||
res.json({
|
||||
success: true,
|
||||
triggeredBy,
|
||||
metrics: result,
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('[System] Sync run error:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: error instanceof Error ? error.message : 'Sync run failed',
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/sync/queue-depth
|
||||
* Get queue depth information
|
||||
*/
|
||||
router.get('/sync/queue-depth', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const depth = await orchestrator.getQueueDepth();
|
||||
res.json(depth);
|
||||
} catch (error) {
|
||||
console.error('[System] Queue depth error:', error);
|
||||
res.status(500).json({ error: 'Failed to get queue depth' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/sync/health
|
||||
* Get sync health status
|
||||
*/
|
||||
router.get('/sync/health', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const health = await orchestrator.getHealth();
|
||||
res.status(health.healthy ? 200 : 503).json(health);
|
||||
} catch (error) {
|
||||
console.error('[System] Health check error:', error);
|
||||
res.status(500).json({ healthy: false, error: 'Health check failed' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/sync/pause
|
||||
* Pause the orchestrator
|
||||
*/
|
||||
router.post('/sync/pause', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const reason = req.body.reason || 'Manual pause';
|
||||
await orchestrator.pause(reason);
|
||||
res.json({ success: true, message: 'Orchestrator paused' });
|
||||
} catch (error) {
|
||||
console.error('[System] Pause error:', error);
|
||||
res.status(500).json({ error: 'Failed to pause orchestrator' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/sync/resume
|
||||
* Resume the orchestrator
|
||||
*/
|
||||
router.post('/sync/resume', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
await orchestrator.resume();
|
||||
res.json({ success: true, message: 'Orchestrator resumed' });
|
||||
} catch (error) {
|
||||
console.error('[System] Resume error:', error);
|
||||
res.status(500).json({ error: 'Failed to resume orchestrator' });
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// DLQ ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* GET /api/system/dlq
|
||||
* List DLQ payloads
|
||||
*/
|
||||
router.get('/dlq', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const options = {
|
||||
status: req.query.status as string,
|
||||
errorType: req.query.errorType as string,
|
||||
dispensaryId: req.query.dispensaryId ? parseInt(req.query.dispensaryId as string) : undefined,
|
||||
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
|
||||
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
|
||||
};
|
||||
|
||||
const result = await dlq.listPayloads(options);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ list error:', error);
|
||||
res.status(500).json({ error: 'Failed to list DLQ payloads' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/dlq/stats
|
||||
* Get DLQ statistics
|
||||
*/
|
||||
router.get('/dlq/stats', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const stats = await dlq.getStats();
|
||||
res.json(stats);
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ stats error:', error);
|
||||
res.status(500).json({ error: 'Failed to get DLQ stats' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/dlq/summary
|
||||
* Get DLQ summary by error type
|
||||
*/
|
||||
router.get('/dlq/summary', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const summary = await dlq.getSummary();
|
||||
res.json(summary);
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ summary error:', error);
|
||||
res.status(500).json({ error: 'Failed to get DLQ summary' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/dlq/:id
|
||||
* Get a specific DLQ payload
|
||||
*/
|
||||
router.get('/dlq/:id', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const payload = await dlq.getPayload(req.params.id);
|
||||
if (!payload) {
|
||||
return res.status(404).json({ error: 'Payload not found' });
|
||||
}
|
||||
res.json(payload);
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ get error:', error);
|
||||
res.status(500).json({ error: 'Failed to get DLQ payload' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/dlq/:id/retry
|
||||
* Retry a DLQ payload
|
||||
*/
|
||||
router.post('/dlq/:id/retry', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const result = await dlq.retryPayload(req.params.id);
|
||||
if (result.success) {
|
||||
res.json(result);
|
||||
} else {
|
||||
res.status(400).json(result);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ retry error:', error);
|
||||
res.status(500).json({ error: 'Failed to retry payload' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/dlq/:id/abandon
|
||||
* Abandon a DLQ payload
|
||||
*/
|
||||
router.post('/dlq/:id/abandon', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const reason = req.body.reason || 'Manually abandoned';
|
||||
const abandonedBy = req.body.abandonedBy || 'api';
|
||||
const success = await dlq.abandonPayload(req.params.id, reason, abandonedBy);
|
||||
res.json({ success });
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ abandon error:', error);
|
||||
res.status(500).json({ error: 'Failed to abandon payload' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/dlq/bulk-retry
|
||||
* Bulk retry payloads by error type
|
||||
*/
|
||||
router.post('/dlq/bulk-retry', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { errorType } = req.body;
|
||||
if (!errorType) {
|
||||
return res.status(400).json({ error: 'errorType is required' });
|
||||
}
|
||||
const result = await dlq.bulkRetryByErrorType(errorType);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ bulk retry error:', error);
|
||||
res.status(500).json({ error: 'Failed to bulk retry' });
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// INTEGRITY CHECK ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* POST /api/system/integrity/run
|
||||
* Run all integrity checks
|
||||
*/
|
||||
router.post('/integrity/run', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const triggeredBy = req.body.triggeredBy || 'api';
|
||||
const result = await integrity.runAllChecks(triggeredBy);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('[System] Integrity run error:', error);
|
||||
res.status(500).json({ error: 'Failed to run integrity checks' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/integrity/runs
|
||||
* Get recent integrity check runs
|
||||
*/
|
||||
router.get('/integrity/runs', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const limit = req.query.limit ? parseInt(req.query.limit as string) : 10;
|
||||
const runs = await integrity.getRecentRuns(limit);
|
||||
res.json(runs);
|
||||
} catch (error) {
|
||||
console.error('[System] Integrity runs error:', error);
|
||||
res.status(500).json({ error: 'Failed to get integrity runs' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/integrity/runs/:runId
|
||||
* Get results for a specific integrity run
|
||||
*/
|
||||
router.get('/integrity/runs/:runId', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const results = await integrity.getRunResults(req.params.runId);
|
||||
res.json(results);
|
||||
} catch (error) {
|
||||
console.error('[System] Integrity run results error:', error);
|
||||
res.status(500).json({ error: 'Failed to get run results' });
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// AUTO-FIX ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* GET /api/system/fix/routines
|
||||
* Get available fix routines
|
||||
*/
|
||||
router.get('/fix/routines', (_req: Request, res: Response) => {
|
||||
try {
|
||||
const routines = autoFix.getAvailableRoutines();
|
||||
res.json(routines);
|
||||
} catch (error) {
|
||||
console.error('[System] Get routines error:', error);
|
||||
res.status(500).json({ error: 'Failed to get routines' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/fix/:routine
|
||||
* Run a fix routine
|
||||
*/
|
||||
router.post('/fix/:routine', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const routineName = req.params.routine;
|
||||
const dryRun = req.body.dryRun === true;
|
||||
const triggeredBy = req.body.triggeredBy || 'api';
|
||||
|
||||
const result = await autoFix.runRoutine(routineName as any, triggeredBy, { dryRun });
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('[System] Fix routine error:', error);
|
||||
res.status(500).json({ error: 'Failed to run fix routine' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/fix/runs
|
||||
* Get recent fix runs
|
||||
*/
|
||||
router.get('/fix/runs', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const limit = req.query.limit ? parseInt(req.query.limit as string) : 20;
|
||||
const runs = await autoFix.getRecentRuns(limit);
|
||||
res.json(runs);
|
||||
} catch (error) {
|
||||
console.error('[System] Fix runs error:', error);
|
||||
res.status(500).json({ error: 'Failed to get fix runs' });
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// ALERTS ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* GET /api/system/alerts
|
||||
* List alerts
|
||||
*/
|
||||
router.get('/alerts', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const options = {
|
||||
status: req.query.status as any,
|
||||
severity: req.query.severity as any,
|
||||
type: req.query.type as string,
|
||||
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
|
||||
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
|
||||
};
|
||||
|
||||
const result = await alerts.listAlerts(options);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('[System] Alerts list error:', error);
|
||||
res.status(500).json({ error: 'Failed to list alerts' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/alerts/active
|
||||
* Get active alerts
|
||||
*/
|
||||
router.get('/alerts/active', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const activeAlerts = await alerts.getActiveAlerts();
|
||||
res.json(activeAlerts);
|
||||
} catch (error) {
|
||||
console.error('[System] Active alerts error:', error);
|
||||
res.status(500).json({ error: 'Failed to get active alerts' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/alerts/summary
|
||||
* Get alert summary
|
||||
*/
|
||||
router.get('/alerts/summary', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const summary = await alerts.getSummary();
|
||||
res.json(summary);
|
||||
} catch (error) {
|
||||
console.error('[System] Alerts summary error:', error);
|
||||
res.status(500).json({ error: 'Failed to get alerts summary' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/alerts/:id/acknowledge
|
||||
* Acknowledge an alert
|
||||
*/
|
||||
router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const alertId = parseInt(req.params.id);
|
||||
const acknowledgedBy = req.body.acknowledgedBy || 'api';
|
||||
const success = await alerts.acknowledgeAlert(alertId, acknowledgedBy);
|
||||
res.json({ success });
|
||||
} catch (error) {
|
||||
console.error('[System] Acknowledge alert error:', error);
|
||||
res.status(500).json({ error: 'Failed to acknowledge alert' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/alerts/:id/resolve
|
||||
* Resolve an alert
|
||||
*/
|
||||
router.post('/alerts/:id/resolve', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const alertId = parseInt(req.params.id);
|
||||
const resolvedBy = req.body.resolvedBy || 'api';
|
||||
const success = await alerts.resolveAlert(alertId, resolvedBy);
|
||||
res.json({ success });
|
||||
} catch (error) {
|
||||
console.error('[System] Resolve alert error:', error);
|
||||
res.status(500).json({ error: 'Failed to resolve alert' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/alerts/bulk-acknowledge
|
||||
* Bulk acknowledge alerts
|
||||
*/
|
||||
router.post('/alerts/bulk-acknowledge', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { ids, acknowledgedBy } = req.body;
|
||||
if (!ids || !Array.isArray(ids)) {
|
||||
return res.status(400).json({ error: 'ids array is required' });
|
||||
}
|
||||
const count = await alerts.bulkAcknowledge(ids, acknowledgedBy || 'api');
|
||||
res.json({ acknowledged: count });
|
||||
} catch (error) {
|
||||
console.error('[System] Bulk acknowledge error:', error);
|
||||
res.status(500).json({ error: 'Failed to bulk acknowledge' });
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// METRICS ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* GET /api/system/metrics
|
||||
* Get all current metrics
|
||||
*/
|
||||
router.get('/metrics', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const allMetrics = await metrics.getAllMetrics();
|
||||
res.json(allMetrics);
|
||||
} catch (error) {
|
||||
console.error('[System] Metrics error:', error);
|
||||
res.status(500).json({ error: 'Failed to get metrics' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/metrics/:name
|
||||
* Get a specific metric
|
||||
*/
|
||||
router.get('/metrics/:name', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const metric = await metrics.getMetric(req.params.name);
|
||||
if (!metric) {
|
||||
return res.status(404).json({ error: 'Metric not found' });
|
||||
}
|
||||
res.json(metric);
|
||||
} catch (error) {
|
||||
console.error('[System] Metric error:', error);
|
||||
res.status(500).json({ error: 'Failed to get metric' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/metrics/:name/history
|
||||
* Get metric time series
|
||||
*/
|
||||
router.get('/metrics/:name/history', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const hours = req.query.hours ? parseInt(req.query.hours as string) : 24;
|
||||
const history = await metrics.getMetricHistory(req.params.name, hours);
|
||||
res.json(history);
|
||||
} catch (error) {
|
||||
console.error('[System] Metric history error:', error);
|
||||
res.status(500).json({ error: 'Failed to get metric history' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/errors
|
||||
* Get error summary
|
||||
*/
|
||||
router.get('/errors', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const summary = await metrics.getErrorSummary();
|
||||
res.json(summary);
|
||||
} catch (error) {
|
||||
console.error('[System] Error summary error:', error);
|
||||
res.status(500).json({ error: 'Failed to get error summary' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/errors/recent
|
||||
* Get recent errors
|
||||
*/
|
||||
router.get('/errors/recent', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const limit = req.query.limit ? parseInt(req.query.limit as string) : 50;
|
||||
const errorType = req.query.type as string;
|
||||
const errors = await metrics.getRecentErrors(limit, errorType);
|
||||
res.json(errors);
|
||||
} catch (error) {
|
||||
console.error('[System] Recent errors error:', error);
|
||||
res.status(500).json({ error: 'Failed to get recent errors' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/errors/acknowledge
|
||||
* Acknowledge errors
|
||||
*/
|
||||
router.post('/errors/acknowledge', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { ids, acknowledgedBy } = req.body;
|
||||
if (!ids || !Array.isArray(ids)) {
|
||||
return res.status(400).json({ error: 'ids array is required' });
|
||||
}
|
||||
const count = await metrics.acknowledgeErrors(ids, acknowledgedBy || 'api');
|
||||
res.json({ acknowledged: count });
|
||||
} catch (error) {
|
||||
console.error('[System] Acknowledge errors error:', error);
|
||||
res.status(500).json({ error: 'Failed to acknowledge errors' });
|
||||
}
|
||||
});
|
||||
|
||||
return router;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create Prometheus metrics endpoint (standalone)
|
||||
*/
|
||||
export function createPrometheusRouter(pool: Pool): Router {
|
||||
const router = Router();
|
||||
const metrics = new MetricsService(pool);
|
||||
|
||||
/**
|
||||
* GET /metrics
|
||||
* Prometheus-compatible metrics endpoint
|
||||
*/
|
||||
router.get('/', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const prometheusOutput = await metrics.getPrometheusMetrics();
|
||||
res.set('Content-Type', 'text/plain; version=0.0.4');
|
||||
res.send(prometheusOutput);
|
||||
} catch (error) {
|
||||
console.error('[Prometheus] Metrics error:', error);
|
||||
res.status(500).send('# Error generating metrics');
|
||||
}
|
||||
});
|
||||
|
||||
return router;
|
||||
}
|
||||
@@ -109,7 +109,7 @@ import scraperMonitorRoutes from './routes/scraper-monitor';
|
||||
import apiTokensRoutes from './routes/api-tokens';
|
||||
import apiPermissionsRoutes from './routes/api-permissions';
|
||||
import parallelScrapeRoutes from './routes/parallel-scrape';
|
||||
import crawlerSandboxRoutes from './routes/crawler-sandbox';
|
||||
// crawler-sandbox moved to _deprecated
|
||||
import versionRoutes from './routes/version';
|
||||
import deployStatusRoutes from './routes/deploy-status';
|
||||
import publicApiRoutes from './routes/public-api';
|
||||
@@ -146,6 +146,7 @@ import tasksRoutes from './routes/tasks';
|
||||
import workerRegistryRoutes from './routes/worker-registry';
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Raw payload access API
|
||||
import payloadsRoutes from './routes/payloads';
|
||||
import k8sRoutes from './routes/k8s';
|
||||
|
||||
// Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com)
|
||||
// These domains can access the API without authentication
|
||||
@@ -186,7 +187,7 @@ app.use('/api/scraper-monitor', scraperMonitorRoutes);
|
||||
app.use('/api/api-tokens', apiTokensRoutes);
|
||||
app.use('/api/api-permissions', apiPermissionsRoutes);
|
||||
app.use('/api/parallel-scrape', parallelScrapeRoutes);
|
||||
app.use('/api/crawler-sandbox', crawlerSandboxRoutes);
|
||||
// crawler-sandbox moved to _deprecated
|
||||
app.use('/api/version', versionRoutes);
|
||||
app.use('/api/admin/deploy-status', deployStatusRoutes);
|
||||
console.log('[DeployStatus] Routes registered at /api/admin/deploy-status');
|
||||
@@ -230,6 +231,10 @@ console.log('[WorkerRegistry] Routes registered at /api/worker-registry');
|
||||
app.use('/api/payloads', payloadsRoutes);
|
||||
console.log('[Payloads] Routes registered at /api/payloads');
|
||||
|
||||
// K8s control routes - worker scaling from admin UI
|
||||
app.use('/api/k8s', k8sRoutes);
|
||||
console.log('[K8s] Routes registered at /api/k8s');
|
||||
|
||||
// Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation
|
||||
try {
|
||||
const analyticsV2Router = createAnalyticsV2Router(getPool());
|
||||
|
||||
@@ -702,12 +702,10 @@ export class StateQueryService {
|
||||
async getNationalSummary(): Promise<NationalSummary> {
|
||||
const stateMetrics = await this.getAllStateMetrics();
|
||||
|
||||
// Get all states count and aggregate metrics
|
||||
const result = await this.pool.query(`
|
||||
SELECT
|
||||
COUNT(DISTINCT s.code) AS total_states,
|
||||
COUNT(DISTINCT CASE WHEN EXISTS (
|
||||
SELECT 1 FROM dispensaries d WHERE d.state = s.code AND d.menu_type IS NOT NULL
|
||||
) THEN s.code END) AS active_states,
|
||||
(SELECT COUNT(*) FROM dispensaries WHERE state IS NOT NULL) AS total_stores,
|
||||
(SELECT COUNT(*) FROM store_products sp
|
||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||
@@ -725,7 +723,7 @@ export class StateQueryService {
|
||||
|
||||
return {
|
||||
totalStates: parseInt(data.total_states),
|
||||
activeStates: parseInt(data.active_states),
|
||||
activeStates: parseInt(data.total_states), // Same as totalStates - all states shown
|
||||
totalStores: parseInt(data.total_stores),
|
||||
totalProducts: parseInt(data.total_products),
|
||||
totalBrands: parseInt(data.total_brands),
|
||||
|
||||
@@ -47,4 +47,27 @@ router.post('/refresh', authMiddleware, async (req: AuthRequest, res) => {
|
||||
res.json({ token });
|
||||
});
|
||||
|
||||
// Verify password for sensitive actions (requires current user to be authenticated)
|
||||
router.post('/verify-password', authMiddleware, async (req: AuthRequest, res) => {
|
||||
try {
|
||||
const { password } = req.body;
|
||||
|
||||
if (!password) {
|
||||
return res.status(400).json({ error: 'Password required' });
|
||||
}
|
||||
|
||||
// Re-authenticate the current user with the provided password
|
||||
const user = await authenticateUser(req.user!.email, password);
|
||||
|
||||
if (!user) {
|
||||
return res.status(401).json({ error: 'Invalid password', verified: false });
|
||||
}
|
||||
|
||||
res.json({ verified: true });
|
||||
} catch (error) {
|
||||
console.error('Password verification error:', error);
|
||||
res.status(500).json({ error: 'Internal server error' });
|
||||
}
|
||||
});
|
||||
|
||||
export default router;
|
||||
|
||||
@@ -14,13 +14,25 @@ router.use(authMiddleware);
|
||||
/**
|
||||
* GET /api/admin/intelligence/brands
|
||||
* List all brands with state presence, store counts, and pricing
|
||||
* Query params:
|
||||
* - state: Filter by state (e.g., "AZ")
|
||||
* - limit: Max results (default 500)
|
||||
* - offset: Pagination offset
|
||||
*/
|
||||
router.get('/brands', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { limit = '500', offset = '0' } = req.query;
|
||||
const { limit = '500', offset = '0', state } = req.query;
|
||||
const limitNum = Math.min(parseInt(limit as string, 10), 1000);
|
||||
const offsetNum = parseInt(offset as string, 10);
|
||||
|
||||
// Build WHERE clause based on state filter
|
||||
let stateFilter = '';
|
||||
const params: any[] = [limitNum, offsetNum];
|
||||
if (state && state !== 'all') {
|
||||
stateFilter = 'AND d.state = $3';
|
||||
params.push(state);
|
||||
}
|
||||
|
||||
const { rows } = await pool.query(`
|
||||
SELECT
|
||||
sp.brand_name_raw as brand_name,
|
||||
@@ -32,17 +44,26 @@ router.get('/brands', async (req: Request, res: Response) => {
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||
WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != ''
|
||||
${stateFilter}
|
||||
GROUP BY sp.brand_name_raw
|
||||
ORDER BY store_count DESC, sku_count DESC
|
||||
LIMIT $1 OFFSET $2
|
||||
`, [limitNum, offsetNum]);
|
||||
`, params);
|
||||
|
||||
// Get total count
|
||||
// Get total count with same state filter
|
||||
const countParams: any[] = [];
|
||||
let countStateFilter = '';
|
||||
if (state && state !== 'all') {
|
||||
countStateFilter = 'AND d.state = $1';
|
||||
countParams.push(state);
|
||||
}
|
||||
const { rows: countRows } = await pool.query(`
|
||||
SELECT COUNT(DISTINCT brand_name_raw) as total
|
||||
FROM store_products
|
||||
WHERE brand_name_raw IS NOT NULL AND brand_name_raw != ''
|
||||
`);
|
||||
SELECT COUNT(DISTINCT sp.brand_name_raw) as total
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||
WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != ''
|
||||
${countStateFilter}
|
||||
`, countParams);
|
||||
|
||||
res.json({
|
||||
brands: rows.map((r: any) => ({
|
||||
@@ -147,23 +168,58 @@ router.get('/brands/:brandName/penetration', async (req: Request, res: Response)
|
||||
/**
|
||||
* GET /api/admin/intelligence/pricing
|
||||
* Get pricing analytics by category
|
||||
* Query params:
|
||||
* - state: Filter by state (e.g., "AZ")
|
||||
*/
|
||||
router.get('/pricing', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { rows: categoryRows } = await pool.query(`
|
||||
SELECT
|
||||
sp.category_raw as category,
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||
MIN(sp.price_rec) as min_price,
|
||||
MAX(sp.price_rec) as max_price,
|
||||
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) as median_price,
|
||||
COUNT(*) as product_count
|
||||
FROM store_products sp
|
||||
WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0
|
||||
GROUP BY sp.category_raw
|
||||
ORDER BY product_count DESC
|
||||
`);
|
||||
const { state } = req.query;
|
||||
|
||||
// Build WHERE clause based on state filter
|
||||
let stateFilter = '';
|
||||
const categoryParams: any[] = [];
|
||||
const stateQueryParams: any[] = [];
|
||||
const overallParams: any[] = [];
|
||||
|
||||
if (state && state !== 'all') {
|
||||
stateFilter = 'AND d.state = $1';
|
||||
categoryParams.push(state);
|
||||
overallParams.push(state);
|
||||
}
|
||||
|
||||
// Category pricing with optional state filter
|
||||
const categoryQuery = state && state !== 'all'
|
||||
? `
|
||||
SELECT
|
||||
sp.category_raw as category,
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||
MIN(sp.price_rec) as min_price,
|
||||
MAX(sp.price_rec) as max_price,
|
||||
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) as median_price,
|
||||
COUNT(*) as product_count
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||
WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0 ${stateFilter}
|
||||
GROUP BY sp.category_raw
|
||||
ORDER BY product_count DESC
|
||||
`
|
||||
: `
|
||||
SELECT
|
||||
sp.category_raw as category,
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||
MIN(sp.price_rec) as min_price,
|
||||
MAX(sp.price_rec) as max_price,
|
||||
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) as median_price,
|
||||
COUNT(*) as product_count
|
||||
FROM store_products sp
|
||||
WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0
|
||||
GROUP BY sp.category_raw
|
||||
ORDER BY product_count DESC
|
||||
`;
|
||||
|
||||
const { rows: categoryRows } = await pool.query(categoryQuery, categoryParams);
|
||||
|
||||
// State pricing
|
||||
const { rows: stateRows } = await pool.query(`
|
||||
SELECT
|
||||
d.state,
|
||||
@@ -178,6 +234,31 @@ router.get('/pricing', async (req: Request, res: Response) => {
|
||||
ORDER BY avg_price DESC
|
||||
`);
|
||||
|
||||
// Overall stats with optional state filter
|
||||
const overallQuery = state && state !== 'all'
|
||||
? `
|
||||
SELECT
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||
MIN(sp.price_rec) as min_price,
|
||||
MAX(sp.price_rec) as max_price,
|
||||
COUNT(*) as total_products
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||
WHERE sp.price_rec > 0 ${stateFilter}
|
||||
`
|
||||
: `
|
||||
SELECT
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||
MIN(sp.price_rec) as min_price,
|
||||
MAX(sp.price_rec) as max_price,
|
||||
COUNT(*) as total_products
|
||||
FROM store_products sp
|
||||
WHERE sp.price_rec > 0
|
||||
`;
|
||||
|
||||
const { rows: overallRows } = await pool.query(overallQuery, overallParams);
|
||||
const overall = overallRows[0];
|
||||
|
||||
res.json({
|
||||
byCategory: categoryRows.map((r: any) => ({
|
||||
category: r.category,
|
||||
@@ -194,6 +275,12 @@ router.get('/pricing', async (req: Request, res: Response) => {
|
||||
maxPrice: r.max_price ? parseFloat(r.max_price) : null,
|
||||
productCount: parseInt(r.product_count, 10),
|
||||
})),
|
||||
overall: {
|
||||
avgPrice: overall?.avg_price ? parseFloat(overall.avg_price) : null,
|
||||
minPrice: overall?.min_price ? parseFloat(overall.min_price) : null,
|
||||
maxPrice: overall?.max_price ? parseFloat(overall.max_price) : null,
|
||||
totalProducts: parseInt(overall?.total_products || '0', 10),
|
||||
},
|
||||
});
|
||||
} catch (error: any) {
|
||||
console.error('[Intelligence] Error fetching pricing:', error.message);
|
||||
@@ -204,9 +291,23 @@ router.get('/pricing', async (req: Request, res: Response) => {
|
||||
/**
|
||||
* GET /api/admin/intelligence/stores
|
||||
* Get store intelligence summary
|
||||
* Query params:
|
||||
* - state: Filter by state (e.g., "AZ")
|
||||
* - limit: Max results (default 200)
|
||||
*/
|
||||
router.get('/stores', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { state, limit = '200' } = req.query;
|
||||
const limitNum = Math.min(parseInt(limit as string, 10), 500);
|
||||
|
||||
// Build WHERE clause based on state filter
|
||||
let stateFilter = '';
|
||||
const params: any[] = [limitNum];
|
||||
if (state && state !== 'all') {
|
||||
stateFilter = 'AND d.state = $2';
|
||||
params.push(state);
|
||||
}
|
||||
|
||||
const { rows: storeRows } = await pool.query(`
|
||||
SELECT
|
||||
d.id,
|
||||
@@ -216,17 +317,22 @@ router.get('/stores', async (req: Request, res: Response) => {
|
||||
d.state,
|
||||
d.menu_type,
|
||||
d.crawl_enabled,
|
||||
COUNT(DISTINCT sp.id) as product_count,
|
||||
c.name as chain_name,
|
||||
COUNT(DISTINCT sp.id) as sku_count,
|
||||
COUNT(DISTINCT sp.brand_name_raw) as brand_count,
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||
MAX(sp.updated_at) as last_product_update
|
||||
MAX(sp.updated_at) as last_crawl,
|
||||
(SELECT COUNT(*) FROM store_product_snapshots sps
|
||||
WHERE sps.store_product_id IN (SELECT id FROM store_products WHERE dispensary_id = d.id)) as snapshot_count
|
||||
FROM dispensaries d
|
||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
|
||||
WHERE d.state IS NOT NULL
|
||||
GROUP BY d.id, d.name, d.dba_name, d.city, d.state, d.menu_type, d.crawl_enabled
|
||||
ORDER BY product_count DESC
|
||||
LIMIT 200
|
||||
`);
|
||||
LEFT JOIN chains c ON d.chain_id = c.id
|
||||
WHERE d.state IS NOT NULL AND d.crawl_enabled = true
|
||||
${stateFilter}
|
||||
GROUP BY d.id, d.name, d.dba_name, d.city, d.state, d.menu_type, d.crawl_enabled, c.name
|
||||
ORDER BY sku_count DESC
|
||||
LIMIT $1
|
||||
`, params);
|
||||
|
||||
res.json({
|
||||
stores: storeRows.map((r: any) => ({
|
||||
@@ -237,10 +343,13 @@ router.get('/stores', async (req: Request, res: Response) => {
|
||||
state: r.state,
|
||||
menuType: r.menu_type,
|
||||
crawlEnabled: r.crawl_enabled,
|
||||
productCount: parseInt(r.product_count || '0', 10),
|
||||
chainName: r.chain_name || null,
|
||||
skuCount: parseInt(r.sku_count || '0', 10),
|
||||
snapshotCount: parseInt(r.snapshot_count || '0', 10),
|
||||
brandCount: parseInt(r.brand_count || '0', 10),
|
||||
avgPrice: r.avg_price ? parseFloat(r.avg_price) : null,
|
||||
lastProductUpdate: r.last_product_update,
|
||||
lastCrawl: r.last_crawl,
|
||||
crawlFrequencyHours: 4, // Default crawl frequency
|
||||
})),
|
||||
total: storeRows.length,
|
||||
});
|
||||
|
||||
140
backend/src/routes/k8s.ts
Normal file
140
backend/src/routes/k8s.ts
Normal file
@@ -0,0 +1,140 @@
|
||||
/**
|
||||
* Kubernetes Control Routes
|
||||
*
|
||||
* Provides admin UI control over k8s resources like worker scaling.
|
||||
* Uses in-cluster config when running in k8s, or kubeconfig locally.
|
||||
*/
|
||||
|
||||
import { Router, Request, Response } from 'express';
|
||||
import * as k8s from '@kubernetes/client-node';
|
||||
|
||||
const router = Router();
|
||||
|
||||
// K8s client setup - lazy initialization
|
||||
let appsApi: k8s.AppsV1Api | null = null;
|
||||
let k8sError: string | null = null;
|
||||
|
||||
function getK8sClient(): k8s.AppsV1Api | null {
|
||||
if (appsApi) return appsApi;
|
||||
if (k8sError) return null;
|
||||
|
||||
try {
|
||||
const kc = new k8s.KubeConfig();
|
||||
|
||||
// Try in-cluster config first (when running in k8s)
|
||||
try {
|
||||
kc.loadFromCluster();
|
||||
console.log('[K8s] Loaded in-cluster config');
|
||||
} catch {
|
||||
// Fall back to default kubeconfig (local dev)
|
||||
try {
|
||||
kc.loadFromDefault();
|
||||
console.log('[K8s] Loaded default kubeconfig');
|
||||
} catch (e) {
|
||||
k8sError = 'No k8s config available';
|
||||
console.log('[K8s] No config available - k8s routes disabled');
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
appsApi = kc.makeApiClient(k8s.AppsV1Api);
|
||||
return appsApi;
|
||||
} catch (e: any) {
|
||||
k8sError = e.message;
|
||||
console.error('[K8s] Failed to initialize client:', e.message);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
const NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
|
||||
const WORKER_DEPLOYMENT = 'scraper-worker';
|
||||
|
||||
/**
|
||||
* GET /api/k8s/workers
|
||||
* Get current worker deployment status
|
||||
*/
|
||||
router.get('/workers', async (_req: Request, res: Response) => {
|
||||
const client = getK8sClient();
|
||||
|
||||
if (!client) {
|
||||
return res.json({
|
||||
success: true,
|
||||
available: false,
|
||||
error: k8sError || 'K8s not available',
|
||||
replicas: 0,
|
||||
readyReplicas: 0,
|
||||
});
|
||||
}
|
||||
|
||||
try {
|
||||
const deployment = await client.readNamespacedDeployment({
|
||||
name: WORKER_DEPLOYMENT,
|
||||
namespace: NAMESPACE,
|
||||
});
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
available: true,
|
||||
replicas: deployment.spec?.replicas || 0,
|
||||
readyReplicas: deployment.status?.readyReplicas || 0,
|
||||
availableReplicas: deployment.status?.availableReplicas || 0,
|
||||
updatedReplicas: deployment.status?.updatedReplicas || 0,
|
||||
});
|
||||
} catch (e: any) {
|
||||
console.error('[K8s] Error getting deployment:', e.message);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: e.message,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/k8s/workers/scale
|
||||
* Scale worker deployment
|
||||
* Body: { replicas: number }
|
||||
*/
|
||||
router.post('/workers/scale', async (req: Request, res: Response) => {
|
||||
const client = getK8sClient();
|
||||
|
||||
if (!client) {
|
||||
return res.status(503).json({
|
||||
success: false,
|
||||
error: k8sError || 'K8s not available',
|
||||
});
|
||||
}
|
||||
|
||||
const { replicas } = req.body;
|
||||
|
||||
if (typeof replicas !== 'number' || replicas < 0 || replicas > 50) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: 'replicas must be a number between 0 and 50',
|
||||
});
|
||||
}
|
||||
|
||||
try {
|
||||
// Patch the deployment to set replicas
|
||||
await client.patchNamespacedDeploymentScale({
|
||||
name: WORKER_DEPLOYMENT,
|
||||
namespace: NAMESPACE,
|
||||
body: { spec: { replicas } },
|
||||
});
|
||||
|
||||
console.log(`[K8s] Scaled ${WORKER_DEPLOYMENT} to ${replicas} replicas`);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
replicas,
|
||||
message: `Scaled to ${replicas} workers`,
|
||||
});
|
||||
} catch (e: any) {
|
||||
console.error('[K8s] Error scaling deployment:', e.message);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: e.message,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
export default router;
|
||||
@@ -291,6 +291,107 @@ router.get('/stores/:id/summary', async (req: Request, res: Response) => {
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/markets/stores/:id/crawl-history
|
||||
* Get crawl history for a specific store
|
||||
*/
|
||||
router.get('/stores/:id/crawl-history', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { id } = req.params;
|
||||
const { limit = '50' } = req.query;
|
||||
const dispensaryId = parseInt(id, 10);
|
||||
const limitNum = Math.min(parseInt(limit as string, 10), 100);
|
||||
|
||||
// Get crawl history from crawl_orchestration_traces
|
||||
const { rows: historyRows } = await pool.query(`
|
||||
SELECT
|
||||
id,
|
||||
run_id,
|
||||
profile_key,
|
||||
crawler_module,
|
||||
state_at_start,
|
||||
state_at_end,
|
||||
total_steps,
|
||||
duration_ms,
|
||||
success,
|
||||
error_message,
|
||||
products_found,
|
||||
started_at,
|
||||
completed_at
|
||||
FROM crawl_orchestration_traces
|
||||
WHERE dispensary_id = $1
|
||||
ORDER BY started_at DESC
|
||||
LIMIT $2
|
||||
`, [dispensaryId, limitNum]);
|
||||
|
||||
// Get next scheduled crawl if available
|
||||
const { rows: scheduleRows } = await pool.query(`
|
||||
SELECT
|
||||
js.id as schedule_id,
|
||||
js.job_name,
|
||||
js.enabled,
|
||||
js.base_interval_minutes,
|
||||
js.jitter_minutes,
|
||||
js.next_run_at,
|
||||
js.last_run_at,
|
||||
js.last_status
|
||||
FROM job_schedules js
|
||||
WHERE js.enabled = true
|
||||
AND js.job_config->>'dispensaryId' = $1::text
|
||||
ORDER BY js.next_run_at
|
||||
LIMIT 1
|
||||
`, [dispensaryId.toString()]);
|
||||
|
||||
// Get dispensary info for slug
|
||||
const { rows: dispRows } = await pool.query(`
|
||||
SELECT
|
||||
id,
|
||||
name,
|
||||
dba_name,
|
||||
slug,
|
||||
state,
|
||||
city,
|
||||
menu_type,
|
||||
platform_dispensary_id,
|
||||
last_menu_scrape
|
||||
FROM dispensaries
|
||||
WHERE id = $1
|
||||
`, [dispensaryId]);
|
||||
|
||||
res.json({
|
||||
dispensary: dispRows[0] || null,
|
||||
history: historyRows.map(row => ({
|
||||
id: row.id,
|
||||
runId: row.run_id,
|
||||
profileKey: row.profile_key,
|
||||
crawlerModule: row.crawler_module,
|
||||
stateAtStart: row.state_at_start,
|
||||
stateAtEnd: row.state_at_end,
|
||||
totalSteps: row.total_steps,
|
||||
durationMs: row.duration_ms,
|
||||
success: row.success,
|
||||
errorMessage: row.error_message,
|
||||
productsFound: row.products_found,
|
||||
startedAt: row.started_at?.toISOString() || null,
|
||||
completedAt: row.completed_at?.toISOString() || null,
|
||||
})),
|
||||
nextSchedule: scheduleRows[0] ? {
|
||||
scheduleId: scheduleRows[0].schedule_id,
|
||||
jobName: scheduleRows[0].job_name,
|
||||
enabled: scheduleRows[0].enabled,
|
||||
baseIntervalMinutes: scheduleRows[0].base_interval_minutes,
|
||||
jitterMinutes: scheduleRows[0].jitter_minutes,
|
||||
nextRunAt: scheduleRows[0].next_run_at?.toISOString() || null,
|
||||
lastRunAt: scheduleRows[0].last_run_at?.toISOString() || null,
|
||||
lastStatus: scheduleRows[0].last_status,
|
||||
} : null,
|
||||
});
|
||||
} catch (error: any) {
|
||||
console.error('[Markets] Error fetching crawl history:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/markets/stores/:id/products
|
||||
* Get products for a store with filtering and pagination
|
||||
|
||||
@@ -78,14 +78,14 @@ router.get('/metrics', async (_req: Request, res: Response) => {
|
||||
|
||||
/**
|
||||
* GET /api/admin/orchestrator/states
|
||||
* Returns array of states with at least one known dispensary
|
||||
* Returns array of states with at least one crawl-enabled dispensary
|
||||
*/
|
||||
router.get('/states', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const { rows } = await pool.query(`
|
||||
SELECT DISTINCT state, COUNT(*) as store_count
|
||||
FROM dispensaries
|
||||
WHERE state IS NOT NULL
|
||||
WHERE state IS NOT NULL AND crawl_enabled = true
|
||||
GROUP BY state
|
||||
ORDER BY state
|
||||
`);
|
||||
|
||||
@@ -278,7 +278,7 @@ router.post('/update-locations', requireRole('superadmin', 'admin'), async (req,
|
||||
|
||||
// Run in background
|
||||
updateAllProxyLocations().catch(err => {
|
||||
console.error('❌ Location update failed:', err);
|
||||
console.error('Location update failed:', err);
|
||||
});
|
||||
|
||||
res.json({ message: 'Location update job started' });
|
||||
|
||||
@@ -13,6 +13,12 @@ import {
|
||||
TaskFilter,
|
||||
} from '../tasks/task-service';
|
||||
import { pool } from '../db/pool';
|
||||
import {
|
||||
isTaskPoolPaused,
|
||||
pauseTaskPool,
|
||||
resumeTaskPool,
|
||||
getTaskPoolStatus,
|
||||
} from '../tasks/task-pool-state';
|
||||
|
||||
const router = Router();
|
||||
|
||||
@@ -592,4 +598,42 @@ router.post('/migration/full-migrate', async (req: Request, res: Response) => {
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/tasks/pool/status
|
||||
* Check if task pool is paused
|
||||
*/
|
||||
router.get('/pool/status', async (_req: Request, res: Response) => {
|
||||
const status = getTaskPoolStatus();
|
||||
res.json({
|
||||
success: true,
|
||||
...status,
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/tasks/pool/pause
|
||||
* Pause the task pool - workers won't pick up new tasks
|
||||
*/
|
||||
router.post('/pool/pause', async (_req: Request, res: Response) => {
|
||||
pauseTaskPool();
|
||||
res.json({
|
||||
success: true,
|
||||
paused: true,
|
||||
message: 'Task pool paused - workers will not pick up new tasks',
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/tasks/pool/resume
|
||||
* Resume the task pool - workers will pick up tasks again
|
||||
*/
|
||||
router.post('/pool/resume', async (_req: Request, res: Response) => {
|
||||
resumeTaskPool();
|
||||
res.json({
|
||||
success: true,
|
||||
paused: false,
|
||||
message: 'Task pool resumed - workers will pick up new tasks',
|
||||
});
|
||||
});
|
||||
|
||||
export default router;
|
||||
|
||||
@@ -70,21 +70,20 @@ router.post('/register', async (req: Request, res: Response) => {
|
||||
);
|
||||
|
||||
if (existing.rows.length > 0) {
|
||||
// Re-activate existing worker
|
||||
// Re-activate existing worker - keep existing pod_name (fantasy name), don't overwrite with K8s name
|
||||
const { rows } = await pool.query(`
|
||||
UPDATE worker_registry
|
||||
SET status = 'active',
|
||||
role = $1,
|
||||
pod_name = $2,
|
||||
hostname = $3,
|
||||
ip_address = $4,
|
||||
hostname = $2,
|
||||
ip_address = $3,
|
||||
last_heartbeat_at = NOW(),
|
||||
started_at = NOW(),
|
||||
metadata = $5,
|
||||
metadata = $4,
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = $6
|
||||
RETURNING id, worker_id, friendly_name, role
|
||||
`, [role, pod_name, finalHostname, clientIp, metadata, finalWorkerId]);
|
||||
WHERE worker_id = $5
|
||||
RETURNING id, worker_id, friendly_name, pod_name, role
|
||||
`, [role, finalHostname, clientIp, metadata, finalWorkerId]);
|
||||
|
||||
const worker = rows[0];
|
||||
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
|
||||
@@ -105,13 +104,13 @@ router.post('/register', async (req: Request, res: Response) => {
|
||||
const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]);
|
||||
const friendlyName = nameResult.rows[0].name;
|
||||
|
||||
// Register the worker
|
||||
// Register the worker - use friendlyName as pod_name (not K8s name)
|
||||
const { rows } = await pool.query(`
|
||||
INSERT INTO worker_registry (
|
||||
worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7)
|
||||
RETURNING id, worker_id, friendly_name, role
|
||||
`, [finalWorkerId, friendlyName, role, pod_name, finalHostname, clientIp, metadata]);
|
||||
RETURNING id, worker_id, friendly_name, pod_name, role
|
||||
`, [finalWorkerId, friendlyName, role, friendlyName, finalHostname, clientIp, metadata]);
|
||||
|
||||
const worker = rows[0];
|
||||
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
|
||||
@@ -138,17 +137,36 @@ router.post('/register', async (req: Request, res: Response) => {
|
||||
*
|
||||
* Body:
|
||||
* - worker_id: string (required)
|
||||
* - current_task_id: number (optional) - task currently being processed
|
||||
* - current_task_id: number (optional) - task currently being processed (primary task)
|
||||
* - current_task_ids: number[] (optional) - all tasks currently being processed (concurrent)
|
||||
* - active_task_count: number (optional) - number of tasks currently running
|
||||
* - max_concurrent_tasks: number (optional) - max concurrent tasks this worker can handle
|
||||
* - status: string (optional) - 'active', 'idle'
|
||||
* - resources: object (optional) - memory_mb, cpu_user_ms, cpu_system_ms, etc.
|
||||
*/
|
||||
router.post('/heartbeat', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { worker_id, current_task_id, status = 'active', resources } = req.body;
|
||||
const {
|
||||
worker_id,
|
||||
current_task_id,
|
||||
current_task_ids,
|
||||
active_task_count,
|
||||
max_concurrent_tasks,
|
||||
status = 'active',
|
||||
resources
|
||||
} = req.body;
|
||||
|
||||
if (!worker_id) {
|
||||
return res.status(400).json({ success: false, error: 'worker_id is required' });
|
||||
}
|
||||
|
||||
// Build metadata object with all the new fields
|
||||
const metadata: Record<string, unknown> = {};
|
||||
if (resources) Object.assign(metadata, resources);
|
||||
if (current_task_ids) metadata.current_task_ids = current_task_ids;
|
||||
if (active_task_count !== undefined) metadata.active_task_count = active_task_count;
|
||||
if (max_concurrent_tasks !== undefined) metadata.max_concurrent_tasks = max_concurrent_tasks;
|
||||
|
||||
// Store resources in metadata jsonb column
|
||||
const { rows } = await pool.query(`
|
||||
UPDATE worker_registry
|
||||
@@ -159,7 +177,7 @@ router.post('/heartbeat', async (req: Request, res: Response) => {
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = $3
|
||||
RETURNING id, friendly_name, status
|
||||
`, [current_task_id || null, status, worker_id, resources ? JSON.stringify(resources) : null]);
|
||||
`, [current_task_id || null, status, worker_id, Object.keys(metadata).length > 0 ? JSON.stringify(metadata) : null]);
|
||||
|
||||
if (rows.length === 0) {
|
||||
return res.status(404).json({ success: false, error: 'Worker not found - please register first' });
|
||||
@@ -330,12 +348,21 @@ router.get('/workers', async (req: Request, res: Response) => {
|
||||
tasks_completed,
|
||||
tasks_failed,
|
||||
current_task_id,
|
||||
-- Concurrent task fields from metadata
|
||||
(metadata->>'current_task_ids')::jsonb as current_task_ids,
|
||||
(metadata->>'active_task_count')::int as active_task_count,
|
||||
(metadata->>'max_concurrent_tasks')::int as max_concurrent_tasks,
|
||||
-- Decommission fields
|
||||
COALESCE(decommission_requested, false) as decommission_requested,
|
||||
decommission_reason,
|
||||
-- Full metadata for resources
|
||||
metadata,
|
||||
EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat,
|
||||
CASE
|
||||
WHEN status = 'offline' OR status = 'terminated' THEN status
|
||||
WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
|
||||
WHEN current_task_id IS NOT NULL THEN 'busy'
|
||||
WHEN (metadata->>'active_task_count')::int > 0 THEN 'busy'
|
||||
ELSE 'ready'
|
||||
END as health_status,
|
||||
created_at
|
||||
@@ -672,4 +699,163 @@ router.get('/capacity', async (_req: Request, res: Response) => {
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// WORKER LIFECYCLE MANAGEMENT
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* POST /api/worker-registry/workers/:workerId/decommission
|
||||
* Request graceful decommission of a worker (will stop after current task)
|
||||
*/
|
||||
router.post('/workers/:workerId/decommission', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { workerId } = req.params;
|
||||
const { reason, issued_by } = req.body;
|
||||
|
||||
// Update worker_registry to flag for decommission
|
||||
const result = await pool.query(
|
||||
`UPDATE worker_registry
|
||||
SET decommission_requested = true,
|
||||
decommission_reason = $2,
|
||||
decommission_requested_at = NOW()
|
||||
WHERE worker_id = $1
|
||||
RETURNING friendly_name, status, current_task_id`,
|
||||
[workerId, reason || 'Manual decommission from admin']
|
||||
);
|
||||
|
||||
if (result.rows.length === 0) {
|
||||
return res.status(404).json({ success: false, error: 'Worker not found' });
|
||||
}
|
||||
|
||||
const worker = result.rows[0];
|
||||
|
||||
// Also log to worker_commands for audit trail
|
||||
await pool.query(
|
||||
`INSERT INTO worker_commands (worker_id, command, reason, issued_by)
|
||||
VALUES ($1, 'decommission', $2, $3)
|
||||
ON CONFLICT DO NOTHING`,
|
||||
[workerId, reason || 'Manual decommission', issued_by || 'admin']
|
||||
).catch(() => {
|
||||
// Table might not exist yet - ignore
|
||||
});
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
message: worker.current_task_id
|
||||
? `Worker ${worker.friendly_name} will stop after completing task #${worker.current_task_id}`
|
||||
: `Worker ${worker.friendly_name} will stop on next poll`,
|
||||
worker: {
|
||||
friendly_name: worker.friendly_name,
|
||||
status: worker.status,
|
||||
current_task_id: worker.current_task_id,
|
||||
decommission_requested: true
|
||||
}
|
||||
});
|
||||
} catch (error: any) {
|
||||
res.status(500).json({ success: false, error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/worker-registry/workers/:workerId/cancel-decommission
|
||||
* Cancel a pending decommission request
|
||||
*/
|
||||
router.post('/workers/:workerId/cancel-decommission', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { workerId } = req.params;
|
||||
|
||||
const result = await pool.query(
|
||||
`UPDATE worker_registry
|
||||
SET decommission_requested = false,
|
||||
decommission_reason = NULL,
|
||||
decommission_requested_at = NULL
|
||||
WHERE worker_id = $1
|
||||
RETURNING friendly_name`,
|
||||
[workerId]
|
||||
);
|
||||
|
||||
if (result.rows.length === 0) {
|
||||
return res.status(404).json({ success: false, error: 'Worker not found' });
|
||||
}
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
message: `Decommission cancelled for ${result.rows[0].friendly_name}`
|
||||
});
|
||||
} catch (error: any) {
|
||||
res.status(500).json({ success: false, error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/worker-registry/spawn
|
||||
* Spawn a new worker in the current pod (only works in multi-worker-per-pod mode)
|
||||
* For now, this is a placeholder - actual spawning requires the pod supervisor
|
||||
*/
|
||||
router.post('/spawn', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { pod_name, role } = req.body;
|
||||
|
||||
// For now, we can't actually spawn workers from the API
|
||||
// This would require a supervisor process in each pod that listens for spawn commands
|
||||
// Instead, return instructions for how to scale
|
||||
res.json({
|
||||
success: false,
|
||||
error: 'Direct worker spawning not yet implemented',
|
||||
instructions: 'To add workers, scale the K8s deployment: kubectl scale deployment/scraper-worker --replicas=N'
|
||||
});
|
||||
} catch (error: any) {
|
||||
res.status(500).json({ success: false, error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/worker-registry/pods
|
||||
* Get workers grouped by pod
|
||||
*/
|
||||
router.get('/pods', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const { rows } = await pool.query(`
|
||||
SELECT
|
||||
COALESCE(pod_name, 'Unknown') as pod_name,
|
||||
COUNT(*) as worker_count,
|
||||
COUNT(*) FILTER (WHERE current_task_id IS NOT NULL) as busy_count,
|
||||
COUNT(*) FILTER (WHERE current_task_id IS NULL) as idle_count,
|
||||
SUM(tasks_completed) as total_completed,
|
||||
SUM(tasks_failed) as total_failed,
|
||||
SUM((metadata->>'memory_rss_mb')::int) as total_memory_mb,
|
||||
array_agg(json_build_object(
|
||||
'worker_id', worker_id,
|
||||
'friendly_name', friendly_name,
|
||||
'status', status,
|
||||
'current_task_id', current_task_id,
|
||||
'tasks_completed', tasks_completed,
|
||||
'tasks_failed', tasks_failed,
|
||||
'decommission_requested', COALESCE(decommission_requested, false),
|
||||
'last_heartbeat_at', last_heartbeat_at
|
||||
)) as workers
|
||||
FROM worker_registry
|
||||
WHERE status NOT IN ('offline', 'terminated')
|
||||
GROUP BY pod_name
|
||||
ORDER BY pod_name
|
||||
`);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
pods: rows.map(row => ({
|
||||
pod_name: row.pod_name,
|
||||
worker_count: parseInt(row.worker_count),
|
||||
busy_count: parseInt(row.busy_count),
|
||||
idle_count: parseInt(row.idle_count),
|
||||
total_completed: parseInt(row.total_completed) || 0,
|
||||
total_failed: parseInt(row.total_failed) || 0,
|
||||
total_memory_mb: parseInt(row.total_memory_mb) || 0,
|
||||
workers: row.workers
|
||||
}))
|
||||
});
|
||||
} catch (error: any) {
|
||||
res.status(500).json({ success: false, error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
export default router;
|
||||
|
||||
@@ -35,7 +35,7 @@ const router = Router();
|
||||
// ============================================================
|
||||
|
||||
const K8S_NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
|
||||
const K8S_STATEFULSET_NAME = process.env.K8S_WORKER_STATEFULSET || 'scraper-worker';
|
||||
const K8S_DEPLOYMENT_NAME = process.env.K8S_WORKER_DEPLOYMENT || 'scraper-worker';
|
||||
|
||||
// Initialize K8s client - uses in-cluster config when running in K8s,
|
||||
// or kubeconfig when running locally
|
||||
@@ -70,7 +70,7 @@ function getK8sClient(): k8s.AppsV1Api | null {
|
||||
|
||||
/**
|
||||
* GET /api/workers/k8s/replicas - Get current worker replica count
|
||||
* Returns current and desired replica counts from the StatefulSet
|
||||
* Returns current and desired replica counts from the Deployment
|
||||
*/
|
||||
router.get('/k8s/replicas', async (_req: Request, res: Response) => {
|
||||
const client = getK8sClient();
|
||||
@@ -84,21 +84,21 @@ router.get('/k8s/replicas', async (_req: Request, res: Response) => {
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await client.readNamespacedStatefulSet({
|
||||
name: K8S_STATEFULSET_NAME,
|
||||
const response = await client.readNamespacedDeployment({
|
||||
name: K8S_DEPLOYMENT_NAME,
|
||||
namespace: K8S_NAMESPACE,
|
||||
});
|
||||
|
||||
const statefulSet = response;
|
||||
const deployment = response;
|
||||
res.json({
|
||||
success: true,
|
||||
replicas: {
|
||||
current: statefulSet.status?.readyReplicas || 0,
|
||||
desired: statefulSet.spec?.replicas || 0,
|
||||
available: statefulSet.status?.availableReplicas || 0,
|
||||
updated: statefulSet.status?.updatedReplicas || 0,
|
||||
current: deployment.status?.readyReplicas || 0,
|
||||
desired: deployment.spec?.replicas || 0,
|
||||
available: deployment.status?.availableReplicas || 0,
|
||||
updated: deployment.status?.updatedReplicas || 0,
|
||||
},
|
||||
statefulset: K8S_STATEFULSET_NAME,
|
||||
deployment: K8S_DEPLOYMENT_NAME,
|
||||
namespace: K8S_NAMESPACE,
|
||||
});
|
||||
} catch (err: any) {
|
||||
@@ -112,7 +112,7 @@ router.get('/k8s/replicas', async (_req: Request, res: Response) => {
|
||||
|
||||
/**
|
||||
* POST /api/workers/k8s/scale - Scale worker replicas
|
||||
* Body: { replicas: number } - desired replica count (1-20)
|
||||
* Body: { replicas: number } - desired replica count (0-20)
|
||||
*/
|
||||
router.post('/k8s/scale', async (req: Request, res: Response) => {
|
||||
const client = getK8sClient();
|
||||
@@ -136,21 +136,21 @@ router.post('/k8s/scale', async (req: Request, res: Response) => {
|
||||
|
||||
try {
|
||||
// Get current state first
|
||||
const currentResponse = await client.readNamespacedStatefulSetScale({
|
||||
name: K8S_STATEFULSET_NAME,
|
||||
const currentResponse = await client.readNamespacedDeploymentScale({
|
||||
name: K8S_DEPLOYMENT_NAME,
|
||||
namespace: K8S_NAMESPACE,
|
||||
});
|
||||
const currentReplicas = currentResponse.spec?.replicas || 0;
|
||||
|
||||
// Update scale using replaceNamespacedStatefulSetScale
|
||||
await client.replaceNamespacedStatefulSetScale({
|
||||
name: K8S_STATEFULSET_NAME,
|
||||
// Update scale using replaceNamespacedDeploymentScale
|
||||
await client.replaceNamespacedDeploymentScale({
|
||||
name: K8S_DEPLOYMENT_NAME,
|
||||
namespace: K8S_NAMESPACE,
|
||||
body: {
|
||||
apiVersion: 'autoscaling/v1',
|
||||
kind: 'Scale',
|
||||
metadata: {
|
||||
name: K8S_STATEFULSET_NAME,
|
||||
name: K8S_DEPLOYMENT_NAME,
|
||||
namespace: K8S_NAMESPACE,
|
||||
},
|
||||
spec: {
|
||||
@@ -159,14 +159,14 @@ router.post('/k8s/scale', async (req: Request, res: Response) => {
|
||||
},
|
||||
});
|
||||
|
||||
console.log(`[Workers] Scaled ${K8S_STATEFULSET_NAME} from ${currentReplicas} to ${replicas} replicas`);
|
||||
console.log(`[Workers] Scaled ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${replicas} replicas`);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
message: `Scaled from ${currentReplicas} to ${replicas} replicas`,
|
||||
previous: currentReplicas,
|
||||
desired: replicas,
|
||||
statefulset: K8S_STATEFULSET_NAME,
|
||||
deployment: K8S_DEPLOYMENT_NAME,
|
||||
namespace: K8S_NAMESPACE,
|
||||
});
|
||||
} catch (err: any) {
|
||||
@@ -178,6 +178,73 @@ router.post('/k8s/scale', async (req: Request, res: Response) => {
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/workers/k8s/scale-up - Scale up worker replicas by 1
|
||||
* Convenience endpoint for adding a single worker
|
||||
*/
|
||||
router.post('/k8s/scale-up', async (_req: Request, res: Response) => {
|
||||
const client = getK8sClient();
|
||||
|
||||
if (!client) {
|
||||
return res.status(503).json({
|
||||
success: false,
|
||||
error: 'K8s client not available (not running in cluster or no kubeconfig)',
|
||||
});
|
||||
}
|
||||
|
||||
try {
|
||||
// Get current replica count
|
||||
const currentResponse = await client.readNamespacedDeploymentScale({
|
||||
name: K8S_DEPLOYMENT_NAME,
|
||||
namespace: K8S_NAMESPACE,
|
||||
});
|
||||
const currentReplicas = currentResponse.spec?.replicas || 0;
|
||||
const newReplicas = currentReplicas + 1;
|
||||
|
||||
// Cap at 20 replicas
|
||||
if (newReplicas > 20) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: 'Maximum replica count (20) reached',
|
||||
});
|
||||
}
|
||||
|
||||
// Scale up by 1
|
||||
await client.replaceNamespacedDeploymentScale({
|
||||
name: K8S_DEPLOYMENT_NAME,
|
||||
namespace: K8S_NAMESPACE,
|
||||
body: {
|
||||
apiVersion: 'autoscaling/v1',
|
||||
kind: 'Scale',
|
||||
metadata: {
|
||||
name: K8S_DEPLOYMENT_NAME,
|
||||
namespace: K8S_NAMESPACE,
|
||||
},
|
||||
spec: {
|
||||
replicas: newReplicas,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
console.log(`[Workers] Scaled up ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${newReplicas} replicas`);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
message: `Added worker (${currentReplicas} → ${newReplicas} replicas)`,
|
||||
previous: currentReplicas,
|
||||
desired: newReplicas,
|
||||
deployment: K8S_DEPLOYMENT_NAME,
|
||||
namespace: K8S_NAMESPACE,
|
||||
});
|
||||
} catch (err: any) {
|
||||
console.error('[Workers] K8s scale-up error:', err.body?.message || err.message);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: err.body?.message || err.message,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// STATIC ROUTES (must come before parameterized routes)
|
||||
// ============================================================
|
||||
|
||||
@@ -683,6 +683,118 @@ export class CrawlRotator {
|
||||
const current = this.proxy.getCurrent();
|
||||
return current?.timezone;
|
||||
}
|
||||
|
||||
/**
|
||||
* Preflight check - verifies proxy and anti-detect are working
|
||||
* MUST be called before any task execution to ensure anonymity.
|
||||
*
|
||||
* Tests:
|
||||
* 1. Proxy available - a proxy must be loaded and active
|
||||
* 2. Proxy connectivity - makes HTTP request through proxy to verify connection
|
||||
* 3. Anti-detect headers - verifies fingerprint is set with required headers
|
||||
*
|
||||
* @returns Promise<PreflightResult> with pass/fail status and details
|
||||
*/
|
||||
async preflight(): Promise<PreflightResult> {
|
||||
const result: PreflightResult = {
|
||||
passed: false,
|
||||
proxyAvailable: false,
|
||||
proxyConnected: false,
|
||||
antidetectReady: false,
|
||||
proxyIp: null,
|
||||
fingerprint: null,
|
||||
error: null,
|
||||
responseTimeMs: null,
|
||||
};
|
||||
|
||||
// Step 1: Check proxy is available
|
||||
const currentProxy = this.proxy.getCurrent();
|
||||
if (!currentProxy) {
|
||||
result.error = 'No proxy available';
|
||||
console.log('[Preflight] FAILED - No proxy available');
|
||||
return result;
|
||||
}
|
||||
result.proxyAvailable = true;
|
||||
result.proxyIp = currentProxy.host;
|
||||
|
||||
// Step 2: Check fingerprint/anti-detect is ready
|
||||
const fingerprint = this.userAgent.getCurrent();
|
||||
if (!fingerprint || !fingerprint.userAgent) {
|
||||
result.error = 'Anti-detect fingerprint not initialized';
|
||||
console.log('[Preflight] FAILED - No fingerprint');
|
||||
return result;
|
||||
}
|
||||
result.antidetectReady = true;
|
||||
result.fingerprint = {
|
||||
userAgent: fingerprint.userAgent,
|
||||
browserName: fingerprint.browserName,
|
||||
deviceCategory: fingerprint.deviceCategory,
|
||||
};
|
||||
|
||||
// Step 3: Test proxy connectivity with an actual HTTP request
|
||||
// Use httpbin.org/ip to verify request goes through proxy
|
||||
const proxyUrl = this.proxy.getProxyUrl(currentProxy);
|
||||
const testUrl = 'https://httpbin.org/ip';
|
||||
|
||||
try {
|
||||
const { default: axios } = await import('axios');
|
||||
const { HttpsProxyAgent } = await import('https-proxy-agent');
|
||||
|
||||
const agent = new HttpsProxyAgent(proxyUrl);
|
||||
const startTime = Date.now();
|
||||
|
||||
const response = await axios.get(testUrl, {
|
||||
httpsAgent: agent,
|
||||
timeout: 15000, // 15 second timeout
|
||||
headers: {
|
||||
'User-Agent': fingerprint.userAgent,
|
||||
'Accept-Language': fingerprint.acceptLanguage,
|
||||
...(fingerprint.secChUa && { 'sec-ch-ua': fingerprint.secChUa }),
|
||||
...(fingerprint.secChUaPlatform && { 'sec-ch-ua-platform': fingerprint.secChUaPlatform }),
|
||||
...(fingerprint.secChUaMobile && { 'sec-ch-ua-mobile': fingerprint.secChUaMobile }),
|
||||
},
|
||||
});
|
||||
|
||||
result.responseTimeMs = Date.now() - startTime;
|
||||
result.proxyConnected = true;
|
||||
result.passed = true;
|
||||
|
||||
// Mark success on proxy stats
|
||||
await this.proxy.markSuccess(currentProxy.id, result.responseTimeMs);
|
||||
|
||||
console.log(`[Preflight] PASSED - Proxy ${currentProxy.host} connected (${result.responseTimeMs}ms), UA: ${fingerprint.browserName}/${fingerprint.deviceCategory}`);
|
||||
} catch (err: any) {
|
||||
result.error = `Proxy connection failed: ${err.message || 'Unknown error'}`;
|
||||
console.log(`[Preflight] FAILED - Proxy connection error: ${err.message}`);
|
||||
|
||||
// Mark failure on proxy stats
|
||||
await this.proxy.markFailed(currentProxy.id, err.message);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Result from preflight check
|
||||
*/
|
||||
export interface PreflightResult {
|
||||
/** Overall pass/fail */
|
||||
passed: boolean;
|
||||
/** Step 1: Is a proxy loaded? */
|
||||
proxyAvailable: boolean;
|
||||
/** Step 2: Did HTTP request through proxy succeed? */
|
||||
proxyConnected: boolean;
|
||||
/** Step 3: Is fingerprint/anti-detect ready? */
|
||||
antidetectReady: boolean;
|
||||
/** Current proxy IP */
|
||||
proxyIp: string | null;
|
||||
/** Fingerprint summary */
|
||||
fingerprint: { userAgent: string; browserName: string; deviceCategory: string } | null;
|
||||
/** Error message if failed */
|
||||
error: string | null;
|
||||
/** Proxy response time in ms */
|
||||
responseTimeMs: number | null;
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
|
||||
100
backend/src/services/curl-preflight.ts
Normal file
100
backend/src/services/curl-preflight.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
/**
|
||||
* Curl Preflight - Verify curl/axios transport works through proxy
|
||||
*
|
||||
* Tests:
|
||||
* 1. Proxy is available and active
|
||||
* 2. HTTP request through proxy succeeds
|
||||
* 3. Anti-detect headers are properly set
|
||||
*
|
||||
* Use case: Fast, simple API requests that don't need browser fingerprint
|
||||
*/
|
||||
|
||||
import axios from 'axios';
|
||||
import { HttpsProxyAgent } from 'https-proxy-agent';
|
||||
import { CrawlRotator, PreflightResult } from './crawl-rotator';
|
||||
|
||||
export interface CurlPreflightResult extends PreflightResult {
|
||||
method: 'curl';
|
||||
}
|
||||
|
||||
/**
|
||||
* Run curl preflight check
|
||||
* Tests proxy connectivity using axios/curl through the proxy
|
||||
*/
|
||||
export async function runCurlPreflight(
|
||||
crawlRotator: CrawlRotator
|
||||
): Promise<CurlPreflightResult> {
|
||||
const result: CurlPreflightResult = {
|
||||
method: 'curl',
|
||||
passed: false,
|
||||
proxyAvailable: false,
|
||||
proxyConnected: false,
|
||||
antidetectReady: false,
|
||||
proxyIp: null,
|
||||
fingerprint: null,
|
||||
error: null,
|
||||
responseTimeMs: null,
|
||||
};
|
||||
|
||||
// Step 1: Check proxy is available
|
||||
const currentProxy = crawlRotator.proxy.getCurrent();
|
||||
if (!currentProxy) {
|
||||
result.error = 'No proxy available';
|
||||
console.log('[CurlPreflight] FAILED - No proxy available');
|
||||
return result;
|
||||
}
|
||||
result.proxyAvailable = true;
|
||||
result.proxyIp = currentProxy.host;
|
||||
|
||||
// Step 2: Check fingerprint/anti-detect is ready
|
||||
const fingerprint = crawlRotator.userAgent.getCurrent();
|
||||
if (!fingerprint || !fingerprint.userAgent) {
|
||||
result.error = 'Anti-detect fingerprint not initialized';
|
||||
console.log('[CurlPreflight] FAILED - No fingerprint');
|
||||
return result;
|
||||
}
|
||||
result.antidetectReady = true;
|
||||
result.fingerprint = {
|
||||
userAgent: fingerprint.userAgent,
|
||||
browserName: fingerprint.browserName,
|
||||
deviceCategory: fingerprint.deviceCategory,
|
||||
};
|
||||
|
||||
// Step 3: Test proxy connectivity with an actual HTTP request
|
||||
const proxyUrl = crawlRotator.proxy.getProxyUrl(currentProxy);
|
||||
const testUrl = 'https://httpbin.org/ip';
|
||||
|
||||
try {
|
||||
const agent = new HttpsProxyAgent(proxyUrl);
|
||||
const startTime = Date.now();
|
||||
|
||||
const response = await axios.get(testUrl, {
|
||||
httpsAgent: agent,
|
||||
timeout: 15000, // 15 second timeout
|
||||
headers: {
|
||||
'User-Agent': fingerprint.userAgent,
|
||||
'Accept-Language': fingerprint.acceptLanguage,
|
||||
...(fingerprint.secChUa && { 'sec-ch-ua': fingerprint.secChUa }),
|
||||
...(fingerprint.secChUaPlatform && { 'sec-ch-ua-platform': fingerprint.secChUaPlatform }),
|
||||
...(fingerprint.secChUaMobile && { 'sec-ch-ua-mobile': fingerprint.secChUaMobile }),
|
||||
},
|
||||
});
|
||||
|
||||
result.responseTimeMs = Date.now() - startTime;
|
||||
result.proxyConnected = true;
|
||||
result.passed = true;
|
||||
|
||||
// Mark success on proxy stats
|
||||
await crawlRotator.proxy.markSuccess(currentProxy.id, result.responseTimeMs);
|
||||
|
||||
console.log(`[CurlPreflight] PASSED - Proxy ${currentProxy.host} connected (${result.responseTimeMs}ms), UA: ${fingerprint.browserName}/${fingerprint.deviceCategory}`);
|
||||
} catch (err: any) {
|
||||
result.error = `Proxy connection failed: ${err.message || 'Unknown error'}`;
|
||||
console.log(`[CurlPreflight] FAILED - Proxy connection error: ${err.message}`);
|
||||
|
||||
// Mark failure on proxy stats
|
||||
await crawlRotator.proxy.markFailed(currentProxy.id, err.message);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
399
backend/src/services/puppeteer-preflight.ts
Normal file
399
backend/src/services/puppeteer-preflight.ts
Normal file
@@ -0,0 +1,399 @@
|
||||
/**
|
||||
* Puppeteer Preflight - Verify browser-based transport works with anti-detect
|
||||
*
|
||||
* Uses Puppeteer + StealthPlugin to:
|
||||
* 1. Launch headless browser with stealth mode + PROXY
|
||||
* 2. Visit fingerprint.com demo to verify anti-detect and confirm proxy IP
|
||||
* 3. Establish session by visiting Dutchie embedded menu
|
||||
* 4. Make GraphQL request from browser context
|
||||
* 5. Verify we get a valid response (not blocked)
|
||||
*
|
||||
* Use case: Anti-detect scraping that needs real browser fingerprint through proxy
|
||||
*
|
||||
* Based on test-intercept.js which successfully captures 1000+ products
|
||||
*/
|
||||
|
||||
import { PreflightResult, CrawlRotator } from './crawl-rotator';
|
||||
|
||||
// GraphQL hash for FilteredProducts query - MUST match CLAUDE.md
|
||||
const FILTERED_PRODUCTS_HASH = 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0';
|
||||
|
||||
// Test dispensary - AZ-Deeply-Rooted (known working)
|
||||
const TEST_CNAME = 'AZ-Deeply-Rooted';
|
||||
const TEST_PLATFORM_ID = '6405ef617056e8014d79101b';
|
||||
|
||||
// Anti-detect verification sites (primary + fallback)
|
||||
const FINGERPRINT_DEMO_URL = 'https://demo.fingerprint.com/';
|
||||
const AMIUNIQUE_URL = 'https://amiunique.org/fingerprint';
|
||||
|
||||
export interface PuppeteerPreflightResult extends PreflightResult {
|
||||
method: 'http';
|
||||
/** Number of products returned (proves API access) */
|
||||
productsReturned?: number;
|
||||
/** Browser user agent used */
|
||||
browserUserAgent?: string;
|
||||
/** Bot detection result from fingerprint.com */
|
||||
botDetection?: {
|
||||
detected: boolean;
|
||||
probability?: number;
|
||||
type?: string;
|
||||
};
|
||||
/** Expected proxy IP (from pool) */
|
||||
expectedProxyIp?: string;
|
||||
/** Whether IP verification passed (detected IP matches proxy) */
|
||||
ipVerified?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Run Puppeteer preflight check with proxy
|
||||
* Tests browser-based access with anti-detect verification via fingerprint.com
|
||||
*
|
||||
* @param crawlRotator - CrawlRotator instance to get proxy from pool
|
||||
*/
|
||||
export async function runPuppeteerPreflight(
|
||||
crawlRotator?: CrawlRotator
|
||||
): Promise<PuppeteerPreflightResult> {
|
||||
const result: PuppeteerPreflightResult = {
|
||||
method: 'http',
|
||||
passed: false,
|
||||
proxyAvailable: false,
|
||||
proxyConnected: false,
|
||||
antidetectReady: false,
|
||||
proxyIp: null,
|
||||
fingerprint: null,
|
||||
error: null,
|
||||
responseTimeMs: null,
|
||||
productsReturned: 0,
|
||||
ipVerified: false,
|
||||
};
|
||||
|
||||
let browser: any = null;
|
||||
|
||||
try {
|
||||
// Step 0: Get a proxy from the pool
|
||||
let proxyUrl: string | null = null;
|
||||
let expectedProxyHost: string | null = null;
|
||||
|
||||
if (crawlRotator) {
|
||||
const currentProxy = crawlRotator.proxy.getCurrent();
|
||||
if (currentProxy) {
|
||||
result.proxyAvailable = true;
|
||||
proxyUrl = crawlRotator.proxy.getProxyUrl(currentProxy);
|
||||
expectedProxyHost = currentProxy.host;
|
||||
result.expectedProxyIp = expectedProxyHost;
|
||||
console.log(`[PuppeteerPreflight] Using proxy: ${currentProxy.host}:${currentProxy.port}`);
|
||||
} else {
|
||||
result.error = 'No proxy available from pool';
|
||||
console.log(`[PuppeteerPreflight] FAILED - No proxy available`);
|
||||
return result;
|
||||
}
|
||||
} else {
|
||||
console.log(`[PuppeteerPreflight] WARNING: No CrawlRotator provided - using direct connection`);
|
||||
result.proxyAvailable = true; // No proxy needed for direct
|
||||
}
|
||||
|
||||
// Dynamic imports to avoid loading Puppeteer unless needed
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
// Build browser args
|
||||
const browserArgs = ['--no-sandbox', '--disable-setuid-sandbox'];
|
||||
if (proxyUrl) {
|
||||
// Extract host:port for Puppeteer (it handles auth separately)
|
||||
const proxyUrlParsed = new URL(proxyUrl);
|
||||
browserArgs.push(`--proxy-server=${proxyUrlParsed.host}`);
|
||||
}
|
||||
|
||||
// Launch browser with stealth + proxy
|
||||
browser = await puppeteer.launch({
|
||||
headless: 'new',
|
||||
args: browserArgs,
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
|
||||
// If proxy has auth, set it up
|
||||
if (proxyUrl) {
|
||||
const proxyUrlParsed = new URL(proxyUrl);
|
||||
if (proxyUrlParsed.username && proxyUrlParsed.password) {
|
||||
await page.authenticate({
|
||||
username: decodeURIComponent(proxyUrlParsed.username),
|
||||
password: decodeURIComponent(proxyUrlParsed.password),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Get browser user agent
|
||||
const userAgent = await page.evaluate(() => navigator.userAgent);
|
||||
result.browserUserAgent = userAgent;
|
||||
result.fingerprint = {
|
||||
userAgent,
|
||||
browserName: 'Chrome (Puppeteer)',
|
||||
deviceCategory: 'desktop',
|
||||
};
|
||||
|
||||
// =========================================================================
|
||||
// STEP 1: Visit fingerprint.com demo to verify anti-detect and get IP
|
||||
// =========================================================================
|
||||
console.log(`[PuppeteerPreflight] Testing anti-detect at ${FINGERPRINT_DEMO_URL}...`);
|
||||
|
||||
try {
|
||||
await page.goto(FINGERPRINT_DEMO_URL, {
|
||||
waitUntil: 'networkidle2',
|
||||
timeout: 30000,
|
||||
});
|
||||
|
||||
result.proxyConnected = true; // If we got here, proxy is working
|
||||
|
||||
// Wait for fingerprint results to load
|
||||
await page.waitForSelector('[data-test="visitor-id"]', { timeout: 10000 }).catch(() => {});
|
||||
|
||||
// Extract fingerprint data from the page
|
||||
const fingerprintData = await page.evaluate(() => {
|
||||
// Try to find the IP address displayed on the page
|
||||
const ipElement = document.querySelector('[data-test="ip-address"]');
|
||||
const ip = ipElement?.textContent?.trim() || null;
|
||||
|
||||
// Try to find bot detection info
|
||||
const botElement = document.querySelector('[data-test="bot-detected"]');
|
||||
const botDetected = botElement?.textContent?.toLowerCase().includes('true') || false;
|
||||
|
||||
// Try to find visitor ID (proves fingerprinting worked)
|
||||
const visitorIdElement = document.querySelector('[data-test="visitor-id"]');
|
||||
const visitorId = visitorIdElement?.textContent?.trim() || null;
|
||||
|
||||
// Alternative: look for common UI patterns if data-test attrs not present
|
||||
let detectedIp = ip;
|
||||
if (!detectedIp) {
|
||||
// Look for IP in any element containing IP-like pattern
|
||||
const allText = document.body.innerText;
|
||||
const ipMatch = allText.match(/\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b/);
|
||||
detectedIp = ipMatch ? ipMatch[1] : null;
|
||||
}
|
||||
|
||||
return {
|
||||
ip: detectedIp,
|
||||
botDetected,
|
||||
visitorId,
|
||||
pageLoaded: !!document.body,
|
||||
};
|
||||
});
|
||||
|
||||
if (fingerprintData.ip) {
|
||||
result.proxyIp = fingerprintData.ip;
|
||||
console.log(`[PuppeteerPreflight] Detected IP: ${fingerprintData.ip}`);
|
||||
|
||||
// Verify IP matches expected proxy
|
||||
if (expectedProxyHost) {
|
||||
// Check if detected IP contains the proxy host (or is close match)
|
||||
if (fingerprintData.ip === expectedProxyHost ||
|
||||
expectedProxyHost.includes(fingerprintData.ip) ||
|
||||
fingerprintData.ip.includes(expectedProxyHost.split('.').slice(0, 3).join('.'))) {
|
||||
result.ipVerified = true;
|
||||
console.log(`[PuppeteerPreflight] IP VERIFIED - matches proxy`);
|
||||
} else {
|
||||
console.log(`[PuppeteerPreflight] IP mismatch: expected ${expectedProxyHost}, got ${fingerprintData.ip}`);
|
||||
// Don't fail - residential proxies often show different egress IPs
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fingerprintData.visitorId) {
|
||||
console.log(`[PuppeteerPreflight] Fingerprint visitor ID: ${fingerprintData.visitorId}`);
|
||||
}
|
||||
|
||||
result.botDetection = {
|
||||
detected: fingerprintData.botDetected,
|
||||
};
|
||||
|
||||
if (fingerprintData.botDetected) {
|
||||
console.log(`[PuppeteerPreflight] WARNING: Bot detection triggered!`);
|
||||
} else {
|
||||
console.log(`[PuppeteerPreflight] Anti-detect check: NOT detected as bot`);
|
||||
result.antidetectReady = true;
|
||||
}
|
||||
} catch (fpErr: any) {
|
||||
// Could mean proxy connection failed
|
||||
console.log(`[PuppeteerPreflight] Fingerprint.com check failed: ${fpErr.message}`);
|
||||
if (fpErr.message.includes('net::ERR_PROXY') || fpErr.message.includes('ECONNREFUSED')) {
|
||||
result.error = `Proxy connection failed: ${fpErr.message}`;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Try fallback: amiunique.org
|
||||
console.log(`[PuppeteerPreflight] Trying fallback: ${AMIUNIQUE_URL}...`);
|
||||
try {
|
||||
await page.goto(AMIUNIQUE_URL, {
|
||||
waitUntil: 'networkidle2',
|
||||
timeout: 30000,
|
||||
});
|
||||
|
||||
result.proxyConnected = true;
|
||||
|
||||
// Extract IP from amiunique.org page
|
||||
const amiData = await page.evaluate(() => {
|
||||
const allText = document.body.innerText;
|
||||
const ipMatch = allText.match(/\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b/);
|
||||
return {
|
||||
ip: ipMatch ? ipMatch[1] : null,
|
||||
pageLoaded: !!document.body,
|
||||
};
|
||||
});
|
||||
|
||||
if (amiData.ip) {
|
||||
result.proxyIp = amiData.ip;
|
||||
console.log(`[PuppeteerPreflight] Detected IP via amiunique.org: ${amiData.ip}`);
|
||||
}
|
||||
|
||||
result.antidetectReady = true;
|
||||
console.log(`[PuppeteerPreflight] amiunique.org fallback succeeded`);
|
||||
} catch (amiErr: any) {
|
||||
console.log(`[PuppeteerPreflight] amiunique.org fallback also failed: ${amiErr.message}`);
|
||||
// Continue with Dutchie test anyway
|
||||
result.proxyConnected = true;
|
||||
result.antidetectReady = true;
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// STEP 2: Test Dutchie API access (the real test)
|
||||
// =========================================================================
|
||||
const embedUrl = `https://dutchie.com/embedded-menu/${TEST_CNAME}?menuType=rec`;
|
||||
console.log(`[PuppeteerPreflight] Establishing session at ${embedUrl}...`);
|
||||
|
||||
await page.goto(embedUrl, {
|
||||
waitUntil: 'networkidle2',
|
||||
timeout: 30000,
|
||||
});
|
||||
|
||||
// Make GraphQL request from browser context
|
||||
const graphqlResult = await page.evaluate(
|
||||
async (platformId: string, hash: string) => {
|
||||
try {
|
||||
const variables = {
|
||||
includeEnterpriseSpecials: false,
|
||||
productsFilter: {
|
||||
dispensaryId: platformId,
|
||||
pricingType: 'rec',
|
||||
Status: 'Active', // CRITICAL: Must be 'Active' per CLAUDE.md
|
||||
types: [],
|
||||
useCache: true,
|
||||
isDefaultSort: true,
|
||||
sortBy: 'popularSortIdx',
|
||||
sortDirection: 1,
|
||||
bypassOnlineThresholds: true,
|
||||
isKioskMenu: false,
|
||||
removeProductsBelowOptionThresholds: false,
|
||||
},
|
||||
page: 0,
|
||||
perPage: 10, // Just need a few to prove it works
|
||||
};
|
||||
|
||||
const extensions = {
|
||||
persistedQuery: {
|
||||
version: 1,
|
||||
sha256Hash: hash,
|
||||
},
|
||||
};
|
||||
|
||||
const qs = new URLSearchParams({
|
||||
operationName: 'FilteredProducts',
|
||||
variables: JSON.stringify(variables),
|
||||
extensions: JSON.stringify(extensions),
|
||||
});
|
||||
|
||||
const url = `https://dutchie.com/api-3/graphql?${qs.toString()}`;
|
||||
const sessionId = 'preflight-' + Date.now();
|
||||
|
||||
const response = await fetch(url, {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
Accept: 'application/json',
|
||||
'content-type': 'application/json',
|
||||
'x-dutchie-session': sessionId,
|
||||
'apollographql-client-name': 'Marketplace (production)',
|
||||
},
|
||||
credentials: 'include',
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
return { error: `HTTP ${response.status}`, products: 0 };
|
||||
}
|
||||
|
||||
const json = await response.json();
|
||||
|
||||
if (json.errors) {
|
||||
return { error: JSON.stringify(json.errors).slice(0, 200), products: 0 };
|
||||
}
|
||||
|
||||
const products = json?.data?.filteredProducts?.products || [];
|
||||
return { error: null, products: products.length };
|
||||
} catch (err: any) {
|
||||
return { error: err.message || 'Unknown error', products: 0 };
|
||||
}
|
||||
},
|
||||
TEST_PLATFORM_ID,
|
||||
FILTERED_PRODUCTS_HASH
|
||||
);
|
||||
|
||||
result.responseTimeMs = Date.now() - startTime;
|
||||
|
||||
if (graphqlResult.error) {
|
||||
result.error = `GraphQL error: ${graphqlResult.error}`;
|
||||
console.log(`[PuppeteerPreflight] FAILED - ${result.error}`);
|
||||
} else if (graphqlResult.products === 0) {
|
||||
result.error = 'GraphQL returned 0 products';
|
||||
console.log(`[PuppeteerPreflight] FAILED - No products returned`);
|
||||
} else {
|
||||
result.passed = true;
|
||||
result.productsReturned = graphqlResult.products;
|
||||
console.log(
|
||||
`[PuppeteerPreflight] PASSED - Got ${graphqlResult.products} products in ${result.responseTimeMs}ms`
|
||||
);
|
||||
if (result.proxyIp) {
|
||||
console.log(`[PuppeteerPreflight] Browser IP via proxy: ${result.proxyIp}`);
|
||||
}
|
||||
}
|
||||
} catch (err: any) {
|
||||
result.error = `Browser error: ${err.message || 'Unknown error'}`;
|
||||
console.log(`[PuppeteerPreflight] FAILED - ${result.error}`);
|
||||
} finally {
|
||||
if (browser) {
|
||||
await browser.close().catch(() => {});
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Run Puppeteer preflight with retry
|
||||
* Retries once on failure to handle transient issues
|
||||
*
|
||||
* @param crawlRotator - CrawlRotator instance to get proxy from pool
|
||||
* @param maxRetries - Number of retry attempts (default 1)
|
||||
*/
|
||||
export async function runPuppeteerPreflightWithRetry(
|
||||
crawlRotator?: CrawlRotator,
|
||||
maxRetries: number = 1
|
||||
): Promise<PuppeteerPreflightResult> {
|
||||
let lastResult: PuppeteerPreflightResult | null = null;
|
||||
|
||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||
if (attempt > 0) {
|
||||
console.log(`[PuppeteerPreflight] Retry attempt ${attempt}/${maxRetries}...`);
|
||||
await new Promise((r) => setTimeout(r, 5000)); // Wait 5s between retries
|
||||
}
|
||||
|
||||
lastResult = await runPuppeteerPreflight(crawlRotator);
|
||||
|
||||
if (lastResult.passed) {
|
||||
return lastResult;
|
||||
}
|
||||
}
|
||||
|
||||
return lastResult!;
|
||||
}
|
||||
@@ -1,566 +1,30 @@
|
||||
/**
|
||||
* System API Routes
|
||||
* System API Routes (Stub)
|
||||
*
|
||||
* Provides REST API endpoints for system monitoring and control:
|
||||
* - /api/system/sync/* - Sync orchestrator
|
||||
* - /api/system/dlq/* - Dead-letter queue
|
||||
* - /api/system/integrity/* - Integrity checks
|
||||
* - /api/system/fix/* - Auto-fix routines
|
||||
* - /api/system/alerts/* - System alerts
|
||||
* - /metrics - Prometheus metrics
|
||||
* The full system routes depend on SyncOrchestrator which was moved to _deprecated.
|
||||
* This stub provides empty routers to maintain backward compatibility.
|
||||
*
|
||||
* Phase 5: Full Production Sync + Monitoring
|
||||
* Full implementation available at: src/_deprecated/system/routes/index.ts
|
||||
*/
|
||||
|
||||
import { Router, Request, Response } from 'express';
|
||||
import { Pool } from 'pg';
|
||||
import {
|
||||
SyncOrchestrator,
|
||||
MetricsService,
|
||||
DLQService,
|
||||
AlertService,
|
||||
IntegrityService,
|
||||
AutoFixService,
|
||||
} from '../services';
|
||||
import { MetricsService } from '../services';
|
||||
|
||||
export function createSystemRouter(pool: Pool): Router {
|
||||
export function createSystemRouter(_pool: Pool): Router {
|
||||
const router = Router();
|
||||
|
||||
// Initialize services
|
||||
const metrics = new MetricsService(pool);
|
||||
const dlq = new DLQService(pool);
|
||||
const alerts = new AlertService(pool);
|
||||
const integrity = new IntegrityService(pool, alerts);
|
||||
const autoFix = new AutoFixService(pool, alerts);
|
||||
const orchestrator = new SyncOrchestrator(pool, metrics, dlq, alerts);
|
||||
|
||||
// ============================================================
|
||||
// SYNC ORCHESTRATOR ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* GET /api/system/sync/status
|
||||
* Get current sync status
|
||||
*/
|
||||
router.get('/sync/status', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const status = await orchestrator.getStatus();
|
||||
res.json(status);
|
||||
} catch (error) {
|
||||
console.error('[System] Sync status error:', error);
|
||||
res.status(500).json({ error: 'Failed to get sync status' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/sync/run
|
||||
* Trigger a sync run
|
||||
*/
|
||||
router.post('/sync/run', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const triggeredBy = req.body.triggeredBy || 'api';
|
||||
const result = await orchestrator.runSync();
|
||||
res.json({
|
||||
success: true,
|
||||
triggeredBy,
|
||||
metrics: result,
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('[System] Sync run error:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: error instanceof Error ? error.message : 'Sync run failed',
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/sync/queue-depth
|
||||
* Get queue depth information
|
||||
*/
|
||||
router.get('/sync/queue-depth', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const depth = await orchestrator.getQueueDepth();
|
||||
res.json(depth);
|
||||
} catch (error) {
|
||||
console.error('[System] Queue depth error:', error);
|
||||
res.status(500).json({ error: 'Failed to get queue depth' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/sync/health
|
||||
* Get sync health status
|
||||
*/
|
||||
router.get('/sync/health', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const health = await orchestrator.getHealth();
|
||||
res.status(health.healthy ? 200 : 503).json(health);
|
||||
} catch (error) {
|
||||
console.error('[System] Health check error:', error);
|
||||
res.status(500).json({ healthy: false, error: 'Health check failed' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/sync/pause
|
||||
* Pause the orchestrator
|
||||
*/
|
||||
router.post('/sync/pause', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const reason = req.body.reason || 'Manual pause';
|
||||
await orchestrator.pause(reason);
|
||||
res.json({ success: true, message: 'Orchestrator paused' });
|
||||
} catch (error) {
|
||||
console.error('[System] Pause error:', error);
|
||||
res.status(500).json({ error: 'Failed to pause orchestrator' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/sync/resume
|
||||
* Resume the orchestrator
|
||||
*/
|
||||
router.post('/sync/resume', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
await orchestrator.resume();
|
||||
res.json({ success: true, message: 'Orchestrator resumed' });
|
||||
} catch (error) {
|
||||
console.error('[System] Resume error:', error);
|
||||
res.status(500).json({ error: 'Failed to resume orchestrator' });
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// DLQ ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* GET /api/system/dlq
|
||||
* List DLQ payloads
|
||||
*/
|
||||
router.get('/dlq', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const options = {
|
||||
status: req.query.status as string,
|
||||
errorType: req.query.errorType as string,
|
||||
dispensaryId: req.query.dispensaryId ? parseInt(req.query.dispensaryId as string) : undefined,
|
||||
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
|
||||
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
|
||||
};
|
||||
|
||||
const result = await dlq.listPayloads(options);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ list error:', error);
|
||||
res.status(500).json({ error: 'Failed to list DLQ payloads' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/dlq/stats
|
||||
* Get DLQ statistics
|
||||
*/
|
||||
router.get('/dlq/stats', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const stats = await dlq.getStats();
|
||||
res.json(stats);
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ stats error:', error);
|
||||
res.status(500).json({ error: 'Failed to get DLQ stats' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/dlq/summary
|
||||
* Get DLQ summary by error type
|
||||
*/
|
||||
router.get('/dlq/summary', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const summary = await dlq.getSummary();
|
||||
res.json(summary);
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ summary error:', error);
|
||||
res.status(500).json({ error: 'Failed to get DLQ summary' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/dlq/:id
|
||||
* Get a specific DLQ payload
|
||||
*/
|
||||
router.get('/dlq/:id', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const payload = await dlq.getPayload(req.params.id);
|
||||
if (!payload) {
|
||||
return res.status(404).json({ error: 'Payload not found' });
|
||||
}
|
||||
res.json(payload);
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ get error:', error);
|
||||
res.status(500).json({ error: 'Failed to get DLQ payload' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/dlq/:id/retry
|
||||
* Retry a DLQ payload
|
||||
*/
|
||||
router.post('/dlq/:id/retry', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const result = await dlq.retryPayload(req.params.id);
|
||||
if (result.success) {
|
||||
res.json(result);
|
||||
} else {
|
||||
res.status(400).json(result);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ retry error:', error);
|
||||
res.status(500).json({ error: 'Failed to retry payload' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/dlq/:id/abandon
|
||||
* Abandon a DLQ payload
|
||||
*/
|
||||
router.post('/dlq/:id/abandon', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const reason = req.body.reason || 'Manually abandoned';
|
||||
const abandonedBy = req.body.abandonedBy || 'api';
|
||||
const success = await dlq.abandonPayload(req.params.id, reason, abandonedBy);
|
||||
res.json({ success });
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ abandon error:', error);
|
||||
res.status(500).json({ error: 'Failed to abandon payload' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/dlq/bulk-retry
|
||||
* Bulk retry payloads by error type
|
||||
*/
|
||||
router.post('/dlq/bulk-retry', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { errorType } = req.body;
|
||||
if (!errorType) {
|
||||
return res.status(400).json({ error: 'errorType is required' });
|
||||
}
|
||||
const result = await dlq.bulkRetryByErrorType(errorType);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('[System] DLQ bulk retry error:', error);
|
||||
res.status(500).json({ error: 'Failed to bulk retry' });
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// INTEGRITY CHECK ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* POST /api/system/integrity/run
|
||||
* Run all integrity checks
|
||||
*/
|
||||
router.post('/integrity/run', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const triggeredBy = req.body.triggeredBy || 'api';
|
||||
const result = await integrity.runAllChecks(triggeredBy);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('[System] Integrity run error:', error);
|
||||
res.status(500).json({ error: 'Failed to run integrity checks' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/integrity/runs
|
||||
* Get recent integrity check runs
|
||||
*/
|
||||
router.get('/integrity/runs', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const limit = req.query.limit ? parseInt(req.query.limit as string) : 10;
|
||||
const runs = await integrity.getRecentRuns(limit);
|
||||
res.json(runs);
|
||||
} catch (error) {
|
||||
console.error('[System] Integrity runs error:', error);
|
||||
res.status(500).json({ error: 'Failed to get integrity runs' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/integrity/runs/:runId
|
||||
* Get results for a specific integrity run
|
||||
*/
|
||||
router.get('/integrity/runs/:runId', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const results = await integrity.getRunResults(req.params.runId);
|
||||
res.json(results);
|
||||
} catch (error) {
|
||||
console.error('[System] Integrity run results error:', error);
|
||||
res.status(500).json({ error: 'Failed to get run results' });
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// AUTO-FIX ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* GET /api/system/fix/routines
|
||||
* Get available fix routines
|
||||
*/
|
||||
router.get('/fix/routines', (_req: Request, res: Response) => {
|
||||
try {
|
||||
const routines = autoFix.getAvailableRoutines();
|
||||
res.json(routines);
|
||||
} catch (error) {
|
||||
console.error('[System] Get routines error:', error);
|
||||
res.status(500).json({ error: 'Failed to get routines' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/fix/:routine
|
||||
* Run a fix routine
|
||||
*/
|
||||
router.post('/fix/:routine', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const routineName = req.params.routine;
|
||||
const dryRun = req.body.dryRun === true;
|
||||
const triggeredBy = req.body.triggeredBy || 'api';
|
||||
|
||||
const result = await autoFix.runRoutine(routineName as any, triggeredBy, { dryRun });
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('[System] Fix routine error:', error);
|
||||
res.status(500).json({ error: 'Failed to run fix routine' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/fix/runs
|
||||
* Get recent fix runs
|
||||
*/
|
||||
router.get('/fix/runs', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const limit = req.query.limit ? parseInt(req.query.limit as string) : 20;
|
||||
const runs = await autoFix.getRecentRuns(limit);
|
||||
res.json(runs);
|
||||
} catch (error) {
|
||||
console.error('[System] Fix runs error:', error);
|
||||
res.status(500).json({ error: 'Failed to get fix runs' });
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// ALERTS ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* GET /api/system/alerts
|
||||
* List alerts
|
||||
*/
|
||||
router.get('/alerts', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const options = {
|
||||
status: req.query.status as any,
|
||||
severity: req.query.severity as any,
|
||||
type: req.query.type as string,
|
||||
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
|
||||
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
|
||||
};
|
||||
|
||||
const result = await alerts.listAlerts(options);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('[System] Alerts list error:', error);
|
||||
res.status(500).json({ error: 'Failed to list alerts' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/alerts/active
|
||||
* Get active alerts
|
||||
*/
|
||||
router.get('/alerts/active', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const activeAlerts = await alerts.getActiveAlerts();
|
||||
res.json(activeAlerts);
|
||||
} catch (error) {
|
||||
console.error('[System] Active alerts error:', error);
|
||||
res.status(500).json({ error: 'Failed to get active alerts' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/alerts/summary
|
||||
* Get alert summary
|
||||
*/
|
||||
router.get('/alerts/summary', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const summary = await alerts.getSummary();
|
||||
res.json(summary);
|
||||
} catch (error) {
|
||||
console.error('[System] Alerts summary error:', error);
|
||||
res.status(500).json({ error: 'Failed to get alerts summary' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/alerts/:id/acknowledge
|
||||
* Acknowledge an alert
|
||||
*/
|
||||
router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const alertId = parseInt(req.params.id);
|
||||
const acknowledgedBy = req.body.acknowledgedBy || 'api';
|
||||
const success = await alerts.acknowledgeAlert(alertId, acknowledgedBy);
|
||||
res.json({ success });
|
||||
} catch (error) {
|
||||
console.error('[System] Acknowledge alert error:', error);
|
||||
res.status(500).json({ error: 'Failed to acknowledge alert' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/alerts/:id/resolve
|
||||
* Resolve an alert
|
||||
*/
|
||||
router.post('/alerts/:id/resolve', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const alertId = parseInt(req.params.id);
|
||||
const resolvedBy = req.body.resolvedBy || 'api';
|
||||
const success = await alerts.resolveAlert(alertId, resolvedBy);
|
||||
res.json({ success });
|
||||
} catch (error) {
|
||||
console.error('[System] Resolve alert error:', error);
|
||||
res.status(500).json({ error: 'Failed to resolve alert' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/alerts/bulk-acknowledge
|
||||
* Bulk acknowledge alerts
|
||||
*/
|
||||
router.post('/alerts/bulk-acknowledge', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { ids, acknowledgedBy } = req.body;
|
||||
if (!ids || !Array.isArray(ids)) {
|
||||
return res.status(400).json({ error: 'ids array is required' });
|
||||
}
|
||||
const count = await alerts.bulkAcknowledge(ids, acknowledgedBy || 'api');
|
||||
res.json({ acknowledged: count });
|
||||
} catch (error) {
|
||||
console.error('[System] Bulk acknowledge error:', error);
|
||||
res.status(500).json({ error: 'Failed to bulk acknowledge' });
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// METRICS ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* GET /api/system/metrics
|
||||
* Get all current metrics
|
||||
*/
|
||||
router.get('/metrics', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const allMetrics = await metrics.getAllMetrics();
|
||||
res.json(allMetrics);
|
||||
} catch (error) {
|
||||
console.error('[System] Metrics error:', error);
|
||||
res.status(500).json({ error: 'Failed to get metrics' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/metrics/:name
|
||||
* Get a specific metric
|
||||
*/
|
||||
router.get('/metrics/:name', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const metric = await metrics.getMetric(req.params.name);
|
||||
if (!metric) {
|
||||
return res.status(404).json({ error: 'Metric not found' });
|
||||
}
|
||||
res.json(metric);
|
||||
} catch (error) {
|
||||
console.error('[System] Metric error:', error);
|
||||
res.status(500).json({ error: 'Failed to get metric' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/metrics/:name/history
|
||||
* Get metric time series
|
||||
*/
|
||||
router.get('/metrics/:name/history', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const hours = req.query.hours ? parseInt(req.query.hours as string) : 24;
|
||||
const history = await metrics.getMetricHistory(req.params.name, hours);
|
||||
res.json(history);
|
||||
} catch (error) {
|
||||
console.error('[System] Metric history error:', error);
|
||||
res.status(500).json({ error: 'Failed to get metric history' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/errors
|
||||
* Get error summary
|
||||
*/
|
||||
router.get('/errors', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const summary = await metrics.getErrorSummary();
|
||||
res.json(summary);
|
||||
} catch (error) {
|
||||
console.error('[System] Error summary error:', error);
|
||||
res.status(500).json({ error: 'Failed to get error summary' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /api/system/errors/recent
|
||||
* Get recent errors
|
||||
*/
|
||||
router.get('/errors/recent', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const limit = req.query.limit ? parseInt(req.query.limit as string) : 50;
|
||||
const errorType = req.query.type as string;
|
||||
const errors = await metrics.getRecentErrors(limit, errorType);
|
||||
res.json(errors);
|
||||
} catch (error) {
|
||||
console.error('[System] Recent errors error:', error);
|
||||
res.status(500).json({ error: 'Failed to get recent errors' });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* POST /api/system/errors/acknowledge
|
||||
* Acknowledge errors
|
||||
*/
|
||||
router.post('/errors/acknowledge', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { ids, acknowledgedBy } = req.body;
|
||||
if (!ids || !Array.isArray(ids)) {
|
||||
return res.status(400).json({ error: 'ids array is required' });
|
||||
}
|
||||
const count = await metrics.acknowledgeErrors(ids, acknowledgedBy || 'api');
|
||||
res.json({ acknowledged: count });
|
||||
} catch (error) {
|
||||
console.error('[System] Acknowledge errors error:', error);
|
||||
res.status(500).json({ error: 'Failed to acknowledge errors' });
|
||||
}
|
||||
// Stub - full sync/dlq/integrity/fix/alerts routes moved to _deprecated
|
||||
router.get('/status', (_req: Request, res: Response) => {
|
||||
res.json({
|
||||
message: 'System routes temporarily disabled - see _deprecated/system/routes',
|
||||
status: 'stub',
|
||||
});
|
||||
});
|
||||
|
||||
return router;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create Prometheus metrics endpoint (standalone)
|
||||
*/
|
||||
export function createPrometheusRouter(pool: Pool): Router {
|
||||
const router = Router();
|
||||
const metrics = new MetricsService(pool);
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
* Phase 5: Full Production Sync + Monitoring
|
||||
*/
|
||||
|
||||
export { SyncOrchestrator, type SyncStatus, type QueueDepth, type SyncRunMetrics, type OrchestratorStatus } from './sync-orchestrator';
|
||||
// SyncOrchestrator moved to _deprecated (depends on hydration module)
|
||||
export { MetricsService, ERROR_TYPES, type Metric, type MetricTimeSeries, type ErrorBucket, type ErrorType } from './metrics';
|
||||
export { DLQService, type DLQPayload, type DLQStats } from './dlq';
|
||||
export { AlertService, type SystemAlert, type AlertSummary, type AlertSeverity, type AlertStatus } from './alerts';
|
||||
|
||||
@@ -4,8 +4,9 @@
|
||||
* Exports all task handlers for the task worker.
|
||||
*/
|
||||
|
||||
export { handleProductRefresh } from './product-refresh';
|
||||
export { handleProductDiscovery } from './product-discovery';
|
||||
export { handleProductRefresh } from './product-refresh';
|
||||
export { handleStoreDiscovery } from './store-discovery';
|
||||
export { handleEntryPointDiscovery } from './entry-point-discovery';
|
||||
export { handleAnalyticsRefresh } from './analytics-refresh';
|
||||
export { handleWhoami } from './whoami';
|
||||
|
||||
@@ -25,7 +25,7 @@ export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult
|
||||
try {
|
||||
// Get states to discover
|
||||
const statesResult = await pool.query(`
|
||||
SELECT code FROM states WHERE active = true ORDER BY code
|
||||
SELECT code FROM states WHERE is_active = true ORDER BY code
|
||||
`);
|
||||
const stateCodes = statesResult.rows.map(r => r.code);
|
||||
|
||||
|
||||
80
backend/src/tasks/handlers/whoami.ts
Normal file
80
backend/src/tasks/handlers/whoami.ts
Normal file
@@ -0,0 +1,80 @@
|
||||
/**
|
||||
* WhoAmI Handler
|
||||
* Tests proxy connectivity and anti-detect by fetching public IP
|
||||
* Reports: proxy IP, fingerprint info, and connection status
|
||||
*/
|
||||
|
||||
import { TaskContext, TaskResult } from '../task-worker';
|
||||
import { execSync } from 'child_process';
|
||||
|
||||
export async function handleWhoami(ctx: TaskContext): Promise<TaskResult> {
|
||||
const { pool, crawlRotator } = ctx;
|
||||
|
||||
console.log('[WhoAmI] Testing proxy and anti-detect...');
|
||||
|
||||
try {
|
||||
// Use the preflight check which tests proxy + anti-detect
|
||||
if (crawlRotator) {
|
||||
const preflight = await crawlRotator.preflight();
|
||||
|
||||
if (!preflight.passed) {
|
||||
return {
|
||||
success: false,
|
||||
error: preflight.error || 'Preflight check failed',
|
||||
proxyAvailable: preflight.proxyAvailable,
|
||||
proxyConnected: preflight.proxyConnected,
|
||||
antidetectReady: preflight.antidetectReady,
|
||||
};
|
||||
}
|
||||
|
||||
console.log(`[WhoAmI] Proxy IP: ${preflight.proxyIp}, Response: ${preflight.responseTimeMs}ms`);
|
||||
console.log(`[WhoAmI] Fingerprint: ${preflight.fingerprint?.browserName}/${preflight.fingerprint?.deviceCategory}`);
|
||||
|
||||
return {
|
||||
success: true,
|
||||
proxyIp: preflight.proxyIp,
|
||||
responseTimeMs: preflight.responseTimeMs,
|
||||
fingerprint: preflight.fingerprint,
|
||||
proxyAvailable: preflight.proxyAvailable,
|
||||
proxyConnected: preflight.proxyConnected,
|
||||
antidetectReady: preflight.antidetectReady,
|
||||
};
|
||||
}
|
||||
|
||||
// Fallback: Direct proxy test without CrawlRotator
|
||||
const proxyResult = await pool.query(`
|
||||
SELECT host, port, username, password
|
||||
FROM proxies
|
||||
WHERE is_active = true
|
||||
LIMIT 1
|
||||
`);
|
||||
|
||||
if (proxyResult.rows.length === 0) {
|
||||
return { success: false, error: 'No active proxy configured' };
|
||||
}
|
||||
|
||||
const p = proxyResult.rows[0];
|
||||
const proxyUrl = p.username
|
||||
? `http://${p.username}:${p.password}@${p.host}:${p.port}`
|
||||
: `http://${p.host}:${p.port}`;
|
||||
|
||||
console.log(`[WhoAmI] Using proxy: ${p.host}:${p.port}`);
|
||||
|
||||
// Fetch IP via proxy
|
||||
const cmd = `curl -s --proxy '${proxyUrl}' 'https://api.ipify.org?format=json'`;
|
||||
const output = execSync(cmd, { timeout: 30000 }).toString().trim();
|
||||
const data = JSON.parse(output);
|
||||
|
||||
console.log(`[WhoAmI] Proxy IP: ${data.ip}`);
|
||||
|
||||
return {
|
||||
success: true,
|
||||
proxyIp: data.ip,
|
||||
proxyHost: p.host,
|
||||
proxyPort: p.port,
|
||||
};
|
||||
} catch (error: any) {
|
||||
console.error('[WhoAmI] Error:', error.message);
|
||||
return { success: false, error: error.message };
|
||||
}
|
||||
}
|
||||
@@ -17,8 +17,8 @@ export {
|
||||
export { TaskWorker, TaskContext, TaskResult } from './task-worker';
|
||||
|
||||
export {
|
||||
handleProductRefresh,
|
||||
handleProductDiscovery,
|
||||
handleProductRefresh,
|
||||
handleStoreDiscovery,
|
||||
handleEntryPointDiscovery,
|
||||
handleAnalyticsRefresh,
|
||||
|
||||
37
backend/src/tasks/task-pool-state.ts
Normal file
37
backend/src/tasks/task-pool-state.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
/**
|
||||
* Task Pool State
|
||||
*
|
||||
* Shared state for task pool pause/resume functionality.
|
||||
* This is kept separate to avoid circular dependencies between
|
||||
* task-service.ts and routes/tasks.ts.
|
||||
*
|
||||
* State is in-memory and resets on server restart.
|
||||
* By default, the pool is PAUSED (closed) - admin must explicitly start it.
|
||||
* This prevents workers from immediately grabbing tasks on deploy before
|
||||
* the system is ready.
|
||||
*/
|
||||
|
||||
let taskPoolPaused = true;
|
||||
|
||||
export function isTaskPoolPaused(): boolean {
|
||||
return taskPoolPaused;
|
||||
}
|
||||
|
||||
export function pauseTaskPool(): void {
|
||||
taskPoolPaused = true;
|
||||
console.log('[TaskPool] Task pool PAUSED - workers will not pick up new tasks');
|
||||
}
|
||||
|
||||
export function resumeTaskPool(): void {
|
||||
taskPoolPaused = false;
|
||||
console.log('[TaskPool] Task pool RESUMED - workers can pick up tasks');
|
||||
}
|
||||
|
||||
export function getTaskPoolStatus(): { paused: boolean; message: string } {
|
||||
return {
|
||||
paused: taskPoolPaused,
|
||||
message: taskPoolPaused
|
||||
? 'Task pool is paused - workers will not pick up new tasks'
|
||||
: 'Task pool is open - workers are picking up tasks',
|
||||
};
|
||||
}
|
||||
@@ -9,6 +9,7 @@
|
||||
*/
|
||||
|
||||
import { pool } from '../db/pool';
|
||||
import { isTaskPoolPaused } from './task-pool-state';
|
||||
|
||||
// Helper to check if a table exists
|
||||
async function tableExists(tableName: string): Promise<boolean> {
|
||||
@@ -23,14 +24,16 @@ async function tableExists(tableName: string): Promise<boolean> {
|
||||
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Task roles
|
||||
// payload_fetch: Hits Dutchie API, saves raw payload to filesystem
|
||||
// product_refresh: Reads local payload, normalizes, upserts to DB
|
||||
// product_discovery: Main product crawl handler
|
||||
// product_refresh: Legacy role (deprecated but kept for compatibility)
|
||||
export type TaskRole =
|
||||
| 'store_discovery'
|
||||
| 'entry_point_discovery'
|
||||
| 'product_discovery'
|
||||
| 'payload_fetch' // NEW: Fetches from API, saves to disk
|
||||
| 'product_refresh' // CHANGED: Now reads from local payload
|
||||
| 'analytics_refresh';
|
||||
| 'payload_fetch' // Fetches from API, saves to disk
|
||||
| 'product_refresh' // DEPRECATED: Use product_discovery instead
|
||||
| 'analytics_refresh'
|
||||
| 'whoami'; // Tests proxy + anti-detect connectivity
|
||||
|
||||
export type TaskStatus =
|
||||
| 'pending'
|
||||
@@ -49,6 +52,7 @@ export interface WorkerTask {
|
||||
platform: string | null;
|
||||
status: TaskStatus;
|
||||
priority: number;
|
||||
method: 'curl' | 'http' | null; // Transport method: curl=axios/proxy, http=Puppeteer/browser
|
||||
scheduled_for: Date | null;
|
||||
worker_id: string | null;
|
||||
claimed_at: Date | null;
|
||||
@@ -149,18 +153,34 @@ class TaskService {
|
||||
/**
|
||||
* Claim a task atomically for a worker
|
||||
* If role is null, claims ANY available task (role-agnostic worker)
|
||||
* Returns null if task pool is paused.
|
||||
*
|
||||
* @param role - Task role to claim, or null for any task
|
||||
* @param workerId - Worker ID claiming the task
|
||||
* @param curlPassed - Whether worker passed curl preflight (default true for backward compat)
|
||||
* @param httpPassed - Whether worker passed http/Puppeteer preflight (default false)
|
||||
*/
|
||||
async claimTask(role: TaskRole | null, workerId: string): Promise<WorkerTask | null> {
|
||||
async claimTask(
|
||||
role: TaskRole | null,
|
||||
workerId: string,
|
||||
curlPassed: boolean = true,
|
||||
httpPassed: boolean = false
|
||||
): Promise<WorkerTask | null> {
|
||||
// Check if task pool is paused - don't claim any tasks
|
||||
if (isTaskPoolPaused()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (role) {
|
||||
// Role-specific claiming - use the SQL function
|
||||
// Role-specific claiming - use the SQL function with preflight capabilities
|
||||
const result = await pool.query(
|
||||
`SELECT * FROM claim_task($1, $2)`,
|
||||
[role, workerId]
|
||||
`SELECT * FROM claim_task($1, $2, $3, $4)`,
|
||||
[role, workerId, curlPassed, httpPassed]
|
||||
);
|
||||
return (result.rows[0] as WorkerTask) || null;
|
||||
}
|
||||
|
||||
// Role-agnostic claiming - claim ANY pending task
|
||||
// Role-agnostic claiming - claim ANY pending task matching worker capabilities
|
||||
const result = await pool.query(`
|
||||
UPDATE worker_tasks
|
||||
SET
|
||||
@@ -171,6 +191,12 @@ class TaskService {
|
||||
SELECT id FROM worker_tasks
|
||||
WHERE status = 'pending'
|
||||
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
|
||||
-- Method compatibility: worker must have passed the required preflight
|
||||
AND (
|
||||
method IS NULL -- No preference, any worker can claim
|
||||
OR (method = 'curl' AND $2 = TRUE)
|
||||
OR (method = 'http' AND $3 = TRUE)
|
||||
)
|
||||
-- Exclude stores that already have an active task
|
||||
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
|
||||
SELECT dispensary_id FROM worker_tasks
|
||||
@@ -182,7 +208,7 @@ class TaskService {
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING *
|
||||
`, [workerId]);
|
||||
`, [workerId, curlPassed, httpPassed]);
|
||||
|
||||
return (result.rows[0] as WorkerTask) || null;
|
||||
}
|
||||
@@ -223,6 +249,24 @@ class TaskService {
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Release a claimed task back to pending (e.g., when preflight fails)
|
||||
* This allows another worker to pick it up.
|
||||
*/
|
||||
async releaseTask(taskId: number): Promise<void> {
|
||||
await pool.query(
|
||||
`UPDATE worker_tasks
|
||||
SET status = 'pending',
|
||||
worker_id = NULL,
|
||||
claimed_at = NULL,
|
||||
started_at = NULL,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1 AND status IN ('claimed', 'running')`,
|
||||
[taskId]
|
||||
);
|
||||
console.log(`[TaskService] Task ${taskId} released back to pending`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark a task as failed, with auto-retry if under max_retries
|
||||
* Returns true if task was re-queued for retry, false if permanently failed
|
||||
|
||||
@@ -51,6 +51,10 @@ import os from 'os';
|
||||
import { CrawlRotator } from '../services/crawl-rotator';
|
||||
import { setCrawlRotator } from '../platforms/dutchie';
|
||||
|
||||
// Dual-transport preflight system
|
||||
import { runCurlPreflight, CurlPreflightResult } from '../services/curl-preflight';
|
||||
import { runPuppeteerPreflightWithRetry, PuppeteerPreflightResult } from '../services/puppeteer-preflight';
|
||||
|
||||
// Task handlers by role
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: payload_fetch and product_refresh are now separate
|
||||
import { handlePayloadFetch } from './handlers/payload-fetch';
|
||||
@@ -59,16 +63,59 @@ import { handleProductDiscovery } from './handlers/product-discovery';
|
||||
import { handleStoreDiscovery } from './handlers/store-discovery';
|
||||
import { handleEntryPointDiscovery } from './handlers/entry-point-discovery';
|
||||
import { handleAnalyticsRefresh } from './handlers/analytics-refresh';
|
||||
import { handleWhoami } from './handlers/whoami';
|
||||
|
||||
const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '5000');
|
||||
const HEARTBEAT_INTERVAL_MS = parseInt(process.env.HEARTBEAT_INTERVAL_MS || '30000');
|
||||
const API_BASE_URL = process.env.API_BASE_URL || 'http://localhost:3010';
|
||||
|
||||
// =============================================================================
|
||||
// CONCURRENT TASK PROCESSING SETTINGS
|
||||
// =============================================================================
|
||||
// Workers can process multiple tasks simultaneously using async I/O.
|
||||
// This improves throughput for I/O-bound tasks (network calls, DB queries).
|
||||
//
|
||||
// Resource thresholds trigger "backoff" - the worker stops claiming new tasks
|
||||
// but continues processing existing ones until resources return to normal.
|
||||
//
|
||||
// See: docs/WORKER_TASK_ARCHITECTURE.md#concurrent-task-processing
|
||||
// =============================================================================
|
||||
|
||||
// Maximum number of tasks this worker will run concurrently
|
||||
// Tune based on workload: I/O-bound tasks benefit from higher concurrency
|
||||
const MAX_CONCURRENT_TASKS = parseInt(process.env.MAX_CONCURRENT_TASKS || '3');
|
||||
|
||||
// When heap memory usage exceeds this threshold (as decimal 0.0-1.0), stop claiming new tasks
|
||||
// Default 85% - gives headroom before OOM
|
||||
const MEMORY_BACKOFF_THRESHOLD = parseFloat(process.env.MEMORY_BACKOFF_THRESHOLD || '0.85');
|
||||
|
||||
// Parse max heap size from NODE_OPTIONS (--max-old-space-size=1500)
|
||||
// This is used as the denominator for memory percentage calculation
|
||||
// V8's heapTotal is dynamic and stays small when idle, causing false high percentages
|
||||
function getMaxHeapSizeMb(): number {
|
||||
const nodeOptions = process.env.NODE_OPTIONS || '';
|
||||
const match = nodeOptions.match(/--max-old-space-size=(\d+)/);
|
||||
if (match) {
|
||||
return parseInt(match[1], 10);
|
||||
}
|
||||
// Fallback: use 512MB if not specified
|
||||
return 512;
|
||||
}
|
||||
const MAX_HEAP_SIZE_MB = getMaxHeapSizeMb();
|
||||
|
||||
// When CPU usage exceeds this threshold (as decimal 0.0-1.0), stop claiming new tasks
|
||||
// Default 90% - allows some burst capacity
|
||||
const CPU_BACKOFF_THRESHOLD = parseFloat(process.env.CPU_BACKOFF_THRESHOLD || '0.90');
|
||||
|
||||
// How long to wait (ms) when in backoff state before rechecking resources
|
||||
const BACKOFF_DURATION_MS = parseInt(process.env.BACKOFF_DURATION_MS || '10000');
|
||||
|
||||
export interface TaskContext {
|
||||
pool: Pool;
|
||||
workerId: string;
|
||||
task: WorkerTask;
|
||||
heartbeat: () => Promise<void>;
|
||||
crawlRotator?: CrawlRotator;
|
||||
}
|
||||
|
||||
export interface TaskResult {
|
||||
@@ -83,17 +130,38 @@ export interface TaskResult {
|
||||
type TaskHandler = (ctx: TaskContext) => Promise<TaskResult>;
|
||||
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Handler registry
|
||||
// payload_fetch: Fetches from Dutchie API, saves to disk, chains to product_refresh
|
||||
// payload_fetch: Fetches from Dutchie API, saves to disk
|
||||
// product_refresh: Reads local payload, normalizes, upserts to DB
|
||||
// product_discovery: Main handler for product crawling
|
||||
const TASK_HANDLERS: Record<TaskRole, TaskHandler> = {
|
||||
payload_fetch: handlePayloadFetch, // NEW: API fetch -> disk
|
||||
product_refresh: handleProductRefresh, // CHANGED: disk -> DB
|
||||
payload_fetch: handlePayloadFetch, // API fetch -> disk
|
||||
product_refresh: handleProductRefresh, // disk -> DB
|
||||
product_discovery: handleProductDiscovery,
|
||||
store_discovery: handleStoreDiscovery,
|
||||
entry_point_discovery: handleEntryPointDiscovery,
|
||||
analytics_refresh: handleAnalyticsRefresh,
|
||||
whoami: handleWhoami, // Tests proxy + anti-detect
|
||||
};
|
||||
|
||||
/**
|
||||
* Resource usage stats reported to the registry and used for backoff decisions.
|
||||
* These values are included in worker heartbeats and displayed in the UI.
|
||||
*/
|
||||
interface ResourceStats {
|
||||
/** Current heap memory usage as decimal (0.0 to 1.0) */
|
||||
memoryPercent: number;
|
||||
/** Current heap used in MB */
|
||||
memoryMb: number;
|
||||
/** Total heap available in MB */
|
||||
memoryTotalMb: number;
|
||||
/** CPU usage percentage since last check (0 to 100) */
|
||||
cpuPercent: number;
|
||||
/** True if worker is currently in backoff state */
|
||||
isBackingOff: boolean;
|
||||
/** Reason for backoff (e.g., "Memory at 87.3% (threshold: 85%)") */
|
||||
backoffReason: string | null;
|
||||
}
|
||||
|
||||
export class TaskWorker {
|
||||
private pool: Pool;
|
||||
private workerId: string;
|
||||
@@ -102,14 +170,125 @@ export class TaskWorker {
|
||||
private isRunning: boolean = false;
|
||||
private heartbeatInterval: NodeJS.Timeout | null = null;
|
||||
private registryHeartbeatInterval: NodeJS.Timeout | null = null;
|
||||
private currentTask: WorkerTask | null = null;
|
||||
private crawlRotator: CrawlRotator;
|
||||
|
||||
// ==========================================================================
|
||||
// CONCURRENT TASK TRACKING
|
||||
// ==========================================================================
|
||||
// activeTasks: Map of task ID -> task object for all currently running tasks
|
||||
// taskPromises: Map of task ID -> Promise for cleanup when task completes
|
||||
// maxConcurrentTasks: How many tasks this worker will run in parallel
|
||||
// ==========================================================================
|
||||
private activeTasks: Map<number, WorkerTask> = new Map();
|
||||
private taskPromises: Map<number, Promise<void>> = new Map();
|
||||
private maxConcurrentTasks: number = MAX_CONCURRENT_TASKS;
|
||||
|
||||
// ==========================================================================
|
||||
// RESOURCE MONITORING FOR BACKOFF
|
||||
// ==========================================================================
|
||||
// CPU tracking uses differential measurement - we track last values and
|
||||
// calculate percentage based on elapsed time since last check.
|
||||
// ==========================================================================
|
||||
private lastCpuUsage: { user: number; system: number } = { user: 0, system: 0 };
|
||||
private lastCpuCheck: number = Date.now();
|
||||
private isBackingOff: boolean = false;
|
||||
private backoffReason: string | null = null;
|
||||
|
||||
// ==========================================================================
|
||||
// DUAL-TRANSPORT PREFLIGHT STATUS
|
||||
// ==========================================================================
|
||||
// Workers run BOTH preflights on startup:
|
||||
// - curl: axios/proxy transport - fast, for simple API calls
|
||||
// - http: Puppeteer/browser transport - anti-detect, for Dutchie GraphQL
|
||||
//
|
||||
// Task claiming checks method compatibility - worker must have passed
|
||||
// the preflight for the task's required method.
|
||||
// ==========================================================================
|
||||
private preflightCurlPassed: boolean = false;
|
||||
private preflightHttpPassed: boolean = false;
|
||||
private preflightCurlResult: CurlPreflightResult | null = null;
|
||||
private preflightHttpResult: PuppeteerPreflightResult | null = null;
|
||||
|
||||
constructor(role: TaskRole | null = null, workerId?: string) {
|
||||
this.pool = getPool();
|
||||
this.role = role;
|
||||
this.workerId = workerId || `worker-${uuidv4().slice(0, 8)}`;
|
||||
this.crawlRotator = new CrawlRotator(this.pool);
|
||||
|
||||
// Initialize CPU tracking
|
||||
const cpuUsage = process.cpuUsage();
|
||||
this.lastCpuUsage = { user: cpuUsage.user, system: cpuUsage.system };
|
||||
this.lastCpuCheck = Date.now();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current resource usage
|
||||
* Memory percentage is calculated against MAX_HEAP_SIZE_MB (from --max-old-space-size)
|
||||
* NOT against V8's dynamic heapTotal which stays small when idle
|
||||
*/
|
||||
private getResourceStats(): ResourceStats {
|
||||
const memUsage = process.memoryUsage();
|
||||
const heapUsedMb = memUsage.heapUsed / 1024 / 1024;
|
||||
// Use MAX_HEAP_SIZE_MB as ceiling, not dynamic heapTotal
|
||||
// V8's heapTotal stays small when idle (e.g., 36MB) causing false 95%+ readings
|
||||
// With --max-old-space-size=1500, we should calculate against 1500MB
|
||||
const memoryPercent = heapUsedMb / MAX_HEAP_SIZE_MB;
|
||||
|
||||
// Calculate CPU usage since last check
|
||||
const cpuUsage = process.cpuUsage();
|
||||
const now = Date.now();
|
||||
const elapsed = now - this.lastCpuCheck;
|
||||
|
||||
let cpuPercent = 0;
|
||||
if (elapsed > 0) {
|
||||
const userDiff = (cpuUsage.user - this.lastCpuUsage.user) / 1000; // microseconds to ms
|
||||
const systemDiff = (cpuUsage.system - this.lastCpuUsage.system) / 1000;
|
||||
cpuPercent = ((userDiff + systemDiff) / elapsed) * 100;
|
||||
}
|
||||
|
||||
// Update last values
|
||||
this.lastCpuUsage = { user: cpuUsage.user, system: cpuUsage.system };
|
||||
this.lastCpuCheck = now;
|
||||
|
||||
return {
|
||||
memoryPercent,
|
||||
memoryMb: Math.round(heapUsedMb),
|
||||
memoryTotalMb: MAX_HEAP_SIZE_MB, // Use max-old-space-size, not dynamic heapTotal
|
||||
cpuPercent: Math.min(100, cpuPercent), // Cap at 100%
|
||||
isBackingOff: this.isBackingOff,
|
||||
backoffReason: this.backoffReason,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if we should back off from taking new tasks
|
||||
*/
|
||||
private shouldBackOff(): { backoff: boolean; reason: string | null } {
|
||||
const stats = this.getResourceStats();
|
||||
|
||||
if (stats.memoryPercent > MEMORY_BACKOFF_THRESHOLD) {
|
||||
return { backoff: true, reason: `Memory at ${(stats.memoryPercent * 100).toFixed(1)}% (threshold: ${MEMORY_BACKOFF_THRESHOLD * 100}%)` };
|
||||
}
|
||||
|
||||
if (stats.cpuPercent > CPU_BACKOFF_THRESHOLD * 100) {
|
||||
return { backoff: true, reason: `CPU at ${stats.cpuPercent.toFixed(1)}% (threshold: ${CPU_BACKOFF_THRESHOLD * 100}%)` };
|
||||
}
|
||||
|
||||
return { backoff: false, reason: null };
|
||||
}
|
||||
|
||||
/**
|
||||
* Get count of currently running tasks
|
||||
*/
|
||||
get activeTaskCount(): number {
|
||||
return this.activeTasks.size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if we can accept more tasks
|
||||
*/
|
||||
private canAcceptMoreTasks(): boolean {
|
||||
return this.activeTasks.size < this.maxConcurrentTasks;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -117,40 +296,172 @@ export class TaskWorker {
|
||||
* Called once on worker startup before processing any tasks.
|
||||
*
|
||||
* IMPORTANT: Proxies are REQUIRED. Workers will wait until proxies are available.
|
||||
* Workers listen for PostgreSQL NOTIFY 'proxy_added' to wake up immediately when proxies are added.
|
||||
*/
|
||||
private async initializeStealth(): Promise<void> {
|
||||
const MAX_WAIT_MINUTES = 60;
|
||||
const RETRY_INTERVAL_MS = 30000; // 30 seconds
|
||||
const maxAttempts = (MAX_WAIT_MINUTES * 60 * 1000) / RETRY_INTERVAL_MS;
|
||||
const POLL_INTERVAL_MS = 30000; // 30 seconds fallback polling
|
||||
const maxAttempts = (MAX_WAIT_MINUTES * 60 * 1000) / POLL_INTERVAL_MS;
|
||||
let attempts = 0;
|
||||
let notifyClient: any = null;
|
||||
|
||||
while (attempts < maxAttempts) {
|
||||
try {
|
||||
// Load proxies from database
|
||||
await this.crawlRotator.initialize();
|
||||
|
||||
const stats = this.crawlRotator.proxy.getStats();
|
||||
if (stats.activeProxies > 0) {
|
||||
console.log(`[TaskWorker] Loaded ${stats.activeProxies} proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`);
|
||||
|
||||
// Wire rotator to Dutchie client - proxies will be used for ALL requests
|
||||
setCrawlRotator(this.crawlRotator);
|
||||
|
||||
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, proxy REQUIRED for all requests`);
|
||||
return;
|
||||
}
|
||||
|
||||
attempts++;
|
||||
console.log(`[TaskWorker] No active proxies available (attempt ${attempts}). Waiting ${RETRY_INTERVAL_MS / 1000}s for proxies to be added...`);
|
||||
await this.sleep(RETRY_INTERVAL_MS);
|
||||
} catch (error: any) {
|
||||
attempts++;
|
||||
console.log(`[TaskWorker] Error loading proxies (attempt ${attempts}): ${error.message}. Retrying in ${RETRY_INTERVAL_MS / 1000}s...`);
|
||||
await this.sleep(RETRY_INTERVAL_MS);
|
||||
}
|
||||
// Set up PostgreSQL LISTEN for proxy notifications
|
||||
try {
|
||||
notifyClient = await this.pool.connect();
|
||||
await notifyClient.query('LISTEN proxy_added');
|
||||
console.log(`[TaskWorker] Listening for proxy_added notifications...`);
|
||||
} catch (err: any) {
|
||||
console.log(`[TaskWorker] Could not set up LISTEN (will poll): ${err.message}`);
|
||||
}
|
||||
|
||||
throw new Error(`No active proxies available after waiting ${MAX_WAIT_MINUTES} minutes. Add proxies to the database.`);
|
||||
// Create a promise that resolves when notified
|
||||
let notifyResolve: (() => void) | null = null;
|
||||
if (notifyClient) {
|
||||
notifyClient.on('notification', (msg: any) => {
|
||||
if (msg.channel === 'proxy_added') {
|
||||
console.log(`[TaskWorker] Received proxy_added notification!`);
|
||||
if (notifyResolve) notifyResolve();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
try {
|
||||
while (attempts < maxAttempts) {
|
||||
try {
|
||||
// Load proxies from database
|
||||
await this.crawlRotator.initialize();
|
||||
|
||||
const stats = this.crawlRotator.proxy.getStats();
|
||||
if (stats.activeProxies > 0) {
|
||||
console.log(`[TaskWorker] Loaded ${stats.activeProxies} proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`);
|
||||
|
||||
// Wire rotator to Dutchie client - proxies will be used for ALL requests
|
||||
setCrawlRotator(this.crawlRotator);
|
||||
|
||||
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, proxy REQUIRED for all requests`);
|
||||
return;
|
||||
}
|
||||
|
||||
attempts++;
|
||||
console.log(`[TaskWorker] No active proxies available (attempt ${attempts}). Waiting for proxies...`);
|
||||
|
||||
// Wait for either notification or timeout
|
||||
await new Promise<void>((resolve) => {
|
||||
notifyResolve = resolve;
|
||||
setTimeout(resolve, POLL_INTERVAL_MS);
|
||||
});
|
||||
} catch (error: any) {
|
||||
attempts++;
|
||||
console.log(`[TaskWorker] Error loading proxies (attempt ${attempts}): ${error.message}. Retrying...`);
|
||||
await this.sleep(POLL_INTERVAL_MS);
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error(`No active proxies available after waiting ${MAX_WAIT_MINUTES} minutes. Add proxies to the database.`);
|
||||
} finally {
|
||||
// Clean up LISTEN connection
|
||||
if (notifyClient) {
|
||||
try {
|
||||
await notifyClient.query('UNLISTEN proxy_added');
|
||||
notifyClient.release();
|
||||
} catch {
|
||||
// Ignore cleanup errors
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run dual-transport preflights on startup
|
||||
* Tests both curl (axios/proxy) and http (Puppeteer/browser) transport methods.
|
||||
* Results are reported to worker_registry and used for task claiming.
|
||||
*
|
||||
* NOTE: All current tasks require 'http' method, so http preflight must pass
|
||||
* for the worker to claim any tasks. Curl preflight is for future use.
|
||||
*/
|
||||
private async runDualPreflights(): Promise<void> {
|
||||
console.log(`[TaskWorker] Running dual-transport preflights...`);
|
||||
|
||||
// Run both preflights in parallel for efficiency
|
||||
const [curlResult, httpResult] = await Promise.all([
|
||||
runCurlPreflight(this.crawlRotator).catch((err): CurlPreflightResult => ({
|
||||
method: 'curl',
|
||||
passed: false,
|
||||
proxyAvailable: false,
|
||||
proxyConnected: false,
|
||||
antidetectReady: false,
|
||||
proxyIp: null,
|
||||
fingerprint: null,
|
||||
error: `Preflight error: ${err.message}`,
|
||||
responseTimeMs: null,
|
||||
})),
|
||||
runPuppeteerPreflightWithRetry(this.crawlRotator, 1).catch((err): PuppeteerPreflightResult => ({
|
||||
method: 'http',
|
||||
passed: false,
|
||||
proxyAvailable: false,
|
||||
proxyConnected: false,
|
||||
antidetectReady: false,
|
||||
proxyIp: null,
|
||||
fingerprint: null,
|
||||
error: `Preflight error: ${err.message}`,
|
||||
responseTimeMs: null,
|
||||
productsReturned: 0,
|
||||
})),
|
||||
]);
|
||||
|
||||
// Store results
|
||||
this.preflightCurlResult = curlResult;
|
||||
this.preflightHttpResult = httpResult;
|
||||
this.preflightCurlPassed = curlResult.passed;
|
||||
this.preflightHttpPassed = httpResult.passed;
|
||||
|
||||
// Log results
|
||||
console.log(`[TaskWorker] CURL preflight: ${curlResult.passed ? 'PASSED' : 'FAILED'}${curlResult.error ? ` - ${curlResult.error}` : ''}`);
|
||||
console.log(`[TaskWorker] HTTP preflight: ${httpResult.passed ? 'PASSED' : 'FAILED'}${httpResult.error ? ` - ${httpResult.error}` : ''}`);
|
||||
|
||||
if (httpResult.passed && httpResult.productsReturned) {
|
||||
console.log(`[TaskWorker] HTTP preflight returned ${httpResult.productsReturned} products from test store`);
|
||||
}
|
||||
|
||||
// Report to worker_registry via API
|
||||
await this.reportPreflightStatus();
|
||||
|
||||
// Since all tasks require 'http', warn if http preflight failed
|
||||
if (!this.preflightHttpPassed) {
|
||||
console.warn(`[TaskWorker] WARNING: HTTP preflight failed - this worker cannot claim any tasks!`);
|
||||
console.warn(`[TaskWorker] Error: ${httpResult.error}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Report preflight status to worker_registry
|
||||
*/
|
||||
private async reportPreflightStatus(): Promise<void> {
|
||||
try {
|
||||
// Update worker_registry directly via SQL (more reliable than API)
|
||||
await this.pool.query(`
|
||||
SELECT update_worker_preflight($1, 'curl', $2, $3, $4)
|
||||
`, [
|
||||
this.workerId,
|
||||
this.preflightCurlPassed ? 'passed' : 'failed',
|
||||
this.preflightCurlResult?.responseTimeMs || null,
|
||||
this.preflightCurlResult?.error || null,
|
||||
]);
|
||||
|
||||
await this.pool.query(`
|
||||
SELECT update_worker_preflight($1, 'http', $2, $3, $4)
|
||||
`, [
|
||||
this.workerId,
|
||||
this.preflightHttpPassed ? 'passed' : 'failed',
|
||||
this.preflightHttpResult?.responseTimeMs || null,
|
||||
this.preflightHttpResult?.error || null,
|
||||
]);
|
||||
|
||||
console.log(`[TaskWorker] Preflight status reported to worker_registry`);
|
||||
} catch (err: any) {
|
||||
// Non-fatal - worker can still function
|
||||
console.warn(`[TaskWorker] Could not report preflight status: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -213,21 +524,32 @@ export class TaskWorker {
|
||||
const memUsage = process.memoryUsage();
|
||||
const cpuUsage = process.cpuUsage();
|
||||
const proxyLocation = this.crawlRotator.getProxyLocation();
|
||||
const resourceStats = this.getResourceStats();
|
||||
|
||||
// Get array of active task IDs
|
||||
const activeTaskIds = Array.from(this.activeTasks.keys());
|
||||
|
||||
await fetch(`${API_BASE_URL}/api/worker-registry/heartbeat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
worker_id: this.workerId,
|
||||
current_task_id: this.currentTask?.id || null,
|
||||
status: this.currentTask ? 'active' : 'idle',
|
||||
current_task_id: activeTaskIds[0] || null, // Primary task for backwards compat
|
||||
current_task_ids: activeTaskIds, // All active tasks
|
||||
active_task_count: this.activeTasks.size,
|
||||
max_concurrent_tasks: this.maxConcurrentTasks,
|
||||
status: this.activeTasks.size > 0 ? 'active' : 'idle',
|
||||
resources: {
|
||||
memory_mb: Math.round(memUsage.heapUsed / 1024 / 1024),
|
||||
memory_total_mb: Math.round(memUsage.heapTotal / 1024 / 1024),
|
||||
memory_rss_mb: Math.round(memUsage.rss / 1024 / 1024),
|
||||
memory_percent: Math.round(resourceStats.memoryPercent * 100),
|
||||
cpu_user_ms: Math.round(cpuUsage.user / 1000),
|
||||
cpu_system_ms: Math.round(cpuUsage.system / 1000),
|
||||
cpu_percent: Math.round(resourceStats.cpuPercent),
|
||||
proxy_location: proxyLocation,
|
||||
is_backing_off: this.isBackingOff,
|
||||
backoff_reason: this.backoffReason,
|
||||
}
|
||||
})
|
||||
});
|
||||
@@ -285,24 +607,119 @@ export class TaskWorker {
|
||||
// Register with the API to get a friendly name
|
||||
await this.register();
|
||||
|
||||
// Run dual-transport preflights
|
||||
await this.runDualPreflights();
|
||||
|
||||
// Start registry heartbeat
|
||||
this.startRegistryHeartbeat();
|
||||
|
||||
const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)';
|
||||
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg}`);
|
||||
const preflightMsg = `curl=${this.preflightCurlPassed ? '✓' : '✗'} http=${this.preflightHttpPassed ? '✓' : '✗'}`;
|
||||
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (${preflightMsg}, max ${this.maxConcurrentTasks} concurrent tasks)`);
|
||||
|
||||
while (this.isRunning) {
|
||||
try {
|
||||
await this.processNextTask();
|
||||
await this.mainLoop();
|
||||
} catch (error: any) {
|
||||
console.error(`[TaskWorker] Loop error:`, error.message);
|
||||
await this.sleep(POLL_INTERVAL_MS);
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for any remaining tasks to complete
|
||||
if (this.taskPromises.size > 0) {
|
||||
console.log(`[TaskWorker] Waiting for ${this.taskPromises.size} active tasks to complete...`);
|
||||
await Promise.allSettled(this.taskPromises.values());
|
||||
}
|
||||
|
||||
console.log(`[TaskWorker] Worker ${this.workerId} stopped`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Main loop - tries to fill up to maxConcurrentTasks
|
||||
*/
|
||||
private async mainLoop(): Promise<void> {
|
||||
// Check resource usage and backoff if needed
|
||||
const { backoff, reason } = this.shouldBackOff();
|
||||
if (backoff) {
|
||||
if (!this.isBackingOff) {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} backing off: ${reason}`);
|
||||
}
|
||||
this.isBackingOff = true;
|
||||
this.backoffReason = reason;
|
||||
await this.sleep(BACKOFF_DURATION_MS);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clear backoff state
|
||||
if (this.isBackingOff) {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} resuming normal operation`);
|
||||
this.isBackingOff = false;
|
||||
this.backoffReason = null;
|
||||
}
|
||||
|
||||
// Check for decommission signal
|
||||
const shouldDecommission = await this.checkDecommission();
|
||||
if (shouldDecommission) {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} received decommission signal - waiting for ${this.activeTasks.size} tasks to complete`);
|
||||
// Stop accepting new tasks, wait for current to finish
|
||||
this.isRunning = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Try to claim more tasks if we have capacity
|
||||
if (this.canAcceptMoreTasks()) {
|
||||
// Pass preflight capabilities to only claim compatible tasks
|
||||
const task = await taskService.claimTask(
|
||||
this.role,
|
||||
this.workerId,
|
||||
this.preflightCurlPassed,
|
||||
this.preflightHttpPassed
|
||||
);
|
||||
|
||||
if (task) {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} claimed task ${task.id} (${task.role}) [${this.activeTasks.size + 1}/${this.maxConcurrentTasks}]`);
|
||||
|
||||
// =================================================================
|
||||
// PREFLIGHT CHECK - CRITICAL: Worker MUST pass before task execution
|
||||
// Verifies: 1) Proxy available 2) Proxy connected 3) Anti-detect ready
|
||||
// =================================================================
|
||||
const preflight = await this.crawlRotator.preflight();
|
||||
if (!preflight.passed) {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} PREFLIGHT FAILED for task ${task.id}: ${preflight.error}`);
|
||||
console.log(`[TaskWorker] Releasing task ${task.id} back to pending - worker cannot proceed without proxy/anti-detect`);
|
||||
|
||||
// Release task back to pending so another worker can pick it up
|
||||
await taskService.releaseTask(task.id);
|
||||
|
||||
// Wait before trying again - give proxies time to recover
|
||||
await this.sleep(30000); // 30 second wait on preflight failure
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`[TaskWorker] ${this.friendlyName} preflight PASSED for task ${task.id} (proxy: ${preflight.proxyIp}, ${preflight.responseTimeMs}ms)`);
|
||||
|
||||
this.activeTasks.set(task.id, task);
|
||||
|
||||
// Start task in background (don't await)
|
||||
const taskPromise = this.executeTask(task);
|
||||
this.taskPromises.set(task.id, taskPromise);
|
||||
|
||||
// Clean up when done
|
||||
taskPromise.finally(() => {
|
||||
this.activeTasks.delete(task.id);
|
||||
this.taskPromises.delete(task.id);
|
||||
});
|
||||
|
||||
// Immediately try to claim more tasks (don't wait for poll interval)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// No task claimed or at capacity - wait before next poll
|
||||
await this.sleep(POLL_INTERVAL_MS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the worker
|
||||
*/
|
||||
@@ -315,23 +732,10 @@ export class TaskWorker {
|
||||
}
|
||||
|
||||
/**
|
||||
* Process the next available task
|
||||
* Execute a single task (runs concurrently with other tasks)
|
||||
*/
|
||||
private async processNextTask(): Promise<void> {
|
||||
// Try to claim a task
|
||||
const task = await taskService.claimTask(this.role, this.workerId);
|
||||
|
||||
if (!task) {
|
||||
// No tasks available, wait and retry
|
||||
await this.sleep(POLL_INTERVAL_MS);
|
||||
return;
|
||||
}
|
||||
|
||||
this.currentTask = task;
|
||||
console.log(`[TaskWorker] Claimed task ${task.id} (${task.role}) for dispensary ${task.dispensary_id || 'N/A'}`);
|
||||
|
||||
// Start heartbeat
|
||||
this.startHeartbeat(task.id);
|
||||
private async executeTask(task: WorkerTask): Promise<void> {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} starting task ${task.id} (${task.role}) for dispensary ${task.dispensary_id || 'N/A'}`);
|
||||
|
||||
try {
|
||||
// Mark as running
|
||||
@@ -351,6 +755,7 @@ export class TaskWorker {
|
||||
heartbeat: async () => {
|
||||
await taskService.heartbeat(task.id);
|
||||
},
|
||||
crawlRotator: this.crawlRotator,
|
||||
};
|
||||
|
||||
// Execute the task
|
||||
@@ -360,7 +765,7 @@ export class TaskWorker {
|
||||
// Mark as completed
|
||||
await taskService.completeTask(task.id, result);
|
||||
await this.reportTaskCompletion(true);
|
||||
console.log(`[TaskWorker] ${this.friendlyName} completed task ${task.id}`);
|
||||
console.log(`[TaskWorker] ${this.friendlyName} completed task ${task.id} [${this.activeTasks.size}/${this.maxConcurrentTasks} active]`);
|
||||
|
||||
// Chain next task if applicable
|
||||
const chainedTask = await taskService.chainNextTask({
|
||||
@@ -382,9 +787,35 @@ export class TaskWorker {
|
||||
await taskService.failTask(task.id, error.message);
|
||||
await this.reportTaskCompletion(false);
|
||||
console.error(`[TaskWorker] ${this.friendlyName} task ${task.id} error:`, error.message);
|
||||
} finally {
|
||||
this.stopHeartbeat();
|
||||
this.currentTask = null;
|
||||
}
|
||||
// Note: cleanup (removing from activeTasks) is handled in mainLoop's finally block
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if this worker has been flagged for decommission
|
||||
* Returns true if worker should stop after current task
|
||||
*/
|
||||
private async checkDecommission(): Promise<boolean> {
|
||||
try {
|
||||
// Check worker_registry for decommission flag
|
||||
const result = await this.pool.query(
|
||||
`SELECT decommission_requested, decommission_reason
|
||||
FROM worker_registry
|
||||
WHERE worker_id = $1`,
|
||||
[this.workerId]
|
||||
);
|
||||
|
||||
if (result.rows.length > 0 && result.rows[0].decommission_requested) {
|
||||
const reason = result.rows[0].decommission_reason || 'No reason provided';
|
||||
console.log(`[TaskWorker] Decommission requested: ${reason}`);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
} catch (error: any) {
|
||||
// If we can't check, continue running
|
||||
console.warn(`[TaskWorker] Could not check decommission status: ${error.message}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -421,12 +852,29 @@ export class TaskWorker {
|
||||
/**
|
||||
* Get worker info
|
||||
*/
|
||||
getInfo(): { workerId: string; role: TaskRole | null; isRunning: boolean; currentTaskId: number | null } {
|
||||
getInfo(): {
|
||||
workerId: string;
|
||||
role: TaskRole | null;
|
||||
isRunning: boolean;
|
||||
activeTaskIds: number[];
|
||||
activeTaskCount: number;
|
||||
maxConcurrentTasks: number;
|
||||
isBackingOff: boolean;
|
||||
backoffReason: string | null;
|
||||
preflightCurlPassed: boolean;
|
||||
preflightHttpPassed: boolean;
|
||||
} {
|
||||
return {
|
||||
workerId: this.workerId,
|
||||
role: this.role,
|
||||
isRunning: this.isRunning,
|
||||
currentTaskId: this.currentTask?.id || null,
|
||||
activeTaskIds: Array.from(this.activeTasks.keys()),
|
||||
activeTaskCount: this.activeTasks.size,
|
||||
maxConcurrentTasks: this.maxConcurrentTasks,
|
||||
isBackingOff: this.isBackingOff,
|
||||
backoffReason: this.backoffReason,
|
||||
preflightCurlPassed: this.preflightCurlPassed,
|
||||
preflightHttpPassed: this.preflightHttpPassed,
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -443,8 +891,8 @@ async function main(): Promise<void> {
|
||||
'store_discovery',
|
||||
'entry_point_discovery',
|
||||
'product_discovery',
|
||||
'payload_fetch', // NEW: Fetches from API, saves to disk
|
||||
'product_refresh', // CHANGED: Reads from disk, processes to DB
|
||||
'payload_fetch', // Fetches from API, saves to disk
|
||||
'product_refresh', // Reads from disk, processes to DB
|
||||
'analytics_refresh',
|
||||
];
|
||||
|
||||
|
||||
180
backend/test-intercept.js
Normal file
180
backend/test-intercept.js
Normal file
@@ -0,0 +1,180 @@
|
||||
/**
|
||||
* Stealth Browser Payload Capture - Direct GraphQL Injection
|
||||
*
|
||||
* Uses the browser session to make GraphQL requests that look organic.
|
||||
* Adds proper headers matching what Dutchie's frontend sends.
|
||||
*/
|
||||
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
||||
const fs = require('fs');
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
async function capturePayload(config) {
|
||||
const {
|
||||
dispensaryId = null,
|
||||
platformId,
|
||||
cName,
|
||||
outputPath = `/tmp/payload_${cName}_${Date.now()}.json`,
|
||||
} = config;
|
||||
|
||||
const browser = await puppeteer.launch({
|
||||
headless: 'new',
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
|
||||
// Establish session by visiting the embedded menu
|
||||
const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
|
||||
console.log(`[Capture] Establishing session at ${embedUrl}...`);
|
||||
|
||||
await page.goto(embedUrl, {
|
||||
waitUntil: 'networkidle2',
|
||||
timeout: 60000
|
||||
});
|
||||
|
||||
console.log('[Capture] Session established, fetching ALL products...');
|
||||
|
||||
// Fetch all products using GET requests with proper headers
|
||||
const result = await page.evaluate(async (platformId, cName) => {
|
||||
const allProducts = [];
|
||||
const logs = [];
|
||||
let pageNum = 0;
|
||||
const perPage = 100;
|
||||
let totalCount = 0;
|
||||
const sessionId = 'browser-session-' + Date.now();
|
||||
|
||||
try {
|
||||
while (pageNum < 30) { // Max 30 pages = 3000 products
|
||||
const variables = {
|
||||
includeEnterpriseSpecials: false,
|
||||
productsFilter: {
|
||||
dispensaryId: platformId,
|
||||
pricingType: 'rec',
|
||||
Status: 'Active', // 'Active' for in-stock products per CLAUDE.md
|
||||
types: [],
|
||||
useCache: true,
|
||||
isDefaultSort: true,
|
||||
sortBy: 'popularSortIdx',
|
||||
sortDirection: 1,
|
||||
bypassOnlineThresholds: true,
|
||||
isKioskMenu: false,
|
||||
removeProductsBelowOptionThresholds: false,
|
||||
},
|
||||
page: pageNum,
|
||||
perPage: perPage,
|
||||
};
|
||||
|
||||
const extensions = {
|
||||
persistedQuery: {
|
||||
version: 1,
|
||||
sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'
|
||||
}
|
||||
};
|
||||
|
||||
// Build GET URL like the browser does
|
||||
const qs = new URLSearchParams({
|
||||
operationName: 'FilteredProducts',
|
||||
variables: JSON.stringify(variables),
|
||||
extensions: JSON.stringify(extensions)
|
||||
});
|
||||
const url = `https://dutchie.com/api-3/graphql?${qs.toString()}`;
|
||||
|
||||
const response = await fetch(url, {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
'Accept': 'application/json',
|
||||
'content-type': 'application/json',
|
||||
'x-dutchie-session': sessionId,
|
||||
'apollographql-client-name': 'Marketplace (production)',
|
||||
},
|
||||
credentials: 'include'
|
||||
});
|
||||
|
||||
logs.push(`Page ${pageNum}: HTTP ${response.status}`);
|
||||
|
||||
if (!response.ok) {
|
||||
const text = await response.text();
|
||||
logs.push(`HTTP error: ${response.status} - ${text.slice(0, 200)}`);
|
||||
break;
|
||||
}
|
||||
|
||||
const json = await response.json();
|
||||
|
||||
if (json.errors) {
|
||||
logs.push(`GraphQL error: ${JSON.stringify(json.errors).slice(0, 200)}`);
|
||||
break;
|
||||
}
|
||||
|
||||
const data = json?.data?.filteredProducts;
|
||||
if (!data || !data.products) {
|
||||
logs.push('No products in response');
|
||||
break;
|
||||
}
|
||||
|
||||
const products = data.products;
|
||||
allProducts.push(...products);
|
||||
|
||||
if (pageNum === 0) {
|
||||
totalCount = data.queryInfo?.totalCount || 0;
|
||||
logs.push(`Total reported: ${totalCount}`);
|
||||
}
|
||||
|
||||
logs.push(`Got ${products.length} products (total: ${allProducts.length}/${totalCount})`);
|
||||
|
||||
if (allProducts.length >= totalCount || products.length < perPage) {
|
||||
break;
|
||||
}
|
||||
|
||||
pageNum++;
|
||||
|
||||
// Small delay between pages to be polite
|
||||
await new Promise(r => setTimeout(r, 200));
|
||||
}
|
||||
} catch (err) {
|
||||
logs.push(`Error: ${err.message}`);
|
||||
}
|
||||
|
||||
return { products: allProducts, totalCount, logs };
|
||||
}, platformId, cName);
|
||||
|
||||
await browser.close();
|
||||
|
||||
// Print logs from browser context
|
||||
result.logs.forEach(log => console.log(`[Browser] ${log}`));
|
||||
|
||||
console.log(`[Capture] Got ${result.products.length} products (API reported ${result.totalCount})`);
|
||||
|
||||
const payload = {
|
||||
dispensaryId: dispensaryId,
|
||||
platformId: platformId,
|
||||
cName,
|
||||
fetchedAt: new Date().toISOString(),
|
||||
productCount: result.products.length,
|
||||
products: result.products,
|
||||
};
|
||||
|
||||
fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
|
||||
|
||||
console.log(`\n=== Capture Complete ===`);
|
||||
console.log(`Total products: ${result.products.length}`);
|
||||
console.log(`Saved to: ${outputPath}`);
|
||||
console.log(`File size: ${(fs.statSync(outputPath).size / 1024).toFixed(1)} KB`);
|
||||
|
||||
return payload;
|
||||
}
|
||||
|
||||
// Run
|
||||
(async () => {
|
||||
const payload = await capturePayload({
|
||||
cName: 'AZ-Deeply-Rooted',
|
||||
platformId: '6405ef617056e8014d79101b',
|
||||
});
|
||||
|
||||
if (payload.products.length > 0) {
|
||||
const sample = payload.products[0];
|
||||
console.log(`\nSample: ${sample.Name || sample.name} - ${sample.brand?.name || sample.brandName}`);
|
||||
}
|
||||
})().catch(console.error);
|
||||
@@ -14,5 +14,5 @@
|
||||
"allowSyntheticDefaultImports": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist", "src/**/*.test.ts", "src/**/__tests__/**"]
|
||||
"exclude": ["node_modules", "dist", "src/**/*.test.ts", "src/**/__tests__/**", "src/_deprecated/**"]
|
||||
}
|
||||
|
||||
4
cannaiq/dist/index.html
vendored
4
cannaiq/dist/index.html
vendored
@@ -7,8 +7,8 @@
|
||||
<title>CannaIQ - Cannabis Menu Intelligence Platform</title>
|
||||
<meta name="description" content="CannaIQ provides real-time cannabis dispensary menu data, product tracking, and analytics for dispensaries across Arizona." />
|
||||
<meta name="keywords" content="cannabis, dispensary, menu, products, analytics, Arizona" />
|
||||
<script type="module" crossorigin src="/assets/index-BML8-px1.js"></script>
|
||||
<link rel="stylesheet" crossorigin href="/assets/index-B2gR-58G.css">
|
||||
<script type="module" crossorigin src="/assets/index-Dq9S0rVi.js"></script>
|
||||
<link rel="stylesheet" crossorigin href="/assets/index-DhM09B-d.css">
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
||||
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>CannaIQ - Cannabis Menu Intelligence Platform</title>
|
||||
<meta name="description" content="CannaIQ provides real-time cannabis dispensary menu data, product tracking, and analytics for dispensaries across Arizona." />
|
||||
|
||||
5
cannaiq/public/favicon.svg
Normal file
5
cannaiq/public/favicon.svg
Normal file
@@ -0,0 +1,5 @@
|
||||
<svg viewBox="0 0 32 32" xmlns="http://www.w3.org/2000/svg">
|
||||
<rect width="32" height="32" rx="6" fill="#059669"/>
|
||||
<path d="M16 6C12.5 6 9.5 7.5 7.5 10L16 16L24.5 10C22.5 7.5 19.5 6 16 6Z" fill="white"/>
|
||||
<path d="M7.5 10C6 12 5 14.5 5 17C5 22.5 10 26 16 26C22 26 27 22.5 27 17C27 14.5 26 12 24.5 10L16 16L7.5 10Z" fill="white" fill-opacity="0.7"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 360 B |
@@ -8,6 +8,7 @@ import { ProductDetail } from './pages/ProductDetail';
|
||||
import { Stores } from './pages/Stores';
|
||||
import { Dispensaries } from './pages/Dispensaries';
|
||||
import { DispensaryDetail } from './pages/DispensaryDetail';
|
||||
import { DispensarySchedule } from './pages/DispensarySchedule';
|
||||
import { StoreDetail } from './pages/StoreDetail';
|
||||
import { StoreBrands } from './pages/StoreBrands';
|
||||
import { StoreSpecials } from './pages/StoreSpecials';
|
||||
@@ -46,7 +47,6 @@ import CrossStateCompare from './pages/CrossStateCompare';
|
||||
import StateDetail from './pages/StateDetail';
|
||||
import { Discovery } from './pages/Discovery';
|
||||
import { WorkersDashboard } from './pages/WorkersDashboard';
|
||||
import { JobQueue } from './pages/JobQueue';
|
||||
import TasksDashboard from './pages/TasksDashboard';
|
||||
import { ScraperOverviewDashboard } from './pages/ScraperOverviewDashboard';
|
||||
import { SeoOrchestrator } from './pages/admin/seo/SeoOrchestrator';
|
||||
@@ -66,6 +66,7 @@ export default function App() {
|
||||
<Route path="/stores" element={<PrivateRoute><Stores /></PrivateRoute>} />
|
||||
<Route path="/dispensaries" element={<PrivateRoute><Dispensaries /></PrivateRoute>} />
|
||||
<Route path="/dispensaries/:state/:city/:slug" element={<PrivateRoute><DispensaryDetail /></PrivateRoute>} />
|
||||
<Route path="/dispensaries/:state/:city/:slug/schedule" element={<PrivateRoute><DispensarySchedule /></PrivateRoute>} />
|
||||
<Route path="/stores/:state/:storeName/:slug/brands" element={<PrivateRoute><StoreBrands /></PrivateRoute>} />
|
||||
<Route path="/stores/:state/:storeName/:slug/specials" element={<PrivateRoute><StoreSpecials /></PrivateRoute>} />
|
||||
<Route path="/stores/:state/:storeName/:slug" element={<PrivateRoute><StoreDetail /></PrivateRoute>} />
|
||||
@@ -123,8 +124,6 @@ export default function App() {
|
||||
<Route path="/discovery" element={<PrivateRoute><Discovery /></PrivateRoute>} />
|
||||
{/* Workers Dashboard */}
|
||||
<Route path="/workers" element={<PrivateRoute><WorkersDashboard /></PrivateRoute>} />
|
||||
{/* Job Queue Management */}
|
||||
<Route path="/job-queue" element={<PrivateRoute><JobQueue /></PrivateRoute>} />
|
||||
{/* Task Queue Dashboard */}
|
||||
<Route path="/tasks" element={<PrivateRoute><TasksDashboard /></PrivateRoute>} />
|
||||
{/* Scraper Overview Dashboard (new primary) */}
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import { ReactNode, useEffect, useState } from 'react';
|
||||
import { useNavigate, useLocation } from 'react-router-dom';
|
||||
import { ReactNode, useEffect, useState, useRef } from 'react';
|
||||
import { useNavigate, useLocation, Link } from 'react-router-dom';
|
||||
import { useAuthStore } from '../store/authStore';
|
||||
import { api } from '../lib/api';
|
||||
import { StateSelector } from './StateSelector';
|
||||
import {
|
||||
LayoutDashboard,
|
||||
Building2,
|
||||
@@ -48,8 +47,8 @@ interface NavLinkProps {
|
||||
|
||||
function NavLink({ to, icon, label, isActive }: NavLinkProps) {
|
||||
return (
|
||||
<a
|
||||
href={to}
|
||||
<Link
|
||||
to={to}
|
||||
className={`flex items-center gap-3 px-3 py-2 rounded-lg text-sm font-medium transition-colors ${
|
||||
isActive
|
||||
? 'bg-emerald-50 text-emerald-700'
|
||||
@@ -58,7 +57,7 @@ function NavLink({ to, icon, label, isActive }: NavLinkProps) {
|
||||
>
|
||||
<span className={`flex-shrink-0 ${isActive ? 'text-emerald-600' : 'text-gray-400'}`}>{icon}</span>
|
||||
<span>{label}</span>
|
||||
</a>
|
||||
</Link>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -86,6 +85,8 @@ export function Layout({ children }: LayoutProps) {
|
||||
const { user, logout } = useAuthStore();
|
||||
const [versionInfo, setVersionInfo] = useState<VersionInfo | null>(null);
|
||||
const [sidebarOpen, setSidebarOpen] = useState(false);
|
||||
const navRef = useRef<HTMLElement>(null);
|
||||
const scrollPositionRef = useRef<number>(0);
|
||||
|
||||
useEffect(() => {
|
||||
const fetchVersion = async () => {
|
||||
@@ -111,16 +112,34 @@ export function Layout({ children }: LayoutProps) {
|
||||
return location.pathname.startsWith(path);
|
||||
};
|
||||
|
||||
// Close sidebar on route change (mobile)
|
||||
// Save scroll position before route change
|
||||
useEffect(() => {
|
||||
const nav = navRef.current;
|
||||
if (nav) {
|
||||
const handleScroll = () => {
|
||||
scrollPositionRef.current = nav.scrollTop;
|
||||
};
|
||||
nav.addEventListener('scroll', handleScroll);
|
||||
return () => nav.removeEventListener('scroll', handleScroll);
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Restore scroll position after route change and close mobile sidebar
|
||||
useEffect(() => {
|
||||
setSidebarOpen(false);
|
||||
// Restore scroll position after render
|
||||
requestAnimationFrame(() => {
|
||||
if (navRef.current) {
|
||||
navRef.current.scrollTop = scrollPositionRef.current;
|
||||
}
|
||||
});
|
||||
}, [location.pathname]);
|
||||
|
||||
const sidebarContent = (
|
||||
<>
|
||||
{/* Logo/Brand */}
|
||||
<div className="px-6 py-5 border-b border-gray-200">
|
||||
<div className="flex items-center gap-3">
|
||||
<Link to="/dashboard" className="flex items-center gap-3 hover:opacity-80 transition-opacity">
|
||||
<div className="w-8 h-8 bg-emerald-600 rounded-lg flex items-center justify-center">
|
||||
<svg viewBox="0 0 24 24" className="w-5 h-5 text-white" fill="currentColor">
|
||||
<path d="M12 2C8.5 2 5.5 3.5 3.5 6L12 12L20.5 6C18.5 3.5 15.5 2 12 2Z" />
|
||||
@@ -131,21 +150,17 @@ export function Layout({ children }: LayoutProps) {
|
||||
<span className="text-lg font-bold text-gray-900">CannaIQ</span>
|
||||
{versionInfo && (
|
||||
<p className="text-xs text-gray-400">
|
||||
v{versionInfo.version} ({versionInfo.git_sha}) {versionInfo.build_time !== 'unknown' && `- ${new Date(versionInfo.build_time).toLocaleDateString()}`}
|
||||
{versionInfo.git_sha || 'dev'}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</Link>
|
||||
<p className="text-xs text-gray-500 mt-2 truncate">{user?.email}</p>
|
||||
</div>
|
||||
|
||||
{/* State Selector */}
|
||||
<div className="px-4 py-3 border-b border-gray-200 bg-gray-50">
|
||||
<StateSelector showLabel={false} />
|
||||
</div>
|
||||
|
||||
{/* Navigation */}
|
||||
<nav className="flex-1 px-3 py-4 space-y-6 overflow-y-auto">
|
||||
<nav ref={navRef} className="flex-1 px-3 py-4 space-y-6 overflow-y-auto">
|
||||
<NavSection title="Main">
|
||||
<NavLink to="/dashboard" icon={<LayoutDashboard className="w-4 h-4" />} label="Dashboard" isActive={isActive('/dashboard', true)} />
|
||||
<NavLink to="/dispensaries" icon={<Building2 className="w-4 h-4" />} label="Dispensaries" isActive={isActive('/dispensaries')} />
|
||||
@@ -164,8 +179,7 @@ export function Layout({ children }: LayoutProps) {
|
||||
<NavLink to="/admin/orchestrator" icon={<Activity className="w-4 h-4" />} label="Orchestrator" isActive={isActive('/admin/orchestrator')} />
|
||||
<NavLink to="/users" icon={<UserCog className="w-4 h-4" />} label="Users" isActive={isActive('/users')} />
|
||||
<NavLink to="/workers" icon={<Users className="w-4 h-4" />} label="Workers" isActive={isActive('/workers')} />
|
||||
<NavLink to="/job-queue" icon={<ListOrdered className="w-4 h-4" />} label="Job Queue" isActive={isActive('/job-queue')} />
|
||||
<NavLink to="/tasks" icon={<ListChecks className="w-4 h-4" />} label="Task Queue" isActive={isActive('/tasks')} />
|
||||
<NavLink to="/tasks" icon={<ListChecks className="w-4 h-4" />} label="Tasks" isActive={isActive('/tasks')} />
|
||||
<NavLink to="/admin/seo" icon={<FileText className="w-4 h-4" />} label="SEO Pages" isActive={isActive('/admin/seo')} />
|
||||
<NavLink to="/proxies" icon={<Shield className="w-4 h-4" />} label="Proxies" isActive={isActive('/proxies')} />
|
||||
<NavLink to="/api-permissions" icon={<Key className="w-4 h-4" />} label="API Keys" isActive={isActive('/api-permissions')} />
|
||||
@@ -214,7 +228,7 @@ export function Layout({ children }: LayoutProps) {
|
||||
<button onClick={() => setSidebarOpen(true)} className="p-2 -ml-2 rounded-lg hover:bg-gray-100">
|
||||
<Menu className="w-5 h-5 text-gray-600" />
|
||||
</button>
|
||||
<div className="flex items-center gap-2">
|
||||
<Link to="/dashboard" className="flex items-center gap-2 hover:opacity-80 transition-opacity">
|
||||
<div className="w-6 h-6 bg-emerald-600 rounded flex items-center justify-center">
|
||||
<svg viewBox="0 0 24 24" className="w-4 h-4 text-white" fill="currentColor">
|
||||
<path d="M12 2C8.5 2 5.5 3.5 3.5 6L12 12L20.5 6C18.5 3.5 15.5 2 12 2Z" />
|
||||
@@ -222,7 +236,7 @@ export function Layout({ children }: LayoutProps) {
|
||||
</svg>
|
||||
</div>
|
||||
<span className="font-semibold text-gray-900">CannaIQ</span>
|
||||
</div>
|
||||
</Link>
|
||||
</div>
|
||||
|
||||
{/* Page content */}
|
||||
|
||||
138
cannaiq/src/components/PasswordConfirmModal.tsx
Normal file
138
cannaiq/src/components/PasswordConfirmModal.tsx
Normal file
@@ -0,0 +1,138 @@
|
||||
import { useState, useEffect, useRef } from 'react';
|
||||
import { api } from '../lib/api';
|
||||
import { Shield, X, Loader2 } from 'lucide-react';
|
||||
|
||||
interface PasswordConfirmModalProps {
|
||||
isOpen: boolean;
|
||||
onClose: () => void;
|
||||
onConfirm: () => void;
|
||||
title: string;
|
||||
description: string;
|
||||
}
|
||||
|
||||
export function PasswordConfirmModal({
|
||||
isOpen,
|
||||
onClose,
|
||||
onConfirm,
|
||||
title,
|
||||
description,
|
||||
}: PasswordConfirmModalProps) {
|
||||
const [password, setPassword] = useState('');
|
||||
const [error, setError] = useState('');
|
||||
const [loading, setLoading] = useState(false);
|
||||
const inputRef = useRef<HTMLInputElement>(null);
|
||||
|
||||
useEffect(() => {
|
||||
if (isOpen) {
|
||||
setPassword('');
|
||||
setError('');
|
||||
// Focus the input when modal opens
|
||||
setTimeout(() => inputRef.current?.focus(), 100);
|
||||
}
|
||||
}, [isOpen]);
|
||||
|
||||
const handleSubmit = async (e: React.FormEvent) => {
|
||||
e.preventDefault();
|
||||
if (!password.trim()) {
|
||||
setError('Password is required');
|
||||
return;
|
||||
}
|
||||
|
||||
setLoading(true);
|
||||
setError('');
|
||||
|
||||
try {
|
||||
const result = await api.verifyPassword(password);
|
||||
if (result.verified) {
|
||||
onConfirm();
|
||||
onClose();
|
||||
} else {
|
||||
setError('Invalid password');
|
||||
}
|
||||
} catch (err: any) {
|
||||
setError(err.message || 'Verification failed');
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
if (!isOpen) return null;
|
||||
|
||||
return (
|
||||
<div className="fixed inset-0 z-50 flex items-center justify-center">
|
||||
{/* Backdrop */}
|
||||
<div
|
||||
className="absolute inset-0 bg-black bg-opacity-50"
|
||||
onClick={onClose}
|
||||
/>
|
||||
|
||||
{/* Modal */}
|
||||
<div className="relative bg-white rounded-lg shadow-xl max-w-md w-full mx-4">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between px-6 py-4 border-b border-gray-200">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="p-2 bg-amber-100 rounded-lg">
|
||||
<Shield className="w-5 h-5 text-amber-600" />
|
||||
</div>
|
||||
<h3 className="text-lg font-semibold text-gray-900">{title}</h3>
|
||||
</div>
|
||||
<button
|
||||
onClick={onClose}
|
||||
className="p-1 hover:bg-gray-100 rounded-lg transition-colors"
|
||||
>
|
||||
<X className="w-5 h-5 text-gray-500" />
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Body */}
|
||||
<form onSubmit={handleSubmit}>
|
||||
<div className="px-6 py-4">
|
||||
<p className="text-gray-600 mb-4">{description}</p>
|
||||
|
||||
<div className="space-y-2">
|
||||
<label
|
||||
htmlFor="password"
|
||||
className="block text-sm font-medium text-gray-700"
|
||||
>
|
||||
Enter your password to continue
|
||||
</label>
|
||||
<input
|
||||
ref={inputRef}
|
||||
type="password"
|
||||
id="password"
|
||||
value={password}
|
||||
onChange={(e) => setPassword(e.target.value)}
|
||||
className="w-full px-4 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-emerald-500 focus:border-emerald-500"
|
||||
placeholder="Password"
|
||||
disabled={loading}
|
||||
/>
|
||||
{error && (
|
||||
<p className="text-sm text-red-600">{error}</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Footer */}
|
||||
<div className="flex justify-end gap-3 px-6 py-4 border-t border-gray-200 bg-gray-50 rounded-b-lg">
|
||||
<button
|
||||
type="button"
|
||||
onClick={onClose}
|
||||
disabled={loading}
|
||||
className="px-4 py-2 text-gray-700 hover:bg-gray-100 rounded-lg transition-colors"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
<button
|
||||
type="submit"
|
||||
disabled={loading}
|
||||
className="px-4 py-2 bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 transition-colors disabled:opacity-50 flex items-center gap-2"
|
||||
>
|
||||
{loading && <Loader2 className="w-4 h-4 animate-spin" />}
|
||||
Confirm
|
||||
</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -84,6 +84,13 @@ class ApiClient {
|
||||
});
|
||||
}
|
||||
|
||||
async verifyPassword(password: string) {
|
||||
return this.request<{ verified: boolean; error?: string }>('/api/auth/verify-password', {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ password }),
|
||||
});
|
||||
}
|
||||
|
||||
async getMe() {
|
||||
return this.request<{ user: any }>('/api/auth/me');
|
||||
}
|
||||
@@ -983,6 +990,47 @@ class ApiClient {
|
||||
}>(`/api/markets/stores/${id}/categories`);
|
||||
}
|
||||
|
||||
async getStoreCrawlHistory(id: number, limit = 50) {
|
||||
return this.request<{
|
||||
dispensary: {
|
||||
id: number;
|
||||
name: string;
|
||||
dba_name: string | null;
|
||||
slug: string;
|
||||
state: string;
|
||||
city: string;
|
||||
menu_type: string | null;
|
||||
platform_dispensary_id: string | null;
|
||||
last_menu_scrape: string | null;
|
||||
} | null;
|
||||
history: Array<{
|
||||
id: number;
|
||||
runId: string | null;
|
||||
profileKey: string | null;
|
||||
crawlerModule: string | null;
|
||||
stateAtStart: string | null;
|
||||
stateAtEnd: string | null;
|
||||
totalSteps: number;
|
||||
durationMs: number | null;
|
||||
success: boolean;
|
||||
errorMessage: string | null;
|
||||
productsFound: number | null;
|
||||
startedAt: string | null;
|
||||
completedAt: string | null;
|
||||
}>;
|
||||
nextSchedule: {
|
||||
scheduleId: number;
|
||||
jobName: string;
|
||||
enabled: boolean;
|
||||
baseIntervalMinutes: number;
|
||||
jitterMinutes: number;
|
||||
nextRunAt: string | null;
|
||||
lastRunAt: string | null;
|
||||
lastStatus: string | null;
|
||||
} | null;
|
||||
}>(`/api/markets/stores/${id}/crawl-history?limit=${limit}`);
|
||||
}
|
||||
|
||||
// Global Brands/Categories (from v_brands/v_categories views)
|
||||
async getMarketBrands(params?: { limit?: number; offset?: number }) {
|
||||
const searchParams = new URLSearchParams();
|
||||
@@ -1518,10 +1566,11 @@ class ApiClient {
|
||||
}
|
||||
|
||||
// Intelligence API
|
||||
async getIntelligenceBrands(params?: { limit?: number; offset?: number }) {
|
||||
async getIntelligenceBrands(params?: { limit?: number; offset?: number; state?: string }) {
|
||||
const searchParams = new URLSearchParams();
|
||||
if (params?.limit) searchParams.append('limit', params.limit.toString());
|
||||
if (params?.offset) searchParams.append('offset', params.offset.toString());
|
||||
if (params?.state) searchParams.append('state', params.state);
|
||||
const queryString = searchParams.toString() ? `?${searchParams.toString()}` : '';
|
||||
return this.request<{
|
||||
brands: Array<{
|
||||
@@ -1536,7 +1585,10 @@ class ApiClient {
|
||||
}>(`/api/admin/intelligence/brands${queryString}`);
|
||||
}
|
||||
|
||||
async getIntelligencePricing() {
|
||||
async getIntelligencePricing(params?: { state?: string }) {
|
||||
const searchParams = new URLSearchParams();
|
||||
if (params?.state) searchParams.append('state', params.state);
|
||||
const queryString = searchParams.toString() ? `?${searchParams.toString()}` : '';
|
||||
return this.request<{
|
||||
byCategory: Array<{
|
||||
category: string;
|
||||
@@ -1552,7 +1604,7 @@ class ApiClient {
|
||||
maxPrice: number;
|
||||
totalProducts: number;
|
||||
};
|
||||
}>('/api/admin/intelligence/pricing');
|
||||
}>(`/api/admin/intelligence/pricing${queryString}`);
|
||||
}
|
||||
|
||||
async getIntelligenceStoreActivity(params?: { state?: string; chainId?: number; limit?: number }) {
|
||||
@@ -2884,6 +2936,46 @@ class ApiClient {
|
||||
`/api/tasks/store/${dispensaryId}/active`
|
||||
);
|
||||
}
|
||||
|
||||
// Task Pool Control
|
||||
async getTaskPoolStatus() {
|
||||
return this.request<{ success: boolean; paused: boolean; message: string }>(
|
||||
'/api/tasks/pool/status'
|
||||
);
|
||||
}
|
||||
|
||||
async pauseTaskPool() {
|
||||
return this.request<{ success: boolean; paused: boolean; message: string }>(
|
||||
'/api/tasks/pool/pause',
|
||||
{ method: 'POST' }
|
||||
);
|
||||
}
|
||||
|
||||
async resumeTaskPool() {
|
||||
return this.request<{ success: boolean; paused: boolean; message: string }>(
|
||||
'/api/tasks/pool/resume',
|
||||
{ method: 'POST' }
|
||||
);
|
||||
}
|
||||
|
||||
// K8s Worker Control
|
||||
async getK8sWorkers() {
|
||||
return this.request<{
|
||||
success: boolean;
|
||||
available: boolean;
|
||||
replicas: number;
|
||||
readyReplicas: number;
|
||||
availableReplicas?: number;
|
||||
error?: string;
|
||||
}>('/api/k8s/workers');
|
||||
}
|
||||
|
||||
async scaleK8sWorkers(replicas: number) {
|
||||
return this.request<{ success: boolean; replicas: number; message?: string; error?: string }>(
|
||||
'/api/k8s/workers/scale',
|
||||
{ method: 'POST', body: JSON.stringify({ replicas }) }
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
export const api = new ApiClient(API_URL);
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import { useEffect, useState } from 'react';
|
||||
import { Layout } from '../components/Layout';
|
||||
import { HealthPanel } from '../components/HealthPanel';
|
||||
import { api } from '../lib/api';
|
||||
import { useNavigate } from 'react-router-dom';
|
||||
import {
|
||||
@@ -42,7 +41,6 @@ export function Dashboard() {
|
||||
const [activity, setActivity] = useState<any>(null);
|
||||
const [nationalStats, setNationalStats] = useState<any>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [refreshing, setRefreshing] = useState(false);
|
||||
const [pendingChangesCount, setPendingChangesCount] = useState(0);
|
||||
const [showNotification, setShowNotification] = useState(false);
|
||||
const [taskCounts, setTaskCounts] = useState<Record<string, number> | null>(null);
|
||||
@@ -93,10 +91,7 @@ export function Dashboard() {
|
||||
}
|
||||
};
|
||||
|
||||
const loadData = async (isRefresh = false) => {
|
||||
if (isRefresh) {
|
||||
setRefreshing(true);
|
||||
}
|
||||
const loadData = async () => {
|
||||
try {
|
||||
// Fetch dashboard data (primary data source)
|
||||
const dashboard = await api.getMarketDashboard();
|
||||
@@ -158,7 +153,6 @@ export function Dashboard() {
|
||||
console.error('Failed to load dashboard:', error);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
setRefreshing(false);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -271,24 +265,11 @@ export function Dashboard() {
|
||||
|
||||
<div className="space-y-8">
|
||||
{/* Header */}
|
||||
<div className="flex flex-col sm:flex-row sm:justify-between sm:items-center gap-4">
|
||||
<div>
|
||||
<h1 className="text-xl sm:text-2xl font-semibold text-gray-900">Dashboard</h1>
|
||||
<p className="text-sm text-gray-500 mt-1">Monitor your dispensary data aggregation</p>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => loadData(true)}
|
||||
disabled={refreshing}
|
||||
className="inline-flex items-center justify-center gap-2 px-4 py-2 bg-white border border-gray-200 rounded-lg hover:bg-gray-50 transition-colors text-sm font-medium text-gray-700 self-start sm:self-auto disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
<RefreshCw className={`w-4 h-4 ${refreshing ? 'animate-spin' : ''}`} />
|
||||
{refreshing ? 'Refreshing...' : 'Refresh'}
|
||||
</button>
|
||||
<div>
|
||||
<h1 className="text-xl sm:text-2xl font-semibold text-gray-900">Dashboard</h1>
|
||||
<p className="text-sm text-gray-500 mt-1">Monitor your dispensary data aggregation</p>
|
||||
</div>
|
||||
|
||||
{/* System Health */}
|
||||
<HealthPanel showQueues={false} refreshInterval={60000} />
|
||||
|
||||
{/* Stats Grid */}
|
||||
<div className="grid grid-cols-2 lg:grid-cols-3 gap-3 sm:gap-6">
|
||||
{/* Products */}
|
||||
|
||||
@@ -161,23 +161,6 @@ export function Dispensaries() {
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
Filter by Status
|
||||
</label>
|
||||
<select
|
||||
value={filterStatus}
|
||||
onChange={(e) => handleStatusFilter(e.target.value)}
|
||||
className={`w-full px-3 py-2 border rounded-lg focus:ring-2 focus:ring-blue-500 focus:border-blue-500 ${
|
||||
filterStatus === 'dropped' ? 'border-red-300 bg-red-50' : 'border-gray-300'
|
||||
}`}
|
||||
>
|
||||
<option value="">All Statuses</option>
|
||||
<option value="open">Open</option>
|
||||
<option value="dropped">Dropped (Needs Review)</option>
|
||||
<option value="closed">Closed</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -204,47 +204,6 @@ export function DispensaryDetail() {
|
||||
Back to Dispensaries
|
||||
</button>
|
||||
|
||||
{/* Update Dropdown */}
|
||||
<div className="relative">
|
||||
<button
|
||||
onClick={() => setShowUpdateDropdown(!showUpdateDropdown)}
|
||||
disabled={isUpdating}
|
||||
className="flex items-center gap-2 px-4 py-2 text-sm font-medium text-white bg-blue-600 hover:bg-blue-700 rounded-lg disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
<RefreshCw className={`w-4 h-4 ${isUpdating ? 'animate-spin' : ''}`} />
|
||||
{isUpdating ? 'Updating...' : 'Update'}
|
||||
{!isUpdating && <ChevronDown className="w-4 h-4" />}
|
||||
</button>
|
||||
|
||||
{showUpdateDropdown && !isUpdating && (
|
||||
<div className="absolute right-0 mt-2 w-48 bg-white rounded-lg shadow-lg border border-gray-200 z-10">
|
||||
<button
|
||||
onClick={() => handleUpdate('products')}
|
||||
className="w-full text-left px-4 py-2 text-sm text-gray-700 hover:bg-gray-100 rounded-t-lg"
|
||||
>
|
||||
Products
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleUpdate('brands')}
|
||||
className="w-full text-left px-4 py-2 text-sm text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
Brands
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleUpdate('specials')}
|
||||
className="w-full text-left px-4 py-2 text-sm text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
Specials
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleUpdate('all')}
|
||||
className="w-full text-left px-4 py-2 text-sm text-gray-700 hover:bg-gray-100 rounded-b-lg border-t border-gray-200"
|
||||
>
|
||||
All
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Dispensary Header */}
|
||||
@@ -266,7 +225,7 @@ export function DispensaryDetail() {
|
||||
<div className="flex items-center gap-2 text-sm text-gray-600 bg-gray-50 px-4 py-2 rounded-lg">
|
||||
<Calendar className="w-4 h-4" />
|
||||
<div>
|
||||
<span className="font-medium">Last Crawl Date:</span>
|
||||
<span className="font-medium">Last Updated:</span>
|
||||
<span className="ml-2">
|
||||
{dispensary.last_menu_scrape
|
||||
? new Date(dispensary.last_menu_scrape).toLocaleDateString('en-US', {
|
||||
@@ -331,7 +290,7 @@ export function DispensaryDetail() {
|
||||
</a>
|
||||
)}
|
||||
<Link
|
||||
to="/schedule"
|
||||
to={`/dispensaries/${state}/${city}/${slug}/schedule`}
|
||||
className="flex items-center gap-2 text-sm text-blue-600 hover:text-blue-800"
|
||||
>
|
||||
<Clock className="w-4 h-4" />
|
||||
@@ -533,57 +492,31 @@ export function DispensaryDetail() {
|
||||
`$${product.regular_price}`
|
||||
) : '-'}
|
||||
</td>
|
||||
<td className="text-center whitespace-nowrap">
|
||||
{product.quantity != null ? (
|
||||
<span className={`badge badge-sm ${product.quantity > 0 ? 'badge-info' : 'badge-error'}`}>
|
||||
{product.quantity}
|
||||
</span>
|
||||
) : '-'}
|
||||
<td className="text-center whitespace-nowrap text-sm text-gray-700">
|
||||
{product.quantity != null ? product.quantity : '-'}
|
||||
</td>
|
||||
<td className="text-center whitespace-nowrap">
|
||||
{product.thc_percentage ? (
|
||||
<span className="badge badge-success badge-sm">{product.thc_percentage}%</span>
|
||||
) : '-'}
|
||||
<td className="text-center whitespace-nowrap text-sm text-gray-700">
|
||||
{product.thc_percentage ? `${product.thc_percentage}%` : '-'}
|
||||
</td>
|
||||
<td className="text-center whitespace-nowrap">
|
||||
{product.cbd_percentage ? (
|
||||
<span className="badge badge-info badge-sm">{product.cbd_percentage}%</span>
|
||||
) : '-'}
|
||||
<td className="text-center whitespace-nowrap text-sm text-gray-700">
|
||||
{product.cbd_percentage ? `${product.cbd_percentage}%` : '-'}
|
||||
</td>
|
||||
<td className="text-center whitespace-nowrap">
|
||||
{product.strain_type ? (
|
||||
<span className="badge badge-ghost badge-sm">{product.strain_type}</span>
|
||||
) : '-'}
|
||||
<td className="text-center whitespace-nowrap text-sm text-gray-700">
|
||||
{product.strain_type || '-'}
|
||||
</td>
|
||||
<td className="text-center whitespace-nowrap">
|
||||
{product.in_stock ? (
|
||||
<span className="badge badge-success badge-sm">Yes</span>
|
||||
) : product.in_stock === false ? (
|
||||
<span className="badge badge-error badge-sm">No</span>
|
||||
) : '-'}
|
||||
<td className="text-center whitespace-nowrap text-sm text-gray-700">
|
||||
{product.in_stock ? 'Yes' : product.in_stock === false ? 'No' : '-'}
|
||||
</td>
|
||||
<td className="whitespace-nowrap text-xs text-gray-500">
|
||||
{product.updated_at ? formatDate(product.updated_at) : '-'}
|
||||
</td>
|
||||
<td>
|
||||
<div className="flex gap-1">
|
||||
{product.dutchie_url && (
|
||||
<a
|
||||
href={product.dutchie_url}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="btn btn-xs btn-outline"
|
||||
>
|
||||
Dutchie
|
||||
</a>
|
||||
)}
|
||||
<button
|
||||
onClick={() => navigate(`/products/${product.id}`)}
|
||||
className="btn btn-xs btn-primary"
|
||||
>
|
||||
Details
|
||||
</button>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => navigate(`/products/${product.id}`)}
|
||||
className="btn btn-xs btn-ghost text-gray-500 hover:text-gray-700"
|
||||
>
|
||||
Details
|
||||
</button>
|
||||
</td>
|
||||
</tr>
|
||||
))}
|
||||
|
||||
378
cannaiq/src/pages/DispensarySchedule.tsx
Normal file
378
cannaiq/src/pages/DispensarySchedule.tsx
Normal file
@@ -0,0 +1,378 @@
|
||||
import { useEffect, useState } from 'react';
|
||||
import { useParams, useNavigate, Link } from 'react-router-dom';
|
||||
import { Layout } from '../components/Layout';
|
||||
import { api } from '../lib/api';
|
||||
import {
|
||||
ArrowLeft,
|
||||
Clock,
|
||||
Calendar,
|
||||
CheckCircle,
|
||||
XCircle,
|
||||
AlertCircle,
|
||||
Package,
|
||||
Timer,
|
||||
Building2,
|
||||
} from 'lucide-react';
|
||||
|
||||
interface CrawlHistoryItem {
|
||||
id: number;
|
||||
runId: string | null;
|
||||
profileKey: string | null;
|
||||
crawlerModule: string | null;
|
||||
stateAtStart: string | null;
|
||||
stateAtEnd: string | null;
|
||||
totalSteps: number;
|
||||
durationMs: number | null;
|
||||
success: boolean;
|
||||
errorMessage: string | null;
|
||||
productsFound: number | null;
|
||||
startedAt: string | null;
|
||||
completedAt: string | null;
|
||||
}
|
||||
|
||||
interface NextSchedule {
|
||||
scheduleId: number;
|
||||
jobName: string;
|
||||
enabled: boolean;
|
||||
baseIntervalMinutes: number;
|
||||
jitterMinutes: number;
|
||||
nextRunAt: string | null;
|
||||
lastRunAt: string | null;
|
||||
lastStatus: string | null;
|
||||
}
|
||||
|
||||
interface Dispensary {
|
||||
id: number;
|
||||
name: string;
|
||||
dba_name: string | null;
|
||||
slug: string;
|
||||
state: string;
|
||||
city: string;
|
||||
menu_type: string | null;
|
||||
platform_dispensary_id: string | null;
|
||||
last_menu_scrape: string | null;
|
||||
}
|
||||
|
||||
export function DispensarySchedule() {
|
||||
const { state, city, slug } = useParams();
|
||||
const navigate = useNavigate();
|
||||
const [dispensary, setDispensary] = useState<Dispensary | null>(null);
|
||||
const [history, setHistory] = useState<CrawlHistoryItem[]>([]);
|
||||
const [nextSchedule, setNextSchedule] = useState<NextSchedule | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
|
||||
useEffect(() => {
|
||||
loadScheduleData();
|
||||
}, [slug]);
|
||||
|
||||
const loadScheduleData = async () => {
|
||||
setLoading(true);
|
||||
try {
|
||||
// First get the dispensary to get the ID
|
||||
const dispData = await api.getDispensary(slug!);
|
||||
if (dispData?.id) {
|
||||
const data = await api.getStoreCrawlHistory(dispData.id);
|
||||
setDispensary(data.dispensary);
|
||||
setHistory(data.history || []);
|
||||
setNextSchedule(data.nextSchedule);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Failed to load schedule data:', error);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
const formatDate = (dateStr: string | null) => {
|
||||
if (!dateStr) return 'Never';
|
||||
const date = new Date(dateStr);
|
||||
return date.toLocaleDateString('en-US', {
|
||||
year: 'numeric',
|
||||
month: 'short',
|
||||
day: 'numeric',
|
||||
hour: '2-digit',
|
||||
minute: '2-digit',
|
||||
});
|
||||
};
|
||||
|
||||
const formatTimeAgo = (dateStr: string | null) => {
|
||||
if (!dateStr) return 'Never';
|
||||
const date = new Date(dateStr);
|
||||
const now = new Date();
|
||||
const diffMs = now.getTime() - date.getTime();
|
||||
const diffMinutes = Math.floor(diffMs / (1000 * 60));
|
||||
const diffHours = Math.floor(diffMs / (1000 * 60 * 60));
|
||||
const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24));
|
||||
|
||||
if (diffMinutes < 1) return 'Just now';
|
||||
if (diffMinutes < 60) return `${diffMinutes}m ago`;
|
||||
if (diffHours < 24) return `${diffHours}h ago`;
|
||||
if (diffDays === 1) return 'Yesterday';
|
||||
if (diffDays < 7) return `${diffDays} days ago`;
|
||||
return date.toLocaleDateString();
|
||||
};
|
||||
|
||||
const formatTimeUntil = (dateStr: string | null) => {
|
||||
if (!dateStr) return 'Not scheduled';
|
||||
const date = new Date(dateStr);
|
||||
const now = new Date();
|
||||
const diffMs = date.getTime() - now.getTime();
|
||||
|
||||
if (diffMs < 0) return 'Overdue';
|
||||
|
||||
const diffMinutes = Math.floor(diffMs / (1000 * 60));
|
||||
const diffHours = Math.floor(diffMinutes / 60);
|
||||
|
||||
if (diffMinutes < 60) return `in ${diffMinutes}m`;
|
||||
return `in ${diffHours}h ${diffMinutes % 60}m`;
|
||||
};
|
||||
|
||||
const formatDuration = (ms: number | null) => {
|
||||
if (!ms) return '-';
|
||||
if (ms < 1000) return `${ms}ms`;
|
||||
const seconds = Math.floor(ms / 1000);
|
||||
const minutes = Math.floor(seconds / 60);
|
||||
if (minutes < 1) return `${seconds}s`;
|
||||
return `${minutes}m ${seconds % 60}s`;
|
||||
};
|
||||
|
||||
const formatInterval = (baseMinutes: number, jitterMinutes: number) => {
|
||||
const hours = Math.floor(baseMinutes / 60);
|
||||
const mins = baseMinutes % 60;
|
||||
let base = hours > 0 ? `${hours}h` : '';
|
||||
if (mins > 0) base += `${mins}m`;
|
||||
return `Every ${base} (+/- ${jitterMinutes}m jitter)`;
|
||||
};
|
||||
|
||||
if (loading) {
|
||||
return (
|
||||
<Layout>
|
||||
<div className="text-center py-12">
|
||||
<div className="inline-block animate-spin rounded-full h-8 w-8 border-4 border-gray-400 border-t-transparent"></div>
|
||||
<p className="mt-2 text-sm text-gray-600">Loading schedule...</p>
|
||||
</div>
|
||||
</Layout>
|
||||
);
|
||||
}
|
||||
|
||||
if (!dispensary) {
|
||||
return (
|
||||
<Layout>
|
||||
<div className="text-center py-12">
|
||||
<p className="text-gray-600">Dispensary not found</p>
|
||||
</div>
|
||||
</Layout>
|
||||
);
|
||||
}
|
||||
|
||||
// Stats from history
|
||||
const successCount = history.filter(h => h.success).length;
|
||||
const failureCount = history.filter(h => !h.success).length;
|
||||
const lastSuccess = history.find(h => h.success);
|
||||
const avgDuration = history.length > 0
|
||||
? Math.round(history.reduce((sum, h) => sum + (h.durationMs || 0), 0) / history.length)
|
||||
: 0;
|
||||
|
||||
return (
|
||||
<Layout>
|
||||
<div className="space-y-6">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between gap-4">
|
||||
<button
|
||||
onClick={() => navigate(`/dispensaries/${state}/${city}/${slug}`)}
|
||||
className="flex items-center gap-2 text-sm text-gray-600 hover:text-gray-900"
|
||||
>
|
||||
<ArrowLeft className="w-4 h-4" />
|
||||
Back to {dispensary.dba_name || dispensary.name}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Dispensary Info */}
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-6">
|
||||
<div className="flex items-start gap-4">
|
||||
<div className="p-3 bg-blue-50 rounded-lg">
|
||||
<Building2 className="w-8 h-8 text-blue-600" />
|
||||
</div>
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900">
|
||||
{dispensary.dba_name || dispensary.name}
|
||||
</h1>
|
||||
<p className="text-sm text-gray-600 mt-1">
|
||||
{dispensary.city}, {dispensary.state} - Crawl Schedule & History
|
||||
</p>
|
||||
<div className="flex items-center gap-4 mt-2 text-sm text-gray-500">
|
||||
<span>Slug: {dispensary.slug}</span>
|
||||
{dispensary.menu_type && (
|
||||
<span className="px-2 py-0.5 bg-gray-100 rounded text-xs">
|
||||
{dispensary.menu_type}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Next Scheduled Crawl */}
|
||||
{nextSchedule && (
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-6">
|
||||
<h2 className="text-lg font-semibold text-gray-900 mb-4 flex items-center gap-2">
|
||||
<Clock className="w-5 h-5 text-blue-500" />
|
||||
Upcoming Schedule
|
||||
</h2>
|
||||
<div className="grid grid-cols-4 gap-6">
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Next Run</p>
|
||||
<p className="text-xl font-semibold text-blue-600">
|
||||
{formatTimeUntil(nextSchedule.nextRunAt)}
|
||||
</p>
|
||||
<p className="text-xs text-gray-400">
|
||||
{formatDate(nextSchedule.nextRunAt)}
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Interval</p>
|
||||
<p className="text-lg font-medium">
|
||||
{formatInterval(nextSchedule.baseIntervalMinutes, nextSchedule.jitterMinutes)}
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Last Run</p>
|
||||
<p className="text-lg font-medium">
|
||||
{formatTimeAgo(nextSchedule.lastRunAt)}
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Last Status</p>
|
||||
<p className={`text-lg font-medium ${
|
||||
nextSchedule.lastStatus === 'success' ? 'text-green-600' :
|
||||
nextSchedule.lastStatus === 'error' ? 'text-red-600' : 'text-gray-600'
|
||||
}`}>
|
||||
{nextSchedule.lastStatus || '-'}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Stats Summary */}
|
||||
<div className="grid grid-cols-4 gap-4">
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<CheckCircle className="w-8 h-8 text-green-500" />
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Successful Runs</p>
|
||||
<p className="text-2xl font-bold text-green-600">{successCount}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<XCircle className="w-8 h-8 text-red-500" />
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Failed Runs</p>
|
||||
<p className="text-2xl font-bold text-red-600">{failureCount}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<Timer className="w-8 h-8 text-blue-500" />
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Avg Duration</p>
|
||||
<p className="text-2xl font-bold">{formatDuration(avgDuration)}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<Package className="w-8 h-8 text-purple-500" />
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Last Products Found</p>
|
||||
<p className="text-2xl font-bold">
|
||||
{lastSuccess?.productsFound?.toLocaleString() || '-'}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Crawl History Table */}
|
||||
<div className="bg-white rounded-lg border border-gray-200">
|
||||
<div className="p-4 border-b border-gray-200">
|
||||
<h2 className="text-lg font-semibold text-gray-900 flex items-center gap-2">
|
||||
<Calendar className="w-5 h-5 text-gray-500" />
|
||||
Crawl History
|
||||
</h2>
|
||||
</div>
|
||||
<div className="overflow-x-auto">
|
||||
<table className="table table-sm w-full">
|
||||
<thead className="bg-gray-50">
|
||||
<tr>
|
||||
<th>Status</th>
|
||||
<th>Started</th>
|
||||
<th>Duration</th>
|
||||
<th className="text-right">Products</th>
|
||||
<th>State</th>
|
||||
<th>Error</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{history.length === 0 ? (
|
||||
<tr>
|
||||
<td colSpan={6} className="text-center py-8 text-gray-500">
|
||||
No crawl history available
|
||||
</td>
|
||||
</tr>
|
||||
) : (
|
||||
history.map((item) => (
|
||||
<tr key={item.id} className="hover:bg-gray-50">
|
||||
<td>
|
||||
<span className={`inline-flex items-center gap-1 px-2 py-1 rounded text-xs font-medium ${
|
||||
item.success
|
||||
? 'bg-green-100 text-green-700'
|
||||
: 'bg-red-100 text-red-700'
|
||||
}`}>
|
||||
{item.success ? (
|
||||
<CheckCircle className="w-3 h-3" />
|
||||
) : (
|
||||
<XCircle className="w-3 h-3" />
|
||||
)}
|
||||
{item.success ? 'Success' : 'Failed'}
|
||||
</span>
|
||||
</td>
|
||||
<td>
|
||||
<div className="text-sm">{formatDate(item.startedAt)}</div>
|
||||
<div className="text-xs text-gray-400">{formatTimeAgo(item.startedAt)}</div>
|
||||
</td>
|
||||
<td className="font-mono text-sm">
|
||||
{formatDuration(item.durationMs)}
|
||||
</td>
|
||||
<td className="text-right font-mono text-sm">
|
||||
{item.productsFound?.toLocaleString() || '-'}
|
||||
</td>
|
||||
<td className="text-sm text-gray-600">
|
||||
{item.stateAtEnd || item.stateAtStart || '-'}
|
||||
</td>
|
||||
<td className="max-w-[200px]">
|
||||
{item.errorMessage ? (
|
||||
<span
|
||||
className="text-xs text-red-600 truncate block cursor-help"
|
||||
title={item.errorMessage}
|
||||
>
|
||||
{item.errorMessage.substring(0, 50)}...
|
||||
</span>
|
||||
) : '-'}
|
||||
</td>
|
||||
</tr>
|
||||
))
|
||||
)}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Layout>
|
||||
);
|
||||
}
|
||||
|
||||
export default DispensarySchedule;
|
||||
@@ -3,15 +3,16 @@ import { useNavigate } from 'react-router-dom';
|
||||
import { Layout } from '../components/Layout';
|
||||
import { api } from '../lib/api';
|
||||
import { trackProductClick } from '../lib/analytics';
|
||||
import { useStateFilter } from '../hooks/useStateFilter';
|
||||
import {
|
||||
Building2,
|
||||
MapPin,
|
||||
Package,
|
||||
DollarSign,
|
||||
RefreshCw,
|
||||
Search,
|
||||
TrendingUp,
|
||||
BarChart3,
|
||||
ChevronDown,
|
||||
} from 'lucide-react';
|
||||
|
||||
interface BrandData {
|
||||
@@ -25,19 +26,28 @@ interface BrandData {
|
||||
|
||||
export function IntelligenceBrands() {
|
||||
const navigate = useNavigate();
|
||||
const { selectedState, setSelectedState, stateParam, stateLabel, isAllStates } = useStateFilter();
|
||||
const [availableStates, setAvailableStates] = useState<string[]>([]);
|
||||
const [brands, setBrands] = useState<BrandData[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [searchTerm, setSearchTerm] = useState('');
|
||||
const [sortBy, setSortBy] = useState<'stores' | 'skus' | 'name'>('stores');
|
||||
const [sortBy, setSortBy] = useState<'stores' | 'skus' | 'name' | 'states'>('stores');
|
||||
|
||||
useEffect(() => {
|
||||
loadBrands();
|
||||
}, [stateParam]);
|
||||
|
||||
useEffect(() => {
|
||||
// Load available states
|
||||
api.getOrchestratorStates().then(data => {
|
||||
setAvailableStates(data.states?.map((s: any) => s.state) || []);
|
||||
}).catch(console.error);
|
||||
}, []);
|
||||
|
||||
const loadBrands = async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
const data = await api.getIntelligenceBrands({ limit: 500 });
|
||||
const data = await api.getIntelligenceBrands({ limit: 500, state: stateParam });
|
||||
setBrands(data.brands || []);
|
||||
} catch (error) {
|
||||
console.error('Failed to load brands:', error);
|
||||
@@ -58,6 +68,8 @@ export function IntelligenceBrands() {
|
||||
return b.skuCount - a.skuCount;
|
||||
case 'name':
|
||||
return a.brandName.localeCompare(b.brandName);
|
||||
case 'states':
|
||||
return b.states.length - a.states.length;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
@@ -89,35 +101,60 @@ export function IntelligenceBrands() {
|
||||
<Layout>
|
||||
<div className="space-y-6">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex flex-col gap-4 sm:flex-row sm:items-center sm:justify-between">
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900">Brands Intelligence</h1>
|
||||
<p className="text-sm text-gray-600 mt-1">
|
||||
Brand penetration and pricing analytics across markets
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/pricing')}
|
||||
className="btn btn-sm btn-outline gap-1"
|
||||
>
|
||||
<DollarSign className="w-4 h-4" />
|
||||
Pricing
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/stores')}
|
||||
className="btn btn-sm btn-outline gap-1"
|
||||
>
|
||||
<MapPin className="w-4 h-4" />
|
||||
Stores
|
||||
</button>
|
||||
<button
|
||||
onClick={loadBrands}
|
||||
className="btn btn-sm btn-outline gap-2"
|
||||
>
|
||||
<RefreshCw className="w-4 h-4" />
|
||||
Refresh
|
||||
</button>
|
||||
<div className="flex flex-wrap gap-2 items-center">
|
||||
{/* State Selector */}
|
||||
<div className="dropdown dropdown-end">
|
||||
<button tabIndex={0} className="btn btn-sm gap-2 bg-emerald-50 border-emerald-200 hover:bg-emerald-100">
|
||||
{stateLabel}
|
||||
<ChevronDown className="w-4 h-4" />
|
||||
</button>
|
||||
<ul tabIndex={0} className="dropdown-content z-50 menu p-2 shadow-lg bg-white rounded-box w-44 max-h-60 overflow-y-auto border border-gray-200">
|
||||
<li>
|
||||
<a onClick={() => setSelectedState(null)} className={isAllStates ? 'active bg-emerald-100' : ''}>
|
||||
All States
|
||||
</a>
|
||||
</li>
|
||||
<div className="divider my-1"></div>
|
||||
{availableStates.map((state) => (
|
||||
<li key={state}>
|
||||
<a onClick={() => setSelectedState(state)} className={selectedState === state ? 'active bg-emerald-100' : ''}>
|
||||
{state}
|
||||
</a>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{/* Page Navigation */}
|
||||
<div className="flex gap-1">
|
||||
<button
|
||||
className="btn btn-sm gap-1 bg-emerald-600 text-white hover:bg-emerald-700 border-emerald-600"
|
||||
>
|
||||
<Building2 className="w-4 h-4" />
|
||||
<span>Brands</span>
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/stores')}
|
||||
className="btn btn-sm gap-1 bg-white border-gray-300 text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
<MapPin className="w-4 h-4" />
|
||||
<span>Stores</span>
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/pricing')}
|
||||
className="btn btn-sm gap-1 bg-white border-gray-300 text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
<DollarSign className="w-4 h-4" />
|
||||
<span>Pricing</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -169,28 +206,32 @@ export function IntelligenceBrands() {
|
||||
|
||||
{/* Top Brands Chart */}
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-4">
|
||||
<h3 className="text-lg font-semibold text-gray-900 mb-4 flex items-center gap-2">
|
||||
<BarChart3 className="w-5 h-5 text-blue-500" />
|
||||
<h3 className="text-lg font-semibold text-gray-900 flex items-center gap-2 mb-4">
|
||||
<BarChart3 className="w-5 h-5 text-emerald-500" />
|
||||
Top 10 Brands by Store Count
|
||||
</h3>
|
||||
<div className="space-y-2">
|
||||
{topBrands.map((brand, idx) => (
|
||||
<div key={brand.brandName} className="flex items-center gap-3">
|
||||
<span className="text-sm text-gray-500 w-6">{idx + 1}.</span>
|
||||
<span className="text-sm font-medium w-40 truncate" title={brand.brandName}>
|
||||
{brand.brandName}
|
||||
</span>
|
||||
<div className="flex-1 bg-gray-100 rounded-full h-4 relative">
|
||||
<div
|
||||
className="bg-blue-500 rounded-full h-4"
|
||||
style={{ width: `${(brand.storeCount / maxStoreCount) * 100}%` }}
|
||||
/>
|
||||
{topBrands.map((brand) => {
|
||||
const barWidth = Math.min((brand.storeCount / maxStoreCount) * 100, 100);
|
||||
return (
|
||||
<div key={brand.brandName} className="flex items-center gap-3">
|
||||
<span className="text-sm font-medium w-28 truncate shrink-0" title={brand.brandName}>
|
||||
{brand.brandName}
|
||||
</span>
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="bg-gray-100 rounded h-5 overflow-hidden">
|
||||
<div
|
||||
className="bg-gradient-to-r from-emerald-400 to-emerald-500 h-5 rounded transition-all"
|
||||
style={{ width: `${barWidth}%` }}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
<span className="text-sm font-mono font-semibold text-emerald-600 w-16 text-right shrink-0">
|
||||
{brand.storeCount}
|
||||
</span>
|
||||
</div>
|
||||
<span className="text-sm text-gray-600 w-16 text-right">
|
||||
{brand.storeCount} stores
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -213,6 +254,7 @@ export function IntelligenceBrands() {
|
||||
>
|
||||
<option value="stores">Sort by Stores</option>
|
||||
<option value="skus">Sort by SKUs</option>
|
||||
<option value="states">Sort by States</option>
|
||||
<option value="name">Sort by Name</option>
|
||||
</select>
|
||||
<span className="text-sm text-gray-500">
|
||||
|
||||
@@ -2,15 +2,16 @@ import { useEffect, useState } from 'react';
|
||||
import { useNavigate } from 'react-router-dom';
|
||||
import { Layout } from '../components/Layout';
|
||||
import { api } from '../lib/api';
|
||||
import { useStateFilter } from '../hooks/useStateFilter';
|
||||
import {
|
||||
DollarSign,
|
||||
Building2,
|
||||
MapPin,
|
||||
Package,
|
||||
RefreshCw,
|
||||
TrendingUp,
|
||||
TrendingDown,
|
||||
BarChart3,
|
||||
ChevronDown,
|
||||
} from 'lucide-react';
|
||||
|
||||
interface CategoryPricing {
|
||||
@@ -31,18 +32,27 @@ interface OverallPricing {
|
||||
|
||||
export function IntelligencePricing() {
|
||||
const navigate = useNavigate();
|
||||
const { selectedState, setSelectedState, stateParam, stateLabel, isAllStates } = useStateFilter();
|
||||
const [availableStates, setAvailableStates] = useState<string[]>([]);
|
||||
const [categories, setCategories] = useState<CategoryPricing[]>([]);
|
||||
const [overall, setOverall] = useState<OverallPricing | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
|
||||
useEffect(() => {
|
||||
loadPricing();
|
||||
}, [stateParam]);
|
||||
|
||||
useEffect(() => {
|
||||
// Load available states
|
||||
api.getOrchestratorStates().then(data => {
|
||||
setAvailableStates(data.states?.map((s: any) => s.state) || []);
|
||||
}).catch(console.error);
|
||||
}, []);
|
||||
|
||||
const loadPricing = async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
const data = await api.getIntelligencePricing();
|
||||
const data = await api.getIntelligencePricing({ state: stateParam });
|
||||
setCategories(data.byCategory || []);
|
||||
setOverall(data.overall || null);
|
||||
} catch (error) {
|
||||
@@ -76,35 +86,60 @@ export function IntelligencePricing() {
|
||||
<Layout>
|
||||
<div className="space-y-6">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex flex-col gap-4 sm:flex-row sm:items-center sm:justify-between">
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900">Pricing Intelligence</h1>
|
||||
<p className="text-sm text-gray-600 mt-1">
|
||||
Price distribution and trends by category
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/brands')}
|
||||
className="btn btn-sm btn-outline gap-1"
|
||||
>
|
||||
<Building2 className="w-4 h-4" />
|
||||
Brands
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/stores')}
|
||||
className="btn btn-sm btn-outline gap-1"
|
||||
>
|
||||
<MapPin className="w-4 h-4" />
|
||||
Stores
|
||||
</button>
|
||||
<button
|
||||
onClick={loadPricing}
|
||||
className="btn btn-sm btn-outline gap-2"
|
||||
>
|
||||
<RefreshCw className="w-4 h-4" />
|
||||
Refresh
|
||||
</button>
|
||||
<div className="flex flex-wrap gap-2 items-center">
|
||||
{/* State Selector */}
|
||||
<div className="dropdown dropdown-end">
|
||||
<button tabIndex={0} className="btn btn-sm gap-2 bg-emerald-50 border-emerald-200 hover:bg-emerald-100">
|
||||
{stateLabel}
|
||||
<ChevronDown className="w-4 h-4" />
|
||||
</button>
|
||||
<ul tabIndex={0} className="dropdown-content z-50 menu p-2 shadow-lg bg-white rounded-box w-44 max-h-60 overflow-y-auto border border-gray-200">
|
||||
<li>
|
||||
<a onClick={() => setSelectedState(null)} className={isAllStates ? 'active bg-emerald-100' : ''}>
|
||||
All States
|
||||
</a>
|
||||
</li>
|
||||
<div className="divider my-1"></div>
|
||||
{availableStates.map((state) => (
|
||||
<li key={state}>
|
||||
<a onClick={() => setSelectedState(state)} className={selectedState === state ? 'active bg-emerald-100' : ''}>
|
||||
{state}
|
||||
</a>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{/* Page Navigation */}
|
||||
<div className="flex gap-1">
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/brands')}
|
||||
className="btn btn-sm gap-1 bg-white border-gray-300 text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
<Building2 className="w-4 h-4" />
|
||||
<span>Brands</span>
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/stores')}
|
||||
className="btn btn-sm gap-1 bg-white border-gray-300 text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
<MapPin className="w-4 h-4" />
|
||||
<span>Stores</span>
|
||||
</button>
|
||||
<button
|
||||
className="btn btn-sm gap-1 bg-emerald-600 text-white hover:bg-emerald-700 border-emerald-600"
|
||||
>
|
||||
<DollarSign className="w-4 h-4" />
|
||||
<span>Pricing</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -150,7 +185,7 @@ export function IntelligencePricing() {
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Products Priced</p>
|
||||
<p className="text-2xl font-bold">
|
||||
{overall.totalProducts.toLocaleString()}
|
||||
{(overall.totalProducts || 0).toLocaleString()}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -164,43 +199,29 @@ export function IntelligencePricing() {
|
||||
<BarChart3 className="w-5 h-5 text-green-500" />
|
||||
Average Price by Category
|
||||
</h3>
|
||||
<div className="space-y-3">
|
||||
{sortedCategories.map((cat) => (
|
||||
<div key={cat.category} className="flex items-center gap-3">
|
||||
<span className="text-sm font-medium w-32 truncate" title={cat.category}>
|
||||
{cat.category || 'Unknown'}
|
||||
</span>
|
||||
<div className="flex-1 relative">
|
||||
{/* Price range bar */}
|
||||
<div className="bg-gray-100 rounded-full h-6 relative">
|
||||
{/* Min-Max range */}
|
||||
<div
|
||||
className="absolute top-0 h-6 bg-blue-100 rounded-full"
|
||||
style={{
|
||||
left: `${(cat.minPrice / (overall?.maxPrice || 100)) * 100}%`,
|
||||
width: `${((cat.maxPrice - cat.minPrice) / (overall?.maxPrice || 100)) * 100}%`,
|
||||
}}
|
||||
/>
|
||||
{/* Average marker */}
|
||||
<div
|
||||
className="absolute top-0 h-6 w-1 bg-green-500 rounded"
|
||||
style={{ left: `${(cat.avgPrice / (overall?.maxPrice || 100)) * 100}%` }}
|
||||
/>
|
||||
<div className="space-y-2">
|
||||
{sortedCategories.slice(0, 12).map((cat) => {
|
||||
const maxPrice = Math.max(...sortedCategories.map(c => c.avgPrice || 0), 1);
|
||||
const barWidth = Math.min(((cat.avgPrice || 0) / maxPrice) * 100, 100);
|
||||
return (
|
||||
<div key={cat.category} className="flex items-center gap-3">
|
||||
<span className="text-sm font-medium w-28 truncate shrink-0" title={cat.category}>
|
||||
{cat.category || 'Unknown'}
|
||||
</span>
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="bg-gray-100 rounded h-5 overflow-hidden">
|
||||
<div
|
||||
className="bg-gradient-to-r from-emerald-400 to-emerald-500 h-5 rounded transition-all"
|
||||
style={{ width: `${barWidth}%` }}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex gap-4 text-xs w-48">
|
||||
<span className="text-gray-500">
|
||||
Min: <span className="text-blue-600 font-mono">{formatPrice(cat.minPrice)}</span>
|
||||
</span>
|
||||
<span className="text-gray-500">
|
||||
Avg: <span className="text-green-600 font-mono font-bold">{formatPrice(cat.avgPrice)}</span>
|
||||
</span>
|
||||
<span className="text-gray-500">
|
||||
Max: <span className="text-orange-600 font-mono">{formatPrice(cat.maxPrice)}</span>
|
||||
<span className="text-sm font-mono font-semibold text-emerald-600 w-16 text-right shrink-0">
|
||||
{formatPrice(cat.avgPrice)}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -236,7 +257,7 @@ export function IntelligencePricing() {
|
||||
<span className="font-medium">{cat.category || 'Unknown'}</span>
|
||||
</td>
|
||||
<td className="text-center">
|
||||
<span className="font-mono">{cat.productCount.toLocaleString()}</span>
|
||||
<span className="font-mono">{(cat.productCount || 0).toLocaleString()}</span>
|
||||
</td>
|
||||
<td className="text-right">
|
||||
<span className="font-mono text-blue-600">{formatPrice(cat.minPrice)}</span>
|
||||
|
||||
@@ -8,7 +8,6 @@ import {
|
||||
Building2,
|
||||
DollarSign,
|
||||
Package,
|
||||
RefreshCw,
|
||||
Search,
|
||||
Clock,
|
||||
Activity,
|
||||
@@ -34,12 +33,19 @@ export function IntelligenceStores() {
|
||||
const [stores, setStores] = useState<StoreActivity[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [searchTerm, setSearchTerm] = useState('');
|
||||
const [localStates, setLocalStates] = useState<string[]>([]);
|
||||
const [availableStates, setAvailableStates] = useState<string[]>([]);
|
||||
|
||||
useEffect(() => {
|
||||
loadStores();
|
||||
}, [selectedState]);
|
||||
|
||||
useEffect(() => {
|
||||
// Load available states from orchestrator API
|
||||
api.getOrchestratorStates().then(data => {
|
||||
setAvailableStates(data.states?.map((s: any) => s.state) || []);
|
||||
}).catch(console.error);
|
||||
}, []);
|
||||
|
||||
const loadStores = async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
@@ -48,10 +54,6 @@ export function IntelligenceStores() {
|
||||
limit: 500,
|
||||
});
|
||||
setStores(data.stores || []);
|
||||
|
||||
// Extract unique states from response for dropdown counts
|
||||
const uniqueStates = [...new Set(data.stores.map((s: StoreActivity) => s.state))].sort();
|
||||
setLocalStates(uniqueStates);
|
||||
} catch (error) {
|
||||
console.error('Failed to load stores:', error);
|
||||
} finally {
|
||||
@@ -97,47 +99,72 @@ export function IntelligenceStores() {
|
||||
);
|
||||
}
|
||||
|
||||
// Calculate stats
|
||||
const totalSKUs = stores.reduce((sum, s) => sum + s.skuCount, 0);
|
||||
const totalSnapshots = stores.reduce((sum, s) => sum + s.snapshotCount, 0);
|
||||
const avgFrequency = stores.filter(s => s.crawlFrequencyHours).length > 0
|
||||
? stores.filter(s => s.crawlFrequencyHours).reduce((sum, s) => sum + (s.crawlFrequencyHours || 0), 0) /
|
||||
stores.filter(s => s.crawlFrequencyHours).length
|
||||
// Calculate stats with null safety
|
||||
const totalSKUs = stores.reduce((sum, s) => sum + (s.skuCount || 0), 0);
|
||||
const totalSnapshots = stores.reduce((sum, s) => sum + (s.snapshotCount || 0), 0);
|
||||
const storesWithFrequency = stores.filter(s => s.crawlFrequencyHours != null);
|
||||
const avgFrequency = storesWithFrequency.length > 0
|
||||
? storesWithFrequency.reduce((sum, s) => sum + (s.crawlFrequencyHours || 0), 0) / storesWithFrequency.length
|
||||
: 0;
|
||||
|
||||
return (
|
||||
<Layout>
|
||||
<div className="space-y-6">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex flex-col gap-4 sm:flex-row sm:items-center sm:justify-between">
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900">Store Activity</h1>
|
||||
<p className="text-sm text-gray-600 mt-1">
|
||||
Per-store SKU counts, snapshots, and crawl frequency
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/brands')}
|
||||
className="btn btn-sm btn-outline gap-1"
|
||||
>
|
||||
<Building2 className="w-4 h-4" />
|
||||
Brands
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/pricing')}
|
||||
className="btn btn-sm btn-outline gap-1"
|
||||
>
|
||||
<DollarSign className="w-4 h-4" />
|
||||
Pricing
|
||||
</button>
|
||||
<button
|
||||
onClick={loadStores}
|
||||
className="btn btn-sm btn-outline gap-2"
|
||||
>
|
||||
<RefreshCw className="w-4 h-4" />
|
||||
Refresh
|
||||
</button>
|
||||
<div className="flex flex-wrap gap-2 items-center">
|
||||
{/* State Selector */}
|
||||
<div className="dropdown dropdown-end">
|
||||
<button tabIndex={0} className="btn btn-sm gap-2 bg-emerald-50 border-emerald-200 hover:bg-emerald-100">
|
||||
{stateLabel}
|
||||
<ChevronDown className="w-4 h-4" />
|
||||
</button>
|
||||
<ul tabIndex={0} className="dropdown-content z-50 menu p-2 shadow-lg bg-white rounded-box w-44 max-h-60 overflow-y-auto border border-gray-200">
|
||||
<li>
|
||||
<a onClick={() => setSelectedState(null)} className={isAllStates ? 'active bg-emerald-100' : ''}>
|
||||
All States
|
||||
</a>
|
||||
</li>
|
||||
<div className="divider my-1"></div>
|
||||
{availableStates.map((state) => (
|
||||
<li key={state}>
|
||||
<a onClick={() => setSelectedState(state)} className={selectedState === state ? 'active bg-emerald-100' : ''}>
|
||||
{state}
|
||||
</a>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{/* Page Navigation */}
|
||||
<div className="flex gap-1">
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/brands')}
|
||||
className="btn btn-sm gap-1 bg-white border-gray-300 text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
<Building2 className="w-4 h-4" />
|
||||
<span>Brands</span>
|
||||
</button>
|
||||
<button
|
||||
className="btn btn-sm gap-1 bg-emerald-600 text-white hover:bg-emerald-700 border-emerald-600"
|
||||
>
|
||||
<MapPin className="w-4 h-4" />
|
||||
<span>Stores</span>
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/pricing')}
|
||||
className="btn btn-sm gap-1 bg-white border-gray-300 text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
<DollarSign className="w-4 h-4" />
|
||||
<span>Pricing</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -193,26 +220,6 @@ export function IntelligenceStores() {
|
||||
className="input input-bordered input-sm w-full pl-10"
|
||||
/>
|
||||
</div>
|
||||
<div className="dropdown">
|
||||
<button tabIndex={0} className="btn btn-sm btn-outline gap-2">
|
||||
{stateLabel}
|
||||
<ChevronDown className="w-4 h-4" />
|
||||
</button>
|
||||
<ul tabIndex={0} className="dropdown-content z-[1] menu p-2 shadow bg-base-100 rounded-box w-40 max-h-60 overflow-y-auto">
|
||||
<li>
|
||||
<a onClick={() => setSelectedState(null)} className={isAllStates ? 'active' : ''}>
|
||||
All States
|
||||
</a>
|
||||
</li>
|
||||
{localStates.map(state => (
|
||||
<li key={state}>
|
||||
<a onClick={() => setSelectedState(state)} className={selectedState === state ? 'active' : ''}>
|
||||
{state}
|
||||
</a>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
<span className="text-sm text-gray-500">
|
||||
Showing {filteredStores.length} of {stores.length} stores
|
||||
</span>
|
||||
@@ -246,7 +253,7 @@ export function IntelligenceStores() {
|
||||
<tr
|
||||
key={store.id}
|
||||
className="hover:bg-gray-50 cursor-pointer"
|
||||
onClick={() => navigate(`/admin/orchestrator/stores?storeId=${store.id}`)}
|
||||
onClick={() => navigate(`/stores/list/${store.id}`)}
|
||||
>
|
||||
<td>
|
||||
<span className="font-medium">{store.name}</span>
|
||||
@@ -262,10 +269,10 @@ export function IntelligenceStores() {
|
||||
)}
|
||||
</td>
|
||||
<td className="text-center">
|
||||
<span className="font-mono">{store.skuCount.toLocaleString()}</span>
|
||||
<span className="font-mono">{(store.skuCount || 0).toLocaleString()}</span>
|
||||
</td>
|
||||
<td className="text-center">
|
||||
<span className="font-mono">{store.snapshotCount.toLocaleString()}</span>
|
||||
<span className="font-mono">{(store.snapshotCount || 0).toLocaleString()}</span>
|
||||
</td>
|
||||
<td>
|
||||
<span className={store.lastCrawl ? 'text-green-600' : 'text-gray-400'}>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -8,7 +8,6 @@
|
||||
import { useState, useEffect } from 'react';
|
||||
import { useNavigate } from 'react-router-dom';
|
||||
import { Layout } from '../components/Layout';
|
||||
import { StateBadge } from '../components/StateSelector';
|
||||
import { useStateStore } from '../store/stateStore';
|
||||
import { api } from '../lib/api';
|
||||
import {
|
||||
@@ -21,7 +20,6 @@ import {
|
||||
DollarSign,
|
||||
MapPin,
|
||||
ArrowRight,
|
||||
RefreshCw,
|
||||
AlertCircle
|
||||
} from 'lucide-react';
|
||||
|
||||
@@ -205,7 +203,6 @@ export default function NationalDashboard() {
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [summary, setSummary] = useState<NationalSummary | null>(null);
|
||||
const [refreshing, setRefreshing] = useState(false);
|
||||
|
||||
const fetchData = async () => {
|
||||
setLoading(true);
|
||||
@@ -230,18 +227,6 @@ export default function NationalDashboard() {
|
||||
fetchData();
|
||||
}, []);
|
||||
|
||||
const handleRefreshMetrics = async () => {
|
||||
setRefreshing(true);
|
||||
try {
|
||||
await api.post('/api/admin/states/refresh-metrics');
|
||||
await fetchData();
|
||||
} catch (err) {
|
||||
console.error('Failed to refresh metrics:', err);
|
||||
} finally {
|
||||
setRefreshing(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleStateClick = (stateCode: string) => {
|
||||
setSelectedState(stateCode);
|
||||
navigate(`/national/state/${stateCode}`);
|
||||
@@ -278,24 +263,11 @@ export default function NationalDashboard() {
|
||||
<Layout>
|
||||
<div className="space-y-6">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900">National Dashboard</h1>
|
||||
<p className="text-gray-500 mt-1">
|
||||
Multi-state cannabis market intelligence
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-3">
|
||||
<StateBadge />
|
||||
<button
|
||||
onClick={handleRefreshMetrics}
|
||||
disabled={refreshing}
|
||||
className="flex items-center gap-2 px-3 py-2 text-sm text-gray-600 hover:text-gray-900 border border-gray-200 rounded-lg hover:bg-gray-50 disabled:opacity-50"
|
||||
>
|
||||
<RefreshCw className={`w-4 h-4 ${refreshing ? 'animate-spin' : ''}`} />
|
||||
Refresh Metrics
|
||||
</button>
|
||||
</div>
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900">National Dashboard</h1>
|
||||
<p className="text-gray-500 mt-1">
|
||||
Multi-state cannabis market intelligence
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Summary Cards */}
|
||||
@@ -303,7 +275,7 @@ export default function NationalDashboard() {
|
||||
<>
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
|
||||
<MetricCard
|
||||
title="Active States"
|
||||
title="Regions (US + CA)"
|
||||
value={summary.activeStates}
|
||||
icon={Globe}
|
||||
/>
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user