Compare commits
1 Commits
fix/api-se
...
feat/steal
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
74981fd399 |
@@ -1,3 +1,6 @@
|
|||||||
|
when:
|
||||||
|
- event: [push, pull_request]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
# ===========================================
|
# ===========================================
|
||||||
# PR VALIDATION: Parallel type checks (PRs only)
|
# PR VALIDATION: Parallel type checks (PRs only)
|
||||||
@@ -42,34 +45,8 @@ steps:
|
|||||||
when:
|
when:
|
||||||
event: pull_request
|
event: pull_request
|
||||||
|
|
||||||
# ===========================================
|
|
||||||
# AUTO-MERGE: Merge PR after all checks pass
|
|
||||||
# ===========================================
|
|
||||||
auto-merge:
|
|
||||||
image: alpine:latest
|
|
||||||
environment:
|
|
||||||
GITEA_TOKEN:
|
|
||||||
from_secret: gitea_token
|
|
||||||
commands:
|
|
||||||
- apk add --no-cache curl
|
|
||||||
- |
|
|
||||||
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
|
|
||||||
curl -s -X POST \
|
|
||||||
-H "Authorization: token $GITEA_TOKEN" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{"Do":"merge"}' \
|
|
||||||
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
|
||||||
depends_on:
|
|
||||||
- typecheck-backend
|
|
||||||
- typecheck-cannaiq
|
|
||||||
- typecheck-findadispo
|
|
||||||
- typecheck-findagram
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
|
|
||||||
# ===========================================
|
# ===========================================
|
||||||
# MASTER DEPLOY: Parallel Docker builds
|
# MASTER DEPLOY: Parallel Docker builds
|
||||||
# NOTE: cache_from/cache_to removed due to plugin bug splitting on commas
|
|
||||||
# ===========================================
|
# ===========================================
|
||||||
docker-backend:
|
docker-backend:
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
image: woodpeckerci/plugin-docker-buildx
|
||||||
@@ -88,10 +65,10 @@ steps:
|
|||||||
platforms: linux/amd64
|
platforms: linux/amd64
|
||||||
provenance: false
|
provenance: false
|
||||||
build_args:
|
build_args:
|
||||||
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
|
- APP_BUILD_VERSION=${CI_COMMIT_SHA}
|
||||||
APP_GIT_SHA: ${CI_COMMIT_SHA}
|
- APP_GIT_SHA=${CI_COMMIT_SHA}
|
||||||
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
|
- APP_BUILD_TIME=${CI_PIPELINE_CREATED}
|
||||||
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
|
- CONTAINER_IMAGE_TAG=${CI_COMMIT_SHA:0:8}
|
||||||
depends_on: []
|
depends_on: []
|
||||||
when:
|
when:
|
||||||
branch: master
|
branch: master
|
||||||
@@ -161,7 +138,7 @@ steps:
|
|||||||
event: push
|
event: push
|
||||||
|
|
||||||
# ===========================================
|
# ===========================================
|
||||||
# STAGE 3: Deploy and Run Migrations
|
# STAGE 3: Deploy (after Docker builds)
|
||||||
# ===========================================
|
# ===========================================
|
||||||
deploy:
|
deploy:
|
||||||
image: bitnami/kubectl:latest
|
image: bitnami/kubectl:latest
|
||||||
@@ -172,17 +149,12 @@ steps:
|
|||||||
- mkdir -p ~/.kube
|
- mkdir -p ~/.kube
|
||||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
||||||
- chmod 600 ~/.kube/config
|
- chmod 600 ~/.kube/config
|
||||||
# Deploy backend first
|
|
||||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
|
||||||
# Note: Migrations run automatically at startup via auto-migrate
|
|
||||||
# Deploy remaining services
|
|
||||||
# Resilience: ensure workers are scaled up if at 0
|
|
||||||
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
|
|
||||||
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||||
|
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
||||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
||||||
depends_on:
|
depends_on:
|
||||||
- docker-backend
|
- docker-backend
|
||||||
@@ -1,191 +0,0 @@
|
|||||||
steps:
|
|
||||||
# ===========================================
|
|
||||||
# PR VALIDATION: Only typecheck changed projects
|
|
||||||
# ===========================================
|
|
||||||
typecheck-backend:
|
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
|
||||||
commands:
|
|
||||||
- npm config set cache /npm-cache/backend --global
|
|
||||||
- cd backend
|
|
||||||
- npm ci --prefer-offline
|
|
||||||
- npx tsc --noEmit
|
|
||||||
volumes:
|
|
||||||
- npm-cache:/npm-cache
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
path:
|
|
||||||
include: ['backend/**']
|
|
||||||
|
|
||||||
typecheck-cannaiq:
|
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
|
||||||
commands:
|
|
||||||
- npm config set cache /npm-cache/cannaiq --global
|
|
||||||
- cd cannaiq
|
|
||||||
- npm ci --prefer-offline
|
|
||||||
- npx tsc --noEmit
|
|
||||||
volumes:
|
|
||||||
- npm-cache:/npm-cache
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
path:
|
|
||||||
include: ['cannaiq/**']
|
|
||||||
|
|
||||||
# findadispo/findagram typechecks skipped - they have || true anyway
|
|
||||||
|
|
||||||
# ===========================================
|
|
||||||
# AUTO-MERGE: Merge PR after all checks pass
|
|
||||||
# ===========================================
|
|
||||||
auto-merge:
|
|
||||||
image: alpine:latest
|
|
||||||
environment:
|
|
||||||
GITEA_TOKEN:
|
|
||||||
from_secret: gitea_token
|
|
||||||
commands:
|
|
||||||
- apk add --no-cache curl
|
|
||||||
- |
|
|
||||||
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
|
|
||||||
curl -s -X POST \
|
|
||||||
-H "Authorization: token $GITEA_TOKEN" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{"Do":"merge"}' \
|
|
||||||
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
|
||||||
depends_on:
|
|
||||||
- typecheck-backend
|
|
||||||
- typecheck-cannaiq
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
|
|
||||||
# ===========================================
|
|
||||||
# MASTER DEPLOY: Parallel Docker builds
|
|
||||||
# ===========================================
|
|
||||||
docker-backend:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/dispensary-scraper
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: backend/Dockerfile
|
|
||||||
context: backend
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache
|
|
||||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max
|
|
||||||
build_args:
|
|
||||||
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
|
|
||||||
APP_GIT_SHA: ${CI_COMMIT_SHA}
|
|
||||||
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
|
|
||||||
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
docker-cannaiq:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/cannaiq-frontend
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: cannaiq/Dockerfile
|
|
||||||
context: cannaiq
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache
|
|
||||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
docker-findadispo:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/findadispo-frontend
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: findadispo/frontend/Dockerfile
|
|
||||||
context: findadispo/frontend
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache
|
|
||||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
docker-findagram:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/findagram-frontend
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: findagram/frontend/Dockerfile
|
|
||||||
context: findagram/frontend
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache
|
|
||||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
# ===========================================
|
|
||||||
# STAGE 3: Deploy and Run Migrations
|
|
||||||
# ===========================================
|
|
||||||
deploy:
|
|
||||||
image: bitnami/kubectl:latest
|
|
||||||
environment:
|
|
||||||
KUBECONFIG_CONTENT:
|
|
||||||
from_secret: kubeconfig_data
|
|
||||||
commands:
|
|
||||||
- mkdir -p ~/.kube
|
|
||||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
|
||||||
- chmod 600 ~/.kube/config
|
|
||||||
# Deploy backend first
|
|
||||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
|
||||||
# Note: Migrations run automatically at startup via auto-migrate
|
|
||||||
# Deploy remaining services
|
|
||||||
# Resilience: ensure workers are scaled up if at 0
|
|
||||||
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
|
|
||||||
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
|
||||||
depends_on:
|
|
||||||
- docker-backend
|
|
||||||
- docker-cannaiq
|
|
||||||
- docker-findadispo
|
|
||||||
- docker-findagram
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
@@ -5,7 +5,7 @@ FROM code.cannabrands.app/creationshop/node:20-slim AS builder
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
RUN npm install
|
RUN npm ci
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
@@ -25,9 +25,8 @@ ENV APP_GIT_SHA=${APP_GIT_SHA}
|
|||||||
ENV APP_BUILD_TIME=${APP_BUILD_TIME}
|
ENV APP_BUILD_TIME=${APP_BUILD_TIME}
|
||||||
ENV CONTAINER_IMAGE_TAG=${CONTAINER_IMAGE_TAG}
|
ENV CONTAINER_IMAGE_TAG=${CONTAINER_IMAGE_TAG}
|
||||||
|
|
||||||
# Install Chromium dependencies and curl for HTTP requests
|
# Install Chromium dependencies
|
||||||
RUN apt-get update && apt-get install -y \
|
RUN apt-get update && apt-get install -y \
|
||||||
curl \
|
|
||||||
chromium \
|
chromium \
|
||||||
fonts-liberation \
|
fonts-liberation \
|
||||||
libnss3 \
|
libnss3 \
|
||||||
@@ -44,13 +43,10 @@ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
RUN npm install --omit=dev
|
RUN npm ci --omit=dev
|
||||||
|
|
||||||
COPY --from=builder /app/dist ./dist
|
COPY --from=builder /app/dist ./dist
|
||||||
|
|
||||||
# Copy migrations for auto-migrate on startup
|
|
||||||
COPY migrations ./migrations
|
|
||||||
|
|
||||||
# Create local images directory for when MinIO is not configured
|
# Create local images directory for when MinIO is not configured
|
||||||
RUN mkdir -p /app/public/images/products
|
RUN mkdir -p /app/public/images/products
|
||||||
|
|
||||||
|
|||||||
@@ -1,175 +0,0 @@
|
|||||||
# API Security Documentation
|
|
||||||
|
|
||||||
This document describes the authentication and authorization configuration for all CannaiQ API endpoints.
|
|
||||||
|
|
||||||
## Authentication Methods
|
|
||||||
|
|
||||||
### 1. Trusted Origins (No Token Required)
|
|
||||||
|
|
||||||
Requests from trusted sources are automatically authenticated with `internal` role:
|
|
||||||
|
|
||||||
**Trusted IPs:**
|
|
||||||
- `127.0.0.1` (localhost IPv4)
|
|
||||||
- `::1` (localhost IPv6)
|
|
||||||
- `::ffff:127.0.0.1` (IPv4-mapped IPv6)
|
|
||||||
|
|
||||||
**Trusted Domains:**
|
|
||||||
- `https://cannaiq.co`
|
|
||||||
- `https://www.cannaiq.co`
|
|
||||||
- `https://findadispo.com`
|
|
||||||
- `https://www.findadispo.com`
|
|
||||||
- `https://findagram.co`
|
|
||||||
- `https://www.findagram.co`
|
|
||||||
- `http://localhost:3010`
|
|
||||||
- `http://localhost:8080`
|
|
||||||
- `http://localhost:5173`
|
|
||||||
|
|
||||||
**Trusted Patterns:**
|
|
||||||
- `*.cannabrands.app`
|
|
||||||
- `*.cannaiq.co`
|
|
||||||
|
|
||||||
**Internal Header:**
|
|
||||||
- `X-Internal-Request` header matching `INTERNAL_REQUEST_SECRET` env var
|
|
||||||
|
|
||||||
### 2. Bearer Token Authentication
|
|
||||||
|
|
||||||
External requests must include a valid token:
|
|
||||||
|
|
||||||
```
|
|
||||||
Authorization: Bearer <token>
|
|
||||||
```
|
|
||||||
|
|
||||||
**Token Types:**
|
|
||||||
- **JWT Token**: User session tokens (7-day expiry)
|
|
||||||
- **API Token**: Long-lived tokens for integrations (stored in `api_tokens` table)
|
|
||||||
|
|
||||||
## Authorization Levels
|
|
||||||
|
|
||||||
### Public (No Auth)
|
|
||||||
Routes accessible without authentication:
|
|
||||||
- `GET /health` - Health check
|
|
||||||
- `GET /api/health/*` - Comprehensive health endpoints
|
|
||||||
- `GET /outbound-ip` - Server's outbound IP
|
|
||||||
- `GET /api/v1/deals` - Public deals endpoint
|
|
||||||
|
|
||||||
### Authenticated (Trusted Origin or Token)
|
|
||||||
Routes requiring authentication but no specific role:
|
|
||||||
|
|
||||||
| Route | Description |
|
|
||||||
|-------|-------------|
|
|
||||||
| `/api/payloads/*` | Raw crawl payload access |
|
|
||||||
| `/api/workers/*` | Worker monitoring |
|
|
||||||
| `/api/worker-registry/*` | Worker registration and heartbeats |
|
|
||||||
| `/api/stores/*` | Store CRUD |
|
|
||||||
| `/api/products/*` | Product listing |
|
|
||||||
| `/api/dispensaries/*` | Dispensary data |
|
|
||||||
|
|
||||||
### Admin Only (Requires `admin` or `superadmin` role)
|
|
||||||
Routes restricted to administrators:
|
|
||||||
|
|
||||||
| Route | Description |
|
|
||||||
|-------|-------------|
|
|
||||||
| `/api/job-queue/*` | Job queue management |
|
|
||||||
| `/api/k8s/*` | Kubernetes control (scaling) |
|
|
||||||
| `/api/pipeline/*` | Pipeline stage transitions |
|
|
||||||
| `/api/tasks/*` | Task queue management |
|
|
||||||
| `/api/admin/orchestrator/*` | Orchestrator dashboard |
|
|
||||||
| `/api/admin/trusted-origins/*` | Manage trusted origins |
|
|
||||||
| `/api/admin/debug/*` | Debug endpoints |
|
|
||||||
|
|
||||||
**Note:** The `internal` role (localhost/trusted origins) bypasses role checks, granting automatic admin access for local development and internal services.
|
|
||||||
|
|
||||||
## Endpoint Security Matrix
|
|
||||||
|
|
||||||
| Endpoint Group | Auth Required | Role Required | Notes |
|
|
||||||
|----------------|---------------|---------------|-------|
|
|
||||||
| `/api/payloads/*` | Yes | None | Query API for raw crawl data |
|
|
||||||
| `/api/job-queue/*` | Yes | admin | Legacy job queue (deprecated) |
|
|
||||||
| `/api/workers/*` | Yes | None | Worker status monitoring |
|
|
||||||
| `/api/worker-registry/*` | Yes | None | Workers register via trusted IPs |
|
|
||||||
| `/api/k8s/*` | Yes | admin | K8s scaling controls |
|
|
||||||
| `/api/pipeline/*` | Yes | admin | Store pipeline transitions |
|
|
||||||
| `/api/tasks/*` | Yes | admin | Task queue CRUD |
|
|
||||||
| `/api/admin/orchestrator/*` | Yes | admin | Orchestrator metrics/alerts |
|
|
||||||
| `/api/admin/trusted-origins/*` | Yes | admin | Auth bypass management |
|
|
||||||
| `/api/v1/*` | Varies | Varies | Public API (per-endpoint) |
|
|
||||||
| `/api/consumer/*` | Varies | Varies | Consumer features |
|
|
||||||
|
|
||||||
## Implementation Details
|
|
||||||
|
|
||||||
### Middleware Stack
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
// Authentication middleware - validates token or trusted origin
|
|
||||||
import { authMiddleware } from '../auth/middleware';
|
|
||||||
|
|
||||||
// Role requirement middleware - checks user role
|
|
||||||
import { requireRole } from '../auth/middleware';
|
|
||||||
|
|
||||||
// Usage in route files:
|
|
||||||
router.use(authMiddleware); // All routes need auth
|
|
||||||
router.use(requireRole('admin', 'superadmin')); // Admin-only routes
|
|
||||||
```
|
|
||||||
|
|
||||||
### Auth Middleware Flow
|
|
||||||
|
|
||||||
```
|
|
||||||
Request → Check Bearer Token
|
|
||||||
├─ Valid JWT → Set user from token → Continue
|
|
||||||
├─ Valid API Token → Set user as api_token role → Continue
|
|
||||||
└─ No Token → Check Trusted Origin
|
|
||||||
├─ Trusted → Set user as internal role → Continue
|
|
||||||
└─ Not Trusted → 401 Unauthorized
|
|
||||||
```
|
|
||||||
|
|
||||||
### Role Check Flow
|
|
||||||
|
|
||||||
```
|
|
||||||
Request → authMiddleware → requireRole('admin')
|
|
||||||
├─ role === 'internal' → Continue (bypass)
|
|
||||||
├─ role in ['admin', 'superadmin'] → Continue
|
|
||||||
└─ else → 403 Forbidden
|
|
||||||
```
|
|
||||||
|
|
||||||
## Worker Pod Authentication
|
|
||||||
|
|
||||||
Worker pods (in Kubernetes) authenticate via:
|
|
||||||
|
|
||||||
1. **Internal IP**: Pods communicate via cluster IPs, which are trusted
|
|
||||||
2. **Internal Header**: Optional `X-Internal-Request` header for explicit trust
|
|
||||||
|
|
||||||
Endpoints used by workers:
|
|
||||||
- `POST /api/worker-registry/register` - Report for duty
|
|
||||||
- `POST /api/worker-registry/heartbeat` - Stay alive
|
|
||||||
- `POST /api/worker-registry/deregister` - Graceful shutdown
|
|
||||||
- `POST /api/worker-registry/task-completed` - Report task completion
|
|
||||||
|
|
||||||
## API Token Management
|
|
||||||
|
|
||||||
API tokens are managed via:
|
|
||||||
- `GET /api/api-tokens` - List tokens
|
|
||||||
- `POST /api/api-tokens` - Create token
|
|
||||||
- `DELETE /api/api-tokens/:id` - Revoke token
|
|
||||||
|
|
||||||
Token properties:
|
|
||||||
- `token`: The bearer token value
|
|
||||||
- `name`: Human-readable identifier
|
|
||||||
- `rate_limit`: Requests per minute
|
|
||||||
- `expires_at`: Optional expiration
|
|
||||||
- `active`: Enable/disable toggle
|
|
||||||
- `allowed_endpoints`: Optional endpoint restrictions
|
|
||||||
|
|
||||||
## Security Best Practices
|
|
||||||
|
|
||||||
1. **Never expose tokens in URLs** - Use Authorization header
|
|
||||||
2. **Use HTTPS in production** - All traffic encrypted
|
|
||||||
3. **Rotate API tokens periodically** - Set expiration dates
|
|
||||||
4. **Monitor rate limits** - Prevent abuse
|
|
||||||
5. **Audit access logs** - Track API usage via `api_usage_logs` table
|
|
||||||
|
|
||||||
## Related Files
|
|
||||||
|
|
||||||
- `src/auth/middleware.ts` - Auth middleware implementation
|
|
||||||
- `src/routes/api-tokens.ts` - Token management endpoints
|
|
||||||
- `src/middleware/apiTokenTracker.ts` - Usage tracking
|
|
||||||
- `src/middleware/trustedDomains.ts` - Domain trust markers
|
|
||||||
@@ -1,218 +0,0 @@
|
|||||||
# CannaiQ Backend Codebase Map
|
|
||||||
|
|
||||||
**Last Updated:** 2025-12-12
|
|
||||||
**Purpose:** Help Claude and developers understand which code is current vs deprecated
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Quick Reference: What to Use
|
|
||||||
|
|
||||||
### For Crawling/Scraping
|
|
||||||
| Task | Use This | NOT This |
|
|
||||||
|------|----------|----------|
|
|
||||||
| Fetch products | `src/tasks/handlers/payload-fetch.ts` | `src/hydration/*` |
|
|
||||||
| Process products | `src/tasks/handlers/product-refresh.ts` | `src/scraper-v2/*` |
|
|
||||||
| GraphQL client | `src/platforms/dutchie/client.ts` | `src/dutchie-az/services/graphql-client.ts` |
|
|
||||||
| Worker system | `src/tasks/task-worker.ts` | `src/dutchie-az/services/worker.ts` |
|
|
||||||
|
|
||||||
### For Database
|
|
||||||
| Task | Use This | NOT This |
|
|
||||||
|------|----------|----------|
|
|
||||||
| Get DB pool | `src/db/pool.ts` | `src/dutchie-az/db/connection.ts` |
|
|
||||||
| Run migrations | `src/db/migrate.ts` (CLI only) | Never import at runtime |
|
|
||||||
| Query products | `store_products` table | `products`, `dutchie_products` |
|
|
||||||
| Query stores | `dispensaries` table | `stores` table |
|
|
||||||
|
|
||||||
### For Discovery
|
|
||||||
| Task | Use This |
|
|
||||||
|------|----------|
|
|
||||||
| Discover stores | `src/discovery/*.ts` |
|
|
||||||
| Run discovery | `npx tsx src/scripts/run-discovery.ts` |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Directory Status
|
|
||||||
|
|
||||||
### ACTIVE DIRECTORIES (Use These)
|
|
||||||
|
|
||||||
```
|
|
||||||
src/
|
|
||||||
├── auth/ # JWT/session auth, middleware
|
|
||||||
├── db/ # Database pool, migrations
|
|
||||||
├── discovery/ # Dutchie store discovery pipeline
|
|
||||||
├── middleware/ # Express middleware
|
|
||||||
├── multi-state/ # Multi-state query support
|
|
||||||
├── platforms/ # Platform-specific clients (Dutchie, Jane, etc)
|
|
||||||
│ └── dutchie/ # THE Dutchie client - use this one
|
|
||||||
├── routes/ # Express API routes
|
|
||||||
├── services/ # Core services (logger, scheduler, etc)
|
|
||||||
├── tasks/ # Task system (workers, handlers, scheduler)
|
|
||||||
│ └── handlers/ # Task handlers (payload_fetch, product_refresh, etc)
|
|
||||||
├── types/ # TypeScript types
|
|
||||||
└── utils/ # Utilities (storage, image processing)
|
|
||||||
```
|
|
||||||
|
|
||||||
### DEPRECATED DIRECTORIES (DO NOT USE)
|
|
||||||
|
|
||||||
```
|
|
||||||
src/
|
|
||||||
├── hydration/ # DEPRECATED - Old pipeline approach
|
|
||||||
├── scraper-v2/ # DEPRECATED - Old scraper engine
|
|
||||||
├── canonical-hydration/# DEPRECATED - Merged into tasks/handlers
|
|
||||||
├── dutchie-az/ # PARTIAL - Some parts deprecated, some active
|
|
||||||
│ ├── db/ # DEPRECATED - Use src/db/pool.ts
|
|
||||||
│ └── services/ # PARTIAL - worker.ts still runs, graphql-client.ts deprecated
|
|
||||||
├── portals/ # FUTURE - Not yet implemented
|
|
||||||
├── seo/ # PARTIAL - Settings work, templates WIP
|
|
||||||
└── system/ # DEPRECATED - Old orchestration system
|
|
||||||
```
|
|
||||||
|
|
||||||
### DEPRECATED FILES (DO NOT USE)
|
|
||||||
|
|
||||||
```
|
|
||||||
src/dutchie-az/db/connection.ts # Use src/db/pool.ts instead
|
|
||||||
src/dutchie-az/services/graphql-client.ts # Use src/platforms/dutchie/client.ts
|
|
||||||
src/hydration/*.ts # Entire directory deprecated
|
|
||||||
src/scraper-v2/*.ts # Entire directory deprecated
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Key Files Reference
|
|
||||||
|
|
||||||
### Entry Points
|
|
||||||
| File | Purpose | Status |
|
|
||||||
|------|---------|--------|
|
|
||||||
| `src/index.ts` | Main Express server | ACTIVE |
|
|
||||||
| `src/dutchie-az/services/worker.ts` | Worker process entry | ACTIVE |
|
|
||||||
| `src/tasks/task-worker.ts` | Task worker (new system) | ACTIVE |
|
|
||||||
|
|
||||||
### Dutchie Integration
|
|
||||||
| File | Purpose | Status |
|
|
||||||
|------|---------|--------|
|
|
||||||
| `src/platforms/dutchie/client.ts` | GraphQL client, hashes, curl | **PRIMARY** |
|
|
||||||
| `src/platforms/dutchie/queries.ts` | High-level query functions | ACTIVE |
|
|
||||||
| `src/platforms/dutchie/index.ts` | Re-exports | ACTIVE |
|
|
||||||
|
|
||||||
### Task Handlers
|
|
||||||
| File | Purpose | Status |
|
|
||||||
|------|---------|--------|
|
|
||||||
| `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** |
|
|
||||||
| `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** |
|
|
||||||
| `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE |
|
|
||||||
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs | ACTIVE |
|
|
||||||
| `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE |
|
|
||||||
|
|
||||||
### Database
|
|
||||||
| File | Purpose | Status |
|
|
||||||
|------|---------|--------|
|
|
||||||
| `src/db/pool.ts` | Canonical DB pool | **PRIMARY** |
|
|
||||||
| `src/db/migrate.ts` | Migration runner (CLI only) | CLI ONLY |
|
|
||||||
| `src/db/auto-migrate.ts` | Auto-run migrations on startup | ACTIVE |
|
|
||||||
|
|
||||||
### Configuration
|
|
||||||
| File | Purpose | Status |
|
|
||||||
|------|---------|--------|
|
|
||||||
| `.env` | Environment variables | ACTIVE |
|
|
||||||
| `package.json` | Dependencies | ACTIVE |
|
|
||||||
| `tsconfig.json` | TypeScript config | ACTIVE |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## GraphQL Hashes (CRITICAL)
|
|
||||||
|
|
||||||
The correct hashes are in `src/platforms/dutchie/client.ts`:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
export const GRAPHQL_HASHES = {
|
|
||||||
FilteredProducts: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
|
|
||||||
GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
|
|
||||||
ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
|
|
||||||
GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
**ALWAYS** use `Status: 'Active'` for FilteredProducts (not `null` or `'All'`).
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Scripts Reference
|
|
||||||
|
|
||||||
### Useful Scripts (in `src/scripts/`)
|
|
||||||
| Script | Purpose |
|
|
||||||
|--------|---------|
|
|
||||||
| `run-discovery.ts` | Run Dutchie discovery |
|
|
||||||
| `crawl-single-store.ts` | Test crawl a single store |
|
|
||||||
| `test-dutchie-graphql.ts` | Test GraphQL queries |
|
|
||||||
|
|
||||||
### One-Off Scripts (probably don't need)
|
|
||||||
| Script | Purpose |
|
|
||||||
|--------|---------|
|
|
||||||
| `harmonize-az-dispensaries.ts` | One-time data cleanup |
|
|
||||||
| `bootstrap-stores-for-dispensaries.ts` | One-time migration |
|
|
||||||
| `backfill-*.ts` | Historical backfill scripts |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## API Routes
|
|
||||||
|
|
||||||
### Active Routes (in `src/routes/`)
|
|
||||||
| Route File | Mount Point | Purpose |
|
|
||||||
|------------|-------------|---------|
|
|
||||||
| `auth.ts` | `/api/auth` | Login/logout/session |
|
|
||||||
| `stores.ts` | `/api/stores` | Store CRUD |
|
|
||||||
| `dashboard.ts` | `/api/dashboard` | Dashboard stats |
|
|
||||||
| `workers.ts` | `/api/workers` | Worker monitoring |
|
|
||||||
| `pipeline.ts` | `/api/pipeline` | Crawl triggers |
|
|
||||||
| `discovery.ts` | `/api/discovery` | Discovery management |
|
|
||||||
| `analytics.ts` | `/api/analytics` | Analytics queries |
|
|
||||||
| `wordpress.ts` | `/api/v1/wordpress` | WordPress plugin API |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Documentation Files
|
|
||||||
|
|
||||||
### Current Docs (in `backend/docs/`)
|
|
||||||
| Doc | Purpose | Currency |
|
|
||||||
|-----|---------|----------|
|
|
||||||
| `TASK_WORKFLOW_2024-12-10.md` | Task system architecture | CURRENT |
|
|
||||||
| `WORKER_TASK_ARCHITECTURE.md` | Worker/task design | CURRENT |
|
|
||||||
| `CRAWL_PIPELINE.md` | Crawl pipeline overview | CURRENT |
|
|
||||||
| `ORGANIC_SCRAPING_GUIDE.md` | Browser-based scraping | CURRENT |
|
|
||||||
| `CODEBASE_MAP.md` | This file | CURRENT |
|
|
||||||
| `ANALYTICS_V2_EXAMPLES.md` | Analytics API examples | CURRENT |
|
|
||||||
| `BRAND_INTELLIGENCE_API.md` | Brand API docs | CURRENT |
|
|
||||||
|
|
||||||
### Root Docs
|
|
||||||
| Doc | Purpose | Currency |
|
|
||||||
|-----|---------|----------|
|
|
||||||
| `CLAUDE.md` | Claude instructions | **PRIMARY** |
|
|
||||||
| `README.md` | Project overview | NEEDS UPDATE |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Common Mistakes to Avoid
|
|
||||||
|
|
||||||
1. **Don't use `src/hydration/`** - It's an old approach that was superseded by the task system
|
|
||||||
|
|
||||||
2. **Don't use `src/dutchie-az/db/connection.ts`** - Use `src/db/pool.ts` instead
|
|
||||||
|
|
||||||
3. **Don't import `src/db/migrate.ts` at runtime** - It will crash. Only use for CLI migrations.
|
|
||||||
|
|
||||||
4. **Don't query `stores` table** - It's empty. Use `dispensaries`.
|
|
||||||
|
|
||||||
5. **Don't query `products` table** - It's empty. Use `store_products`.
|
|
||||||
|
|
||||||
6. **Don't use wrong GraphQL hash** - Always get hash from `GRAPHQL_HASHES` in client.ts
|
|
||||||
|
|
||||||
7. **Don't use `Status: null`** - It returns 0 products. Use `Status: 'Active'`.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## When in Doubt
|
|
||||||
|
|
||||||
1. Check if the file is imported in `src/index.ts` - if not, it may be deprecated
|
|
||||||
2. Check the last modified date - older files may be stale
|
|
||||||
3. Look for `DEPRECATED` comments in the code
|
|
||||||
4. Ask: "Is there a newer version of this in `src/tasks/` or `src/platforms/`?"
|
|
||||||
5. Read the relevant doc in `docs/` before modifying code
|
|
||||||
@@ -500,18 +500,17 @@ CREATE TABLE proxies (
|
|||||||
|
|
||||||
Proxies are mandatory. There is no environment variable to disable them. Workers will refuse to start without active proxies in the database.
|
||||||
|
|
||||||
### User-Agent Generation
|
### Fingerprints Available
|
||||||
|
|
||||||
See `workflow-12102025.md` for full specification.
|
The client includes 6 browser fingerprints:
|
||||||
|
- Chrome 131 on Windows
|
||||||
|
- Chrome 131 on macOS
|
||||||
|
- Chrome 120 on Windows
|
||||||
|
- Firefox 133 on Windows
|
||||||
|
- Safari 17.2 on macOS
|
||||||
|
- Edge 131 on Windows
|
||||||
|
|
||||||
**Summary:**
|
Each includes proper `sec-ch-ua`, `sec-ch-ua-platform`, and `sec-ch-ua-mobile` headers.
|
||||||
- Uses `intoli/user-agents` library (daily-updated market share data)
|
|
||||||
- Device distribution: Mobile 62%, Desktop 36%, Tablet 2%
|
|
||||||
- Browser whitelist: Chrome, Safari, Edge, Firefox only
|
|
||||||
- UA sticks until IP rotates (403 or manual rotation)
|
|
||||||
- Failure = alert admin + stop crawl (no fallback)
|
|
||||||
|
|
||||||
Each fingerprint includes proper `sec-ch-ua`, `sec-ch-ua-platform`, and `sec-ch-ua-mobile` headers.
|
|
||||||
|
|
||||||
---
|
||||||
|
|
||||||
@@ -362,148 +362,6 @@ SET status = 'pending', retry_count = retry_count + 1
|
|||||||
WHERE status = 'failed' AND retry_count < max_retries;
```
|
||||||
|
|
||||||
## Concurrent Task Processing (Added 2024-12)
|
|
||||||
|
|
||||||
Workers can now process multiple tasks concurrently within a single worker instance. This improves throughput by utilizing async I/O efficiently.
|
|
||||||
|
|
||||||
### Architecture
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────┐
|
|
||||||
│ Pod (K8s) │
|
|
||||||
│ │
|
|
||||||
│ ┌─────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ TaskWorker │ │
|
|
||||||
│ │ │ │
|
|
||||||
│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │
|
|
||||||
│ │ │ Task 1 │ │ Task 2 │ │ Task 3 │ (concurrent)│ │
|
|
||||||
│ │ └─────────┘ └─────────┘ └─────────┘ │ │
|
|
||||||
│ │ │ │
|
|
||||||
│ │ Resource Monitor │ │
|
|
||||||
│ │ ├── Memory: 65% (threshold: 85%) │ │
|
|
||||||
│ │ ├── CPU: 45% (threshold: 90%) │ │
|
|
||||||
│ │ └── Status: Normal │ │
|
|
||||||
│ └─────────────────────────────────────────────────────┘ │
|
|
||||||
└─────────────────────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
### Environment Variables
|
|
||||||
|
|
||||||
| Variable | Default | Description |
|
|
||||||
|----------|---------|-------------|
|
|
||||||
| `MAX_CONCURRENT_TASKS` | 3 | Maximum tasks a worker will run concurrently |
|
|
||||||
| `MEMORY_BACKOFF_THRESHOLD` | 0.85 | Back off when heap memory exceeds 85% |
|
|
||||||
| `CPU_BACKOFF_THRESHOLD` | 0.90 | Back off when CPU exceeds 90% |
|
|
||||||
| `BACKOFF_DURATION_MS` | 10000 | How long to wait when backing off (10s) |
|
|
||||||
|
|
||||||
### How It Works
|
|
||||||
|
|
||||||
1. **Main Loop**: Worker continuously tries to fill up to `MAX_CONCURRENT_TASKS`
|
|
||||||
2. **Resource Monitoring**: Before claiming a new task, worker checks memory and CPU
|
|
||||||
3. **Backoff**: If resources exceed thresholds, worker pauses and stops claiming new tasks
|
|
||||||
4. **Concurrent Execution**: Tasks run concurrently as independent in-flight `Promise`s - they don't block each other on async I/O
|
|
||||||
5. **Graceful Shutdown**: On SIGTERM/decommission, worker stops claiming but waits for active tasks
|
|
||||||
|
|
||||||
### Resource Monitoring
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
// ResourceStats interface
|
|
||||||
interface ResourceStats {
|
|
||||||
memoryPercent: number; // Current heap usage as decimal (0.0-1.0)
|
|
||||||
memoryMb: number; // Current heap used in MB
|
|
||||||
memoryTotalMb: number; // Total heap available in MB
|
|
||||||
cpuPercent: number; // CPU usage as percentage (0-100)
|
|
||||||
isBackingOff: boolean; // True if worker is in backoff state
|
|
||||||
backoffReason: string; // Why the worker is backing off
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Heartbeat Data
|
|
||||||
|
|
||||||
Workers report the following in their heartbeat:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"worker_id": "worker-abc123",
|
|
||||||
"current_task_id": 456,
|
|
||||||
"current_task_ids": [456, 457, 458],
|
|
||||||
"active_task_count": 3,
|
|
||||||
"max_concurrent_tasks": 3,
|
|
||||||
"status": "active",
|
|
||||||
"resources": {
|
|
||||||
"memory_mb": 256,
|
|
||||||
"memory_total_mb": 512,
|
|
||||||
"memory_rss_mb": 320,
|
|
||||||
"memory_percent": 50,
|
|
||||||
"cpu_user_ms": 12500,
|
|
||||||
"cpu_system_ms": 3200,
|
|
||||||
"cpu_percent": 45,
|
|
||||||
"is_backing_off": false,
|
|
||||||
"backoff_reason": null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Backoff Behavior
|
|
||||||
|
|
||||||
When resources exceed thresholds:
|
|
||||||
|
|
||||||
1. Worker logs the backoff reason:
|
|
||||||
```
|
|
||||||
[TaskWorker] MyWorker backing off: Memory at 87.3% (threshold: 85%)
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Worker stops claiming new tasks but continues existing tasks
|
|
||||||
|
|
||||||
3. After `BACKOFF_DURATION_MS`, worker rechecks resources
|
|
||||||
|
|
||||||
4. When resources return to normal:
|
|
||||||
```
|
|
||||||
[TaskWorker] MyWorker resuming normal operation
|
|
||||||
```
|
|
||||||
|
|
||||||
### UI Display
|
|
||||||
|
|
||||||
The Workers Dashboard shows:
|
|
||||||
|
|
||||||
- **Tasks Column**: `2/3 tasks` (active/max concurrent)
|
|
||||||
- **Resources Column**: Memory % and CPU % with color coding
|
|
||||||
- Green: < 50%
|
|
||||||
- Yellow: 50-74%
|
|
||||||
- Amber: 75-89%
|
|
||||||
- Red: 90%+
|
|
||||||
- **Backing Off**: Orange warning badge when worker is in backoff state
|
|
||||||
|
|
||||||
### Task Count Badge Details
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────┐
|
|
||||||
│ Worker: "MyWorker" │
|
|
||||||
│ Tasks: 2/3 tasks #456, #457 │
|
|
||||||
│ Resources: 🧠 65% 💻 45% │
|
|
||||||
│ Status: ● Active │
|
|
||||||
└─────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
### Best Practices
|
|
||||||
|
|
||||||
1. **Start Conservative**: Use `MAX_CONCURRENT_TASKS=3` initially
|
|
||||||
2. **Monitor Resources**: Watch for frequent backoffs in logs
|
|
||||||
3. **Tune Per Workload**: I/O-bound tasks benefit from higher concurrency
|
|
||||||
4. **Scale Horizontally**: Add more pods rather than cranking concurrency too high
|
|
||||||
|
|
||||||
### Code References
|
|
||||||
|
|
||||||
| File | Purpose |
|
|
||||||
|------|---------|
|
|
||||||
| `src/tasks/task-worker.ts:68-71` | Concurrency environment variables |
|
|
||||||
| `src/tasks/task-worker.ts:104-111` | ResourceStats interface |
|
|
||||||
| `src/tasks/task-worker.ts:149-179` | getResourceStats() method |
|
|
||||||
| `src/tasks/task-worker.ts:184-196` | shouldBackOff() method |
|
|
||||||
| `src/tasks/task-worker.ts:462-516` | mainLoop() with concurrent claiming |
|
|
||||||
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
|
|
||||||
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
|
|
||||||
|
|
||||||
## Monitoring
|
||||||
|
|
||||||
### Logs
|
||||||
@@ -1,394 +0,0 @@
|
|||||||
# Brand Intelligence API
|
|
||||||
|
|
||||||
## Endpoint
|
|
||||||
|
|
||||||
```
|
|
||||||
GET /api/analytics/v2/brand/:name/intelligence
|
|
||||||
```
|
|
||||||
|
|
||||||
## Query Parameters
|
|
||||||
|
|
||||||
| Param | Type | Default | Description |
|
|
||||||
|-------|------|---------|-------------|
|
|
||||||
| `window` | `7d\|30d\|90d` | `30d` | Time window for trend calculations |
|
|
||||||
| `state` | string | - | Filter by state code (e.g., `AZ`) |
|
|
||||||
| `category` | string | - | Filter by category (e.g., `Flower`) |
|
|
||||||
|
|
||||||
## Response Payload Schema
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
interface BrandIntelligenceResult {
|
|
||||||
brand_name: string;
|
|
||||||
window: '7d' | '30d' | '90d';
|
|
||||||
generated_at: string; // ISO timestamp when data was computed
|
|
||||||
|
|
||||||
performance_snapshot: PerformanceSnapshot;
|
|
||||||
alerts: Alerts;
|
|
||||||
sku_performance: SkuPerformance[];
|
|
||||||
retail_footprint: RetailFootprint;
|
|
||||||
competitive_landscape: CompetitiveLandscape;
|
|
||||||
inventory_health: InventoryHealth;
|
|
||||||
promo_performance: PromoPerformance;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Section 1: Performance Snapshot
|
|
||||||
|
|
||||||
Summary cards with key brand metrics.
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
interface PerformanceSnapshot {
|
|
||||||
active_skus: number; // Total products in catalog
|
|
||||||
total_revenue_30d: number | null; // Estimated from qty × price
|
|
||||||
total_stores: number; // Active retail partners
|
|
||||||
new_stores_30d: number; // New distribution in window
|
|
||||||
market_share: number | null; // % of category SKUs
|
|
||||||
avg_wholesale_price: number | null;
|
|
||||||
price_position: 'premium' | 'value' | 'competitive';
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**UI Label Mapping:**
|
|
||||||
| Field | User-Facing Label | Helper Text |
|
|
||||||
|-------|-------------------|-------------|
|
|
||||||
| `active_skus` | Active Products | X total in catalog |
|
|
||||||
| `total_revenue_30d` | Monthly Revenue | Estimated from sales |
|
|
||||||
| `total_stores` | Retail Distribution | Active retail partners |
|
|
||||||
| `new_stores_30d` | New Opportunities | X new in last 30 days |
|
|
||||||
| `market_share` | Category Position | % of category |
|
|
||||||
| `avg_wholesale_price` | Avg Wholesale | Per unit |
|
|
||||||
| `price_position` | Pricing Tier | Premium/Value/Market Rate |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Section 2: Alerts
|
|
||||||
|
|
||||||
Issues requiring attention.
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
interface Alerts {
|
|
||||||
lost_stores_30d_count: number;
|
|
||||||
lost_skus_30d_count: number;
|
|
||||||
competitor_takeover_count: number;
|
|
||||||
avg_oos_duration_days: number | null;
|
|
||||||
avg_reorder_lag_days: number | null;
|
|
||||||
items: AlertItem[];
|
|
||||||
}
|
|
||||||
|
|
||||||
interface AlertItem {
|
|
||||||
type: 'lost_store' | 'delisted_sku' | 'shelf_loss' | 'extended_oos';
|
|
||||||
severity: 'critical' | 'warning';
|
|
||||||
store_name?: string;
|
|
||||||
product_name?: string;
|
|
||||||
competitor_brand?: string;
|
|
||||||
days_since?: number;
|
|
||||||
state_code?: string;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**UI Label Mapping:**
|
|
||||||
| Field | User-Facing Label |
|
|
||||||
|-------|-------------------|
|
|
||||||
| `lost_stores_30d_count` | Accounts at Risk |
|
|
||||||
| `lost_skus_30d_count` | Delisted SKUs |
|
|
||||||
| `competitor_takeover_count` | Shelf Losses |
|
|
||||||
| `avg_oos_duration_days` | Avg Stockout Length |
|
|
||||||
| `avg_reorder_lag_days` | Avg Restock Time |
|
|
||||||
| `severity: critical` | Urgent |
|
|
||||||
| `severity: warning` | Watch |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Section 3: SKU Performance (Product Velocity)
|
|
||||||
|
|
||||||
How fast each SKU sells.
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
interface SkuPerformance {
|
|
||||||
store_product_id: number;
|
|
||||||
product_name: string;
|
|
||||||
category: string | null;
|
|
||||||
daily_velocity: number; // Units/day estimate
|
|
||||||
velocity_status: 'hot' | 'steady' | 'slow' | 'stale';
|
|
||||||
retail_price: number | null;
|
|
||||||
on_sale: boolean;
|
|
||||||
stores_carrying: number;
|
|
||||||
stock_status: 'in_stock' | 'low_stock' | 'out_of_stock';
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**UI Label Mapping:**
|
|
||||||
| Field | User-Facing Label |
|
|
||||||
|-------|-------------------|
|
|
||||||
| `daily_velocity` | Daily Rate |
|
|
||||||
| `velocity_status` | Momentum |
|
|
||||||
| `velocity_status: hot` | Hot |
|
|
||||||
| `velocity_status: steady` | Steady |
|
|
||||||
| `velocity_status: slow` | Slow |
|
|
||||||
| `velocity_status: stale` | Stale |
|
|
||||||
| `retail_price` | Retail Price |
|
|
||||||
| `on_sale` | Promo (badge) |
|
|
||||||
|
|
||||||
**Velocity Thresholds:**
|
|
||||||
- `hot`: >= 5 units/day
|
|
||||||
- `steady`: >= 1 unit/day
|
|
||||||
- `slow`: >= 0.1 units/day
|
|
||||||
- `stale`: < 0.1 units/day
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Section 4: Retail Footprint
|
|
||||||
|
|
||||||
Store placement and coverage.
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
interface RetailFootprint {
|
|
||||||
total_stores: number;
|
|
||||||
in_stock_count: number;
|
|
||||||
out_of_stock_count: number;
|
|
||||||
penetration_by_region: RegionPenetration[];
|
|
||||||
whitespace_stores: WhitespaceStore[];
|
|
||||||
}
|
|
||||||
|
|
||||||
interface RegionPenetration {
|
|
||||||
state_code: string;
|
|
||||||
store_count: number;
|
|
||||||
percent_reached: number; // % of state's dispensaries
|
|
||||||
in_stock: number;
|
|
||||||
out_of_stock: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface WhitespaceStore {
|
|
||||||
store_id: number;
|
|
||||||
store_name: string;
|
|
||||||
state_code: string;
|
|
||||||
city: string | null;
|
|
||||||
category_fit: number; // How many competing brands they carry
|
|
||||||
competitor_brands: string[];
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**UI Label Mapping:**
|
|
||||||
| Field | User-Facing Label |
|
|
||||||
|-------|-------------------|
|
|
||||||
| `penetration_by_region` | Market Coverage by Region |
|
|
||||||
| `percent_reached` | X% reached |
|
|
||||||
| `in_stock` | X stocked |
|
|
||||||
| `out_of_stock` | X out |
|
|
||||||
| `whitespace_stores` | Expansion Opportunities |
|
|
||||||
| `category_fit` | X fit |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Section 5: Competitive Landscape
|
|
||||||
|
|
||||||
Market positioning vs competitors.
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
interface CompetitiveLandscape {
|
|
||||||
brand_price_position: 'premium' | 'value' | 'competitive';
|
|
||||||
market_share_trend: MarketSharePoint[];
|
|
||||||
competitors: Competitor[];
|
|
||||||
head_to_head_skus: HeadToHead[];
|
|
||||||
}
|
|
||||||
|
|
||||||
interface MarketSharePoint {
|
|
||||||
date: string;
|
|
||||||
share_percent: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface Competitor {
|
|
||||||
brand_name: string;
|
|
||||||
store_overlap_percent: number;
|
|
||||||
price_position: 'premium' | 'value' | 'competitive';
|
|
||||||
avg_price: number | null;
|
|
||||||
sku_count: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface HeadToHead {
|
|
||||||
product_name: string;
|
|
||||||
brand_price: number;
|
|
||||||
competitor_brand: string;
|
|
||||||
competitor_price: number;
|
|
||||||
price_diff_percent: number;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**UI Label Mapping:**
|
|
||||||
| Field | User-Facing Label |
|
|
||||||
|-------|-------------------|
|
|
||||||
| `price_position: premium` | Premium Tier |
|
|
||||||
| `price_position: value` | Value Leader |
|
|
||||||
| `price_position: competitive` | Market Rate |
|
|
||||||
| `market_share_trend` | Share of Shelf Trend |
|
|
||||||
| `head_to_head_skus` | Price Comparison |
|
|
||||||
| `store_overlap_percent` | X% store overlap |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Section 6: Inventory Health
|
|
||||||
|
|
||||||
Stock projections and risk levels.
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
interface InventoryHealth {
|
|
||||||
critical_count: number; // <7 days stock
|
|
||||||
warning_count: number; // 7-14 days stock
|
|
||||||
healthy_count: number; // 14-90 days stock
|
|
||||||
overstocked_count: number; // >90 days stock
|
|
||||||
skus: InventorySku[];
|
|
||||||
overstock_alert: OverstockItem[];
|
|
||||||
}
|
|
||||||
|
|
||||||
interface InventorySku {
|
|
||||||
store_product_id: number;
|
|
||||||
product_name: string;
|
|
||||||
store_name: string;
|
|
||||||
days_of_stock: number | null;
|
|
||||||
risk_level: 'critical' | 'elevated' | 'moderate' | 'healthy';
|
|
||||||
current_quantity: number | null;
|
|
||||||
daily_sell_rate: number | null;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface OverstockItem {
|
|
||||||
product_name: string;
|
|
||||||
store_name: string;
|
|
||||||
excess_units: number;
|
|
||||||
days_of_stock: number;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**UI Label Mapping:**
|
|
||||||
| Field | User-Facing Label |
|
|
||||||
|-------|-------------------|
|
|
||||||
| `risk_level: critical` | Reorder Now |
|
|
||||||
| `risk_level: elevated` | Low Stock |
|
|
||||||
| `risk_level: moderate` | Monitor |
|
|
||||||
| `risk_level: healthy` | Healthy |
|
|
||||||
| `critical_count` | Urgent (<7 days) |
|
|
||||||
| `warning_count` | Low (7-14 days) |
|
|
||||||
| `overstocked_count` | Excess (>90 days) |
|
|
||||||
| `days_of_stock` | X days remaining |
|
|
||||||
| `overstock_alert` | Overstock Alert |
|
|
||||||
| `excess_units` | X excess units |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Section 7: Promotion Effectiveness
|
|
||||||
|
|
||||||
How promotions impact sales.
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
interface PromoPerformance {
|
|
||||||
avg_baseline_velocity: number | null;
|
|
||||||
avg_promo_velocity: number | null;
|
|
||||||
avg_velocity_lift: number | null; // % increase during promo
|
|
||||||
avg_efficiency_score: number | null; // ROI proxy
|
|
||||||
promotions: Promotion[];
|
|
||||||
}
|
|
||||||
|
|
||||||
interface Promotion {
|
|
||||||
product_name: string;
|
|
||||||
store_name: string;
|
|
||||||
status: 'active' | 'scheduled' | 'ended';
|
|
||||||
start_date: string;
|
|
||||||
end_date: string | null;
|
|
||||||
regular_price: number;
|
|
||||||
promo_price: number;
|
|
||||||
discount_percent: number;
|
|
||||||
baseline_velocity: number | null;
|
|
||||||
promo_velocity: number | null;
|
|
||||||
velocity_lift: number | null;
|
|
||||||
efficiency_score: number | null;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**UI Label Mapping:**
|
|
||||||
| Field | User-Facing Label |
|
|
||||||
|-------|-------------------|
|
|
||||||
| `avg_baseline_velocity` | Normal Rate |
|
|
||||||
| `avg_promo_velocity` | During Promos |
|
|
||||||
| `avg_velocity_lift` | Avg Sales Lift |
|
|
||||||
| `avg_efficiency_score` | ROI Score |
|
|
||||||
| `velocity_lift` | Sales Lift |
|
|
||||||
| `efficiency_score` | ROI Score |
|
|
||||||
| `status: active` | Live |
|
|
||||||
| `status: scheduled` | Scheduled |
|
|
||||||
| `status: ended` | Ended |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Example Queries
|
|
||||||
|
|
||||||
### Get full payload
|
|
||||||
```javascript
|
|
||||||
const response = await fetch('/api/analytics/v2/brand/Wyld/intelligence?window=30d');
|
|
||||||
const data = await response.json();
|
|
||||||
```
|
|
||||||
|
|
||||||
### Extract summary cards (flattened)
|
|
||||||
```javascript
|
|
||||||
const { performance_snapshot: ps, alerts } = data;
|
|
||||||
|
|
||||||
const summaryCards = {
|
|
||||||
activeProducts: ps.active_skus,
|
|
||||||
monthlyRevenue: ps.total_revenue_30d,
|
|
||||||
retailDistribution: ps.total_stores,
|
|
||||||
newOpportunities: ps.new_stores_30d,
|
|
||||||
categoryPosition: ps.market_share,
|
|
||||||
avgWholesale: ps.avg_wholesale_price,
|
|
||||||
pricingTier: ps.price_position,
|
|
||||||
accountsAtRisk: alerts.lost_stores_30d_count,
|
|
||||||
delistedSkus: alerts.lost_skus_30d_count,
|
|
||||||
shelfLosses: alerts.competitor_takeover_count,
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
### Get top 10 fastest selling SKUs
|
|
||||||
```javascript
|
|
||||||
const topSkus = data.sku_performance
|
|
||||||
.filter(sku => sku.velocity_status === 'hot' || sku.velocity_status === 'steady')
|
|
||||||
.sort((a, b) => b.daily_velocity - a.daily_velocity)
|
|
||||||
.slice(0, 10);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Get critical inventory alerts only
|
|
||||||
```javascript
|
|
||||||
const criticalInventory = data.inventory_health.skus
|
|
||||||
.filter(sku => sku.risk_level === 'critical');
|
|
||||||
```
|
|
||||||
|
|
||||||
### Get states with <50% penetration
|
|
||||||
```javascript
|
|
||||||
const underPenetrated = data.retail_footprint.penetration_by_region
|
|
||||||
.filter(region => region.percent_reached < 50)
|
|
||||||
.sort((a, b) => a.percent_reached - b.percent_reached);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Get active promotions with positive lift
|
|
||||||
```javascript
|
|
||||||
const effectivePromos = data.promo_performance.promotions
|
|
||||||
.filter(p => p.status === 'active' && p.velocity_lift > 0)
|
|
||||||
.sort((a, b) => b.velocity_lift - a.velocity_lift);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Build chart data for market share trend
|
|
||||||
```javascript
|
|
||||||
const chartData = data.competitive_landscape.market_share_trend.map(point => ({
|
|
||||||
x: new Date(point.date),
|
|
||||||
y: point.share_percent,
|
|
||||||
}));
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Notes for Frontend Implementation
|
|
||||||
|
|
||||||
1. **All fields are snake_case** - transform to camelCase if needed
|
|
||||||
2. **Null values are possible** - handle gracefully in UI
|
|
||||||
3. **Arrays may be empty** - show appropriate empty states
|
|
||||||
4. **Timestamps are ISO format** - parse with `new Date()`
|
|
||||||
5. **Percentages are already computed** - no need to multiply by 100
|
|
||||||
6. **The `window` parameter affects trend calculations** - 7d/30d/90d
|
|
||||||
@@ -1,297 +0,0 @@
|
|||||||
# Organic Browser-Based Scraping Guide
|
|
||||||
|
|
||||||
**Last Updated:** 2025-12-12
|
|
||||||
**Status:** Production-ready proof of concept
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
This document describes the "organic" browser-based approach to scraping Dutchie dispensary menus. Unlike direct curl/axios requests, this method uses a real browser session to make API calls, making requests appear natural and reducing detection risk.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Why Organic Scraping?
|
|
||||||
|
|
||||||
| Approach | Detection Risk | Speed | Complexity |
|
|
||||||
|----------|---------------|-------|------------|
|
|
||||||
| Direct curl | Higher | Fast | Low |
|
|
||||||
| curl-impersonate | Medium | Fast | Medium |
|
|
||||||
| **Browser-based (organic)** | **Lowest** | Slower | Higher |
|
|
||||||
|
|
||||||
Direct curl requests can be fingerprinted via:
|
|
||||||
- TLS fingerprint (cipher suites, extensions)
|
|
||||||
- Header order and values
|
|
||||||
- Missing cookies/session data
|
|
||||||
- Request patterns
|
|
||||||
|
|
||||||
Browser-based requests inherit:
|
|
||||||
- Real Chrome TLS fingerprint
|
|
||||||
- Session cookies from page visit
|
|
||||||
- Natural header order
|
|
||||||
- JavaScript execution environment
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Implementation
|
|
||||||
|
|
||||||
### Dependencies
|
|
||||||
|
|
||||||
```bash
|
|
||||||
npm install puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
|
|
||||||
```
|
|
||||||
|
|
||||||
### Core Script: `test-intercept.js`
|
|
||||||
|
|
||||||
Located at: `backend/test-intercept.js`
|
|
||||||
|
|
||||||
```javascript
|
|
||||||
const puppeteer = require('puppeteer-extra');
|
|
||||||
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
|
||||||
const fs = require('fs');
|
|
||||||
|
|
||||||
puppeteer.use(StealthPlugin());
|
|
||||||
|
|
||||||
async function capturePayload(config) {
|
|
||||||
const { dispensaryId, platformId, cName, outputPath } = config;
|
|
||||||
|
|
||||||
const browser = await puppeteer.launch({
|
|
||||||
headless: 'new',
|
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
|
||||||
});
|
|
||||||
|
|
||||||
const page = await browser.newPage();
|
|
||||||
|
|
||||||
// STEP 1: Establish session by visiting the menu
|
|
||||||
const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
|
|
||||||
await page.goto(embedUrl, { waitUntil: 'networkidle2', timeout: 60000 });
|
|
||||||
|
|
||||||
// STEP 2: Fetch ALL products using GraphQL from browser context
|
|
||||||
const result = await page.evaluate(async (platformId) => {
|
|
||||||
const allProducts = [];
|
|
||||||
let pageNum = 0;
|
|
||||||
const perPage = 100;
|
|
||||||
let totalCount = 0;
|
|
||||||
const sessionId = 'browser-session-' + Date.now();
|
|
||||||
|
|
||||||
while (pageNum < 30) {
|
|
||||||
const variables = {
|
|
||||||
includeEnterpriseSpecials: false,
|
|
||||||
productsFilter: {
|
|
||||||
dispensaryId: platformId,
|
|
||||||
pricingType: 'rec',
|
|
||||||
Status: 'Active', // CRITICAL: Must be 'Active', not null
|
|
||||||
types: [],
|
|
||||||
useCache: true,
|
|
||||||
isDefaultSort: true,
|
|
||||||
sortBy: 'popularSortIdx',
|
|
||||||
sortDirection: 1,
|
|
||||||
bypassOnlineThresholds: true,
|
|
||||||
isKioskMenu: false,
|
|
||||||
removeProductsBelowOptionThresholds: false,
|
|
||||||
},
|
|
||||||
page: pageNum,
|
|
||||||
perPage: perPage,
|
|
||||||
};
|
|
||||||
|
|
||||||
const extensions = {
|
|
||||||
persistedQuery: {
|
|
||||||
version: 1,
|
|
||||||
sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const qs = new URLSearchParams({
|
|
||||||
operationName: 'FilteredProducts',
|
|
||||||
variables: JSON.stringify(variables),
|
|
||||||
extensions: JSON.stringify(extensions)
|
|
||||||
});
|
|
||||||
|
|
||||||
const response = await fetch(`https://dutchie.com/api-3/graphql?${qs}`, {
|
|
||||||
method: 'GET',
|
|
||||||
headers: {
|
|
||||||
'Accept': 'application/json',
|
|
||||||
'content-type': 'application/json',
|
|
||||||
'x-dutchie-session': sessionId,
|
|
||||||
'apollographql-client-name': 'Marketplace (production)',
|
|
||||||
},
|
|
||||||
credentials: 'include'
|
|
||||||
});
|
|
||||||
|
|
||||||
const json = await response.json();
|
|
||||||
const data = json?.data?.filteredProducts;
|
|
||||||
if (!data?.products) break;
|
|
||||||
|
|
||||||
allProducts.push(...data.products);
|
|
||||||
if (pageNum === 0) totalCount = data.queryInfo?.totalCount || 0;
|
|
||||||
if (allProducts.length >= totalCount) break;
|
|
||||||
|
|
||||||
pageNum++;
|
|
||||||
await new Promise(r => setTimeout(r, 200)); // Polite delay
|
|
||||||
}
|
|
||||||
|
|
||||||
return { products: allProducts, totalCount };
|
|
||||||
}, platformId);
|
|
||||||
|
|
||||||
await browser.close();
|
|
||||||
|
|
||||||
// STEP 3: Save payload
|
|
||||||
const payload = {
|
|
||||||
dispensaryId,
|
|
||||||
platformId,
|
|
||||||
cName,
|
|
||||||
fetchedAt: new Date().toISOString(),
|
|
||||||
productCount: result.products.length,
|
|
||||||
products: result.products,
|
|
||||||
};
|
|
||||||
|
|
||||||
fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
|
|
||||||
return payload;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Critical Parameters
|
|
||||||
|
|
||||||
### GraphQL Hash (FilteredProducts)
|
|
||||||
|
|
||||||
```
|
|
||||||
ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0
|
|
||||||
```
|
|
||||||
|
|
||||||
**WARNING:** Using the wrong hash returns HTTP 400.
|
|
||||||
|
|
||||||
### Status Parameter
|
|
||||||
|
|
||||||
| Value | Result |
|
|
||||||
|-------|--------|
|
|
||||||
| `'Active'` | Returns in-stock products (1019 in test) |
|
|
||||||
| `null` | Returns 0 products |
|
|
||||||
| `'All'` | Returns HTTP 400 |
|
|
||||||
|
|
||||||
**ALWAYS use `Status: 'Active'`**
|
|
||||||
|
|
||||||
### Required Headers
|
|
||||||
|
|
||||||
```javascript
|
|
||||||
{
|
|
||||||
'Accept': 'application/json',
|
|
||||||
'content-type': 'application/json',
|
|
||||||
'x-dutchie-session': 'unique-session-id',
|
|
||||||
'apollographql-client-name': 'Marketplace (production)',
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Endpoint
|
|
||||||
|
|
||||||
```
|
|
||||||
https://dutchie.com/api-3/graphql
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Performance Benchmarks
|
|
||||||
|
|
||||||
Test store: AZ-Deeply-Rooted (1019 products)
|
|
||||||
|
|
||||||
| Metric | Value |
|
|
||||||
|--------|-------|
|
|
||||||
| Total products | 1019 |
|
|
||||||
| Time | 18.5 seconds |
|
|
||||||
| Payload size | 11.8 MB |
|
|
||||||
| Pages fetched | 11 (100 per page) |
|
|
||||||
| Success rate | 100% |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Payload Format
|
|
||||||
|
|
||||||
The output matches the existing `payload-fetch.ts` handler format:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"dispensaryId": 123,
|
|
||||||
"platformId": "6405ef617056e8014d79101b",
|
|
||||||
"cName": "AZ-Deeply-Rooted",
|
|
||||||
"fetchedAt": "2025-12-12T05:05:19.837Z",
|
|
||||||
"productCount": 1019,
|
|
||||||
"products": [
|
|
||||||
{
|
|
||||||
"id": "6927508db4851262f629a869",
|
|
||||||
"Name": "Product Name",
|
|
||||||
"brand": { "name": "Brand Name", ... },
|
|
||||||
"type": "Flower",
|
|
||||||
"THC": "25%",
|
|
||||||
"Prices": [...],
|
|
||||||
"Options": [...],
|
|
||||||
...
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Integration Points
|
|
||||||
|
|
||||||
### As a Task Handler
|
|
||||||
|
|
||||||
The organic approach can be integrated as an alternative to curl-based fetching:
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
// In src/tasks/handlers/organic-payload-fetch.ts
|
|
||||||
export async function handleOrganicPayloadFetch(ctx: TaskContext): Promise<TaskResult> {
|
|
||||||
// Use puppeteer-based capture
|
|
||||||
// Save to same payload storage
|
|
||||||
// Queue product_refresh task
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Worker Configuration
|
|
||||||
|
|
||||||
Add to job_schedules:
|
|
||||||
```sql
|
|
||||||
INSERT INTO job_schedules (name, role, cron_expression)
|
|
||||||
VALUES ('organic_product_crawl', 'organic_payload_fetch', '0 */6 * * *');
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
### HTTP 400 Bad Request
|
|
||||||
- Check hash is correct: `ee29c060...`
|
|
||||||
- Verify Status is `'Active'` (string, not null)
|
|
||||||
|
|
||||||
### 0 Products Returned
|
|
||||||
- Status was likely `null` or `'All'` - use `'Active'`
|
|
||||||
- Check platformId is valid MongoDB ObjectId
|
|
||||||
|
|
||||||
### Session Not Established
|
|
||||||
- Increase timeout on initial page.goto()
|
|
||||||
- Check cName is valid (matches embedded-menu URL)
|
|
||||||
|
|
||||||
### Detection/Blocking
|
|
||||||
- StealthPlugin should handle most cases
|
|
||||||
- Add random delays between pages
|
|
||||||
- Use headless: 'new' (not true/false)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Files Reference
|
|
||||||
|
|
||||||
| File | Purpose |
|
|
||||||
|------|---------|
|
|
||||||
| `backend/test-intercept.js` | Proof of concept script |
|
|
||||||
| `backend/src/platforms/dutchie/client.ts` | GraphQL hashes, curl implementation |
|
|
||||||
| `backend/src/tasks/handlers/payload-fetch.ts` | Current curl-based handler |
|
|
||||||
| `backend/src/utils/payload-storage.ts` | Payload save/load utilities |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## See Also
|
|
||||||
|
|
||||||
- `DUTCHIE_CRAWL_WORKFLOW.md` - Full crawl pipeline documentation
|
|
||||||
- `TASK_WORKFLOW_2024-12-10.md` - Task system architecture
|
|
||||||
- `CLAUDE.md` - Project rules and constraints
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
# ARCHIVED DOCUMENTATION
|
|
||||||
|
|
||||||
**WARNING: These docs may be outdated or inaccurate.**
|
|
||||||
|
|
||||||
The code has evolved significantly. These docs are kept for historical reference only.
|
|
||||||
|
|
||||||
## What to Use Instead
|
|
||||||
|
|
||||||
**The single source of truth is:**
|
|
||||||
- `CLAUDE.md` (root) - Essential rules and quick reference
|
|
||||||
- `docs/CODEBASE_MAP.md` - Current file/directory reference
|
|
||||||
|
|
||||||
## Why Archive?
|
|
||||||
|
|
||||||
These docs were written during development iterations and may reference:
|
|
||||||
- Old file paths that no longer exist
|
|
||||||
- Deprecated approaches (hydration, scraper-v2)
|
|
||||||
- APIs that have changed
|
|
||||||
- Database schemas that evolved
|
|
||||||
|
|
||||||
## If You Need Details
|
|
||||||
|
|
||||||
1. First check CODEBASE_MAP.md for current file locations
|
|
||||||
2. Then read the actual source code
|
|
||||||
3. Only use archive docs as a last resort for historical context
|
|
||||||
@@ -1,584 +0,0 @@
|
|||||||
# Task Workflow Documentation
|
|
||||||
**Date: 2024-12-10**
|
|
||||||
|
|
||||||
This document describes the complete task/job processing architecture after the 2024-12-10 rewrite.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Complete Architecture
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────────────────────────┐
|
|
||||||
│ KUBERNETES CLUSTER │
|
|
||||||
├─────────────────────────────────────────────────────────────────────────────────┤
|
|
||||||
│ │
|
|
||||||
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ API SERVER POD (scraper) │ │
|
|
||||||
│ │ │ │
|
|
||||||
│ │ ┌──────────────────┐ ┌────────────────────────────────────────┐ │ │
|
|
||||||
│ │ │ Express API │ │ TaskScheduler │ │ │
|
|
||||||
│ │ │ │ │ (src/services/task-scheduler.ts) │ │ │
|
|
||||||
│ │ │ /api/job-queue │ │ │ │ │
|
|
||||||
│ │ │ /api/tasks │ │ • Polls every 60s │ │ │
|
|
||||||
│ │ │ /api/schedules │ │ • Checks task_schedules table │ │ │
|
|
||||||
│ │ └────────┬─────────┘ │ • SELECT FOR UPDATE SKIP LOCKED │ │ │
|
|
||||||
│ │ │ │ • Generates tasks when due │ │ │
|
|
||||||
│ │ │ └──────────────────┬─────────────────────┘ │ │
|
|
||||||
│ │ │ │ │ │
|
|
||||||
│ └────────────┼──────────────────────────────────┼──────────────────────────┘ │
|
|
||||||
│ │ │ │
|
|
||||||
│ │ ┌────────────────────────┘ │
|
|
||||||
│ │ │ │
|
|
||||||
│ ▼ ▼ │
|
|
||||||
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ POSTGRESQL DATABASE │ │
|
|
||||||
│ │ │ │
|
|
||||||
│ │ ┌─────────────────────┐ ┌─────────────────────┐ │ │
|
|
||||||
│ │ │ task_schedules │ │ worker_tasks │ │ │
|
|
||||||
│ │ │ │ │ │ │ │
|
|
||||||
│ │ │ • product_refresh │───────►│ • pending tasks │ │ │
|
|
||||||
│ │ │ • store_discovery │ create │ • claimed tasks │ │ │
|
|
||||||
│ │ │ • analytics_refresh │ tasks │ • running tasks │ │ │
|
|
||||||
│ │ │ │ │ • completed tasks │ │ │
|
|
||||||
│ │ │ next_run_at │ │ │ │ │
|
|
||||||
│ │ │ last_run_at │ │ role, dispensary_id │ │ │
|
|
||||||
│ │ │ interval_hours │ │ priority, status │ │ │
|
|
||||||
│ │ └─────────────────────┘ └──────────┬──────────┘ │ │
|
|
||||||
│ │ │ │ │
|
|
||||||
│ └─────────────────────────────────────────────┼────────────────────────────┘ │
|
|
||||||
│ │ │
|
|
||||||
│ ┌──────────────────────┘ │
|
|
||||||
│ │ Workers poll for tasks │
|
|
||||||
│ │ (SELECT FOR UPDATE SKIP LOCKED) │
|
|
||||||
│ ▼ │
|
|
||||||
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ WORKER PODS (StatefulSet: scraper-worker) │ │
|
|
||||||
│ │ │ │
|
|
||||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
|
||||||
│ │ │ Worker 0 │ │ Worker 1 │ │ Worker 2 │ │ Worker N │ │ │
|
|
||||||
│ │ │ │ │ │ │ │ │ │ │ │
|
|
||||||
│ │ │ task-worker │ │ task-worker │ │ task-worker │ │ task-worker │ │ │
|
|
||||||
│ │ │ .ts │ │ .ts │ │ .ts │ │ .ts │ │ │
|
|
||||||
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
|
||||||
│ │ │ │
|
|
||||||
│ └──────────────────────────────────────────────────────────────────────────┘ │
|
|
||||||
│ │
|
|
||||||
└──────────────────────────────────────────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Startup Sequence
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
|
||||||
│ API SERVER STARTUP │
|
|
||||||
├─────────────────────────────────────────────────────────────────────────────┤
|
|
||||||
│ │
|
|
||||||
│ 1. Express app initializes │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ 2. runAutoMigrations() │
|
|
||||||
│ • Runs pending migrations (including 079_task_schedules.sql) │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ 3. initializeMinio() / initializeImageStorage() │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ 4. cleanupOrphanedJobs() │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ 5. taskScheduler.start() ◄─── NEW (per TASK_WORKFLOW_2024-12-10.md) │
|
|
||||||
│ │ │
|
|
||||||
│ ├── Recover stale tasks (workers that died) │
|
|
||||||
│ ├── Ensure default schedules exist in task_schedules │
|
|
||||||
│ ├── Check and run any due schedules immediately │
|
|
||||||
│ └── Start 60-second poll interval │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ 6. app.listen(PORT) │
|
|
||||||
│ │
|
|
||||||
└─────────────────────────────────────────────────────────────────────────────┘
|
|
||||||
|
|
||||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
|
||||||
│ WORKER POD STARTUP │
|
|
||||||
├─────────────────────────────────────────────────────────────────────────────┤
|
|
||||||
│ │
|
|
||||||
│ 1. K8s starts pod from StatefulSet │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ 2. TaskWorker.constructor() │
|
|
||||||
│ • Create DB pool │
|
|
||||||
│ • Create CrawlRotator │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ 3. initializeStealth() │
|
|
||||||
│ • Load proxies from DB (REQUIRED - fails if none) │
|
|
||||||
│ • Wire rotator to Dutchie client │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ 4. register() with API │
|
|
||||||
│ • Optional - continues if fails │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ 5. startRegistryHeartbeat() every 30s │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ 6. processNextTask() loop │
|
|
||||||
│ │ │
|
|
||||||
│ ├── Poll for pending task (FOR UPDATE SKIP LOCKED) │
|
|
||||||
│ ├── Claim task atomically │
|
|
||||||
│ ├── Execute handler (product_refresh, store_discovery, etc.) │
|
|
||||||
│ ├── Mark complete/failed │
|
|
||||||
│ ├── Chain next task if applicable │
|
|
||||||
│ └── Loop │
|
|
||||||
│ │
|
|
||||||
└─────────────────────────────────────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Schedule Flow
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
|
||||||
│ SCHEDULER POLL (every 60 seconds) │
|
|
||||||
├─────────────────────────────────────────────────────────────────────────────┤
|
|
||||||
│ │
|
|
||||||
│ BEGIN TRANSACTION │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ SELECT * FROM task_schedules │
|
|
||||||
│ WHERE enabled = true AND next_run_at <= NOW() │
|
|
||||||
│ FOR UPDATE SKIP LOCKED ◄─── Prevents duplicate execution across replicas │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ For each due schedule: │
|
|
||||||
│ │ │
|
|
||||||
│ ├── product_refresh_all │
|
|
||||||
│ │ └─► Query dispensaries needing crawl │
|
|
||||||
│ │ └─► Create product_refresh tasks in worker_tasks │
|
|
||||||
│ │ │
|
|
||||||
│ ├── store_discovery_dutchie │
|
|
||||||
│ │ └─► Create single store_discovery task │
|
|
||||||
│ │ │
|
|
||||||
│ └── analytics_refresh │
|
|
||||||
│ └─► Create single analytics_refresh task │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ UPDATE task_schedules SET │
|
|
||||||
│ last_run_at = NOW(), │
|
|
||||||
│ next_run_at = NOW() + interval_hours │
|
|
||||||
│ │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ COMMIT │
|
|
||||||
│ │
|
|
||||||
└─────────────────────────────────────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Task Lifecycle
|
|
||||||
|
|
||||||
```
|
|
||||||
┌──────────┐
|
|
||||||
│ SCHEDULE │
|
|
||||||
│ DUE │
|
|
||||||
└────┬─────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌──────────────┐ claim ┌──────────────┐ start ┌──────────────┐
|
|
||||||
│ PENDING │────────────►│ CLAIMED │────────────►│ RUNNING │
|
|
||||||
└──────────────┘ └──────────────┘ └──────┬───────┘
|
|
||||||
▲ │
|
|
||||||
│ ┌──────────────┼──────────────┐
|
|
||||||
│ retry │ │ │
|
|
||||||
│ (if retries < max) ▼ ▼ ▼
|
|
||||||
│ ┌──────────┐ ┌──────────┐ ┌──────────┐
|
|
||||||
└──────────────────────────────────│ FAILED │ │ COMPLETED│ │ STALE │
|
|
||||||
└──────────┘ └──────────┘ └────┬─────┘
|
|
||||||
│
|
|
||||||
recover_stale_tasks()
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌──────────┐
|
|
||||||
│ PENDING │
|
|
||||||
└──────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Database Tables
|
|
||||||
|
|
||||||
### task_schedules (NEW - migration 079)
|
|
||||||
|
|
||||||
Stores schedule definitions. Survives restarts.
|
|
||||||
|
|
||||||
```sql
|
|
||||||
CREATE TABLE task_schedules (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
name VARCHAR(100) NOT NULL UNIQUE,
|
|
||||||
role VARCHAR(50) NOT NULL, -- product_refresh, store_discovery, etc.
|
|
||||||
enabled BOOLEAN DEFAULT TRUE,
|
|
||||||
interval_hours INTEGER NOT NULL, -- How often to run
|
|
||||||
priority INTEGER DEFAULT 0, -- Task priority when created
|
|
||||||
state_code VARCHAR(2), -- Optional filter
|
|
||||||
last_run_at TIMESTAMPTZ, -- When it last ran
|
|
||||||
next_run_at TIMESTAMPTZ, -- When it's due next
|
|
||||||
last_task_count INTEGER, -- Tasks created last run
|
|
||||||
last_error TEXT -- Error message if failed
|
|
||||||
);
|
|
||||||
```
|
|
||||||
|
|
||||||
### worker_tasks (migration 074)
|
|
||||||
|
|
||||||
The task queue. Workers pull from here.
|
|
||||||
|
|
||||||
```sql
|
|
||||||
CREATE TABLE worker_tasks (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
role task_role NOT NULL, -- What type of work
|
|
||||||
dispensary_id INTEGER, -- Which store (if applicable)
|
|
||||||
platform VARCHAR(50), -- Which platform
|
|
||||||
status task_status DEFAULT 'pending',
|
|
||||||
priority INTEGER DEFAULT 0, -- Higher = process first
|
|
||||||
scheduled_for TIMESTAMP, -- Don't process before this time
|
|
||||||
worker_id VARCHAR(100), -- Which worker claimed it
|
|
||||||
claimed_at TIMESTAMP,
|
|
||||||
started_at TIMESTAMP,
|
|
||||||
completed_at TIMESTAMP,
|
|
||||||
last_heartbeat_at TIMESTAMP, -- For stale detection
|
|
||||||
result JSONB,
|
|
||||||
error_message TEXT,
|
|
||||||
retry_count INTEGER DEFAULT 0,
|
|
||||||
max_retries INTEGER DEFAULT 3
|
|
||||||
);
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Default Schedules
|
|
||||||
|
|
||||||
| Name | Role | Interval | Priority | Description |
|
|
||||||
|------|------|----------|----------|-------------|
|
|
||||||
| `payload_fetch_all` | payload_fetch | 4 hours | 0 | Fetch payloads from Dutchie API (chains to product_refresh) |
|
|
||||||
| `store_discovery_dutchie` | store_discovery | 24 hours | 5 | Find new Dutchie stores |
|
|
||||||
| `analytics_refresh` | analytics_refresh | 6 hours | 0 | Refresh MVs |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Task Roles
|
|
||||||
|
|
||||||
| Role | Description | Creates Tasks For |
|
|
||||||
|------|-------------|-------------------|
|
|
||||||
| `payload_fetch` | **NEW** - Fetch from Dutchie API, save to disk | Each dispensary needing crawl |
|
|
||||||
| `product_refresh` | **CHANGED** - Read local payload, normalize, upsert to DB | Chained from payload_fetch |
|
|
||||||
| `store_discovery` | Find new dispensaries, returns newStoreIds[] | Single task per platform |
|
|
||||||
| `entry_point_discovery` | **DEPRECATED** - Resolve platform IDs | No longer used |
|
|
||||||
| `product_discovery` | Initial product fetch for new stores | Chained from store_discovery |
|
|
||||||
| `analytics_refresh` | Refresh MVs | Single global task |
|
|
||||||
|
|
||||||
### Payload/Refresh Separation (2024-12-10)
|
|
||||||
|
|
||||||
The crawl workflow is now split into two phases:
|
|
||||||
|
|
||||||
```
|
|
||||||
payload_fetch (scheduled every 4h)
|
|
||||||
└─► Hit Dutchie GraphQL API
|
|
||||||
└─► Save raw JSON to /storage/payloads/{year}/{month}/{day}/store_{id}_{ts}.json.gz
|
|
||||||
└─► Record metadata in raw_crawl_payloads table
|
|
||||||
└─► Queue product_refresh task with payload_id
|
|
||||||
|
|
||||||
product_refresh (chained from payload_fetch)
|
|
||||||
└─► Load payload from filesystem (NOT from API)
|
|
||||||
└─► Normalize via DutchieNormalizer
|
|
||||||
└─► Upsert to store_products
|
|
||||||
└─► Create snapshots
|
|
||||||
└─► Track missing products
|
|
||||||
└─► Download images
|
|
||||||
```
|
|
||||||
|
|
||||||
**Benefits:**
|
|
||||||
- **Retry-friendly**: If normalize fails, re-run product_refresh without re-crawling
|
|
||||||
- **Replay-able**: Run product_refresh against any historical payload
|
|
||||||
- **Faster refreshes**: Local file read vs network call
|
|
||||||
- **Historical diffs**: Compare payloads to see what changed between crawls
|
|
||||||
- **Less API pressure**: Only payload_fetch hits Dutchie
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Task Chaining
|
|
||||||
|
|
||||||
Tasks automatically queue follow-up tasks upon successful completion. This creates two main flows:
|
|
||||||
|
|
||||||
### Discovery Flow (New Stores)
|
|
||||||
|
|
||||||
When `store_discovery` finds new dispensaries, they automatically get their initial product data:
|
|
||||||
|
|
||||||
```
|
|
||||||
store_discovery
|
|
||||||
└─► Discovers new locations via Dutchie GraphQL
|
|
||||||
└─► Auto-promotes valid locations to dispensaries table
|
|
||||||
└─► Collects newDispensaryIds[] from promotions
|
|
||||||
└─► Returns { newStoreIds: [...] } in result
|
|
||||||
|
|
||||||
chainNextTask() detects newStoreIds
|
|
||||||
└─► Creates product_discovery task for each new store
|
|
||||||
|
|
||||||
product_discovery
|
|
||||||
└─► Calls handlePayloadFetch() internally
|
|
||||||
└─► payload_fetch hits Dutchie API
|
|
||||||
└─► Saves raw JSON to /storage/payloads/
|
|
||||||
└─► Queues product_refresh task with payload_id
|
|
||||||
|
|
||||||
product_refresh
|
|
||||||
└─► Loads payload from filesystem
|
|
||||||
└─► Normalizes and upserts to store_products
|
|
||||||
└─► Creates snapshots, downloads images
|
|
||||||
```
|
|
||||||
|
|
||||||
**Complete Discovery Chain:**
|
|
||||||
```
|
|
||||||
store_discovery → product_discovery → payload_fetch → product_refresh
|
|
||||||
(internal call) (queues next)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Scheduled Flow (Existing Stores)
|
|
||||||
|
|
||||||
For existing stores, `payload_fetch_all` schedule runs every 4 hours:
|
|
||||||
|
|
||||||
```
|
|
||||||
TaskScheduler (every 60s)
|
|
||||||
└─► Checks task_schedules for due schedules
|
|
||||||
└─► payload_fetch_all is due
|
|
||||||
└─► Generates payload_fetch task for each dispensary
|
|
||||||
|
|
||||||
payload_fetch
|
|
||||||
└─► Hits Dutchie GraphQL API
|
|
||||||
└─► Saves raw JSON to /storage/payloads/
|
|
||||||
└─► Queues product_refresh task with payload_id
|
|
||||||
|
|
||||||
product_refresh
|
|
||||||
└─► Loads payload from filesystem (NOT API)
|
|
||||||
└─► Normalizes via DutchieNormalizer
|
|
||||||
└─► Upserts to store_products
|
|
||||||
└─► Creates snapshots
|
|
||||||
```
|
|
||||||
|
|
||||||
**Complete Scheduled Chain:**
|
|
||||||
```
|
|
||||||
payload_fetch → product_refresh
|
|
||||||
(queues) (reads local)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Chaining Implementation
|
|
||||||
|
|
||||||
Task chaining is handled in two places:
|
|
||||||
|
|
||||||
1. **Internal chaining (handler calls handler):**
|
|
||||||
- `product_discovery` calls `handlePayloadFetch()` directly
|
|
||||||
|
|
||||||
2. **External chaining (chainNextTask() in task-service.ts):**
|
|
||||||
- Called after task completion
|
|
||||||
- `store_discovery` → queues `product_discovery` for each newStoreId
|
|
||||||
|
|
||||||
3. **Queue-based chaining (taskService.createTask):**
|
|
||||||
- `payload_fetch` queues `product_refresh` with `payload: { payload_id }`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Payload API Endpoints
|
|
||||||
|
|
||||||
Raw crawl payloads can be accessed via the Payloads API:
|
|
||||||
|
|
||||||
| Endpoint | Method | Description |
|
|
||||||
|----------|--------|-------------|
|
|
||||||
| `GET /api/payloads` | GET | List payload metadata (paginated) |
|
|
||||||
| `GET /api/payloads/:id` | GET | Get payload metadata by ID |
|
|
||||||
| `GET /api/payloads/:id/data` | GET | Get full payload JSON (decompressed) |
|
|
||||||
| `GET /api/payloads/store/:dispensaryId` | GET | List payloads for a store |
|
|
||||||
| `GET /api/payloads/store/:dispensaryId/latest` | GET | Get latest payload for a store |
|
|
||||||
| `GET /api/payloads/store/:dispensaryId/diff` | GET | Diff two payloads for changes |
|
|
||||||
|
|
||||||
### Payload Diff Response
|
|
||||||
|
|
||||||
The diff endpoint returns:
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"success": true,
|
|
||||||
"from": { "id": 123, "fetchedAt": "...", "productCount": 100 },
|
|
||||||
"to": { "id": 456, "fetchedAt": "...", "productCount": 105 },
|
|
||||||
"diff": {
|
|
||||||
"added": 10,
|
|
||||||
"removed": 5,
|
|
||||||
"priceChanges": 8,
|
|
||||||
"stockChanges": 12
|
|
||||||
},
|
|
||||||
"details": {
|
|
||||||
"added": [...],
|
|
||||||
"removed": [...],
|
|
||||||
"priceChanges": [...],
|
|
||||||
"stockChanges": [...]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## API Endpoints
|
|
||||||
|
|
||||||
### Schedules (NEW)
|
|
||||||
|
|
||||||
| Endpoint | Method | Description |
|
|
||||||
|----------|--------|-------------|
|
|
||||||
| `GET /api/schedules` | GET | List all schedules |
|
|
||||||
| `PUT /api/schedules/:id` | PUT | Update schedule |
|
|
||||||
| `POST /api/schedules/:id/trigger` | POST | Run schedule immediately |
|
|
||||||
|
|
||||||
### Task Creation (rewired 2024-12-10)
|
|
||||||
|
|
||||||
| Endpoint | Method | Description |
|
|
||||||
|----------|--------|-------------|
|
|
||||||
| `POST /api/job-queue/enqueue` | POST | Create single task |
|
|
||||||
| `POST /api/job-queue/enqueue-batch` | POST | Create batch tasks |
|
|
||||||
| `POST /api/job-queue/enqueue-state` | POST | Create tasks for state |
|
|
||||||
| `POST /api/tasks` | POST | Direct task creation |
|
|
||||||
|
|
||||||
### Task Management
|
|
||||||
|
|
||||||
| Endpoint | Method | Description |
|
|
||||||
|----------|--------|-------------|
|
|
||||||
| `GET /api/tasks` | GET | List tasks |
|
|
||||||
| `GET /api/tasks/:id` | GET | Get single task |
|
|
||||||
| `GET /api/tasks/counts` | GET | Task counts by status |
|
|
||||||
| `POST /api/tasks/recover-stale` | POST | Recover stale tasks |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Key Files
|
|
||||||
|
|
||||||
| File | Purpose |
|
|
||||||
|------|---------|
|
|
||||||
| `src/services/task-scheduler.ts` | **NEW** - DB-driven scheduler |
|
|
||||||
| `src/tasks/task-worker.ts` | Worker that processes tasks |
|
|
||||||
| `src/tasks/task-service.ts` | Task CRUD operations |
|
|
||||||
| `src/tasks/handlers/payload-fetch.ts` | **NEW** - Fetches from API, saves to disk |
|
|
||||||
| `src/tasks/handlers/product-refresh.ts` | **CHANGED** - Reads from disk, processes to DB |
|
|
||||||
| `src/utils/payload-storage.ts` | **NEW** - Payload save/load utilities |
|
|
||||||
| `src/routes/tasks.ts` | Task API endpoints |
|
|
||||||
| `src/routes/job-queue.ts` | Job Queue UI endpoints (rewired) |
|
|
||||||
| `migrations/079_task_schedules.sql` | Schedule table |
|
|
||||||
| `migrations/080_raw_crawl_payloads.sql` | Payload metadata table |
|
|
||||||
| `migrations/081_payload_fetch_columns.sql` | payload, last_fetch_at columns |
|
|
||||||
| `migrations/074_worker_task_queue.sql` | Task queue table |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Legacy Code (DEPRECATED)
|
|
||||||
|
|
||||||
| File | Status | Replacement |
|
|
||||||
|------|--------|-------------|
|
|
||||||
| `src/services/scheduler.ts` | DEPRECATED | `task-scheduler.ts` |
|
|
||||||
| `dispensary_crawl_jobs` table | ORPHANED | `worker_tasks` |
|
|
||||||
| `job_schedules` table | LEGACY | `task_schedules` |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Dashboard Integration
|
|
||||||
|
|
||||||
Both pages remain wired to the dashboard:
|
|
||||||
|
|
||||||
| Page | Data Source | Actions |
|
|
||||||
|------|-------------|---------|
|
|
||||||
| **Job Queue** | `worker_tasks`, `task_schedules` | Create tasks, view schedules |
|
|
||||||
| **Task Queue** | `worker_tasks` | View tasks, recover stale |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Multi-Replica Safety
|
|
||||||
|
|
||||||
The scheduler uses `SELECT FOR UPDATE SKIP LOCKED` to ensure:
|
|
||||||
|
|
||||||
1. **Only one replica** executes a schedule at a time
|
|
||||||
2. **No duplicate tasks** created
|
|
||||||
3. **Survives pod restarts** - state in DB, not memory
|
|
||||||
4. **Self-healing** - recovers stale tasks on startup
|
|
||||||
|
|
||||||
```sql
|
|
||||||
-- This query is atomic across all API server replicas
|
|
||||||
SELECT * FROM task_schedules
|
|
||||||
WHERE enabled = true AND next_run_at <= NOW()
|
|
||||||
FOR UPDATE SKIP LOCKED
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Worker Scaling (K8s)
|
|
||||||
|
|
||||||
Workers run as a StatefulSet in Kubernetes. You can scale from the admin UI or CLI.
|
|
||||||
|
|
||||||
### From Admin UI
|
|
||||||
|
|
||||||
The Workers page (`/admin/workers`) provides:
|
|
||||||
- Current replica count display
|
|
||||||
- Scale up/down buttons
|
|
||||||
- Target replica input
|
|
||||||
|
|
||||||
### API Endpoints
|
|
||||||
|
|
||||||
| Endpoint | Method | Description |
|
|
||||||
|----------|--------|-------------|
|
|
||||||
| `GET /api/workers/k8s/replicas` | GET | Get current/desired replica counts |
|
|
||||||
| `POST /api/workers/k8s/scale` | POST | Scale to N replicas (body: `{ replicas: N }`) |
|
|
||||||
|
|
||||||
### From CLI
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# View current replicas
|
|
||||||
kubectl get statefulset scraper-worker -n dispensary-scraper
|
|
||||||
|
|
||||||
# Scale to 10 workers
|
|
||||||
kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=10
|
|
||||||
|
|
||||||
# Scale down to 3 workers
|
|
||||||
kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=3
|
|
||||||
```
|
|
||||||
|
|
||||||
### Configuration
|
|
||||||
|
|
||||||
Environment variables for the API server:
|
|
||||||
|
|
||||||
| Variable | Default | Description |
|
|
||||||
|----------|---------|-------------|
|
|
||||||
| `K8S_NAMESPACE` | `dispensary-scraper` | Kubernetes namespace |
|
|
||||||
| `K8S_WORKER_STATEFULSET` | `scraper-worker` | StatefulSet name |
|
|
||||||
|
|
||||||
### RBAC Requirements
|
|
||||||
|
|
||||||
The API server pod needs these K8s permissions:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
name: worker-scaler
|
|
||||||
namespace: dispensary-scraper
|
|
||||||
rules:
|
|
||||||
- apiGroups: ["apps"]
|
|
||||||
resources: ["statefulsets"]
|
|
||||||
verbs: ["get", "patch"]
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
name: scraper-worker-scaler
|
|
||||||
namespace: dispensary-scraper
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: default
|
|
||||||
namespace: dispensary-scraper
|
|
||||||
roleRef:
|
|
||||||
kind: Role
|
|
||||||
name: worker-scaler
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
```
|
|
||||||
@@ -1,77 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: scraper-worker
|
|
||||||
namespace: dispensary-scraper
|
|
||||||
labels:
|
|
||||||
app: scraper-worker
|
|
||||||
spec:
|
|
||||||
clusterIP: None # Headless service required for StatefulSet
|
|
||||||
selector:
|
|
||||||
app: scraper-worker
|
|
||||||
ports:
|
|
||||||
- port: 3010
|
|
||||||
name: http
|
|
||||||
---
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: StatefulSet
|
|
||||||
metadata:
|
|
||||||
name: scraper-worker
|
|
||||||
namespace: dispensary-scraper
|
|
||||||
spec:
|
|
||||||
serviceName: scraper-worker
|
|
||||||
replicas: 8
|
|
||||||
podManagementPolicy: Parallel # Start all pods at once
|
|
||||||
updateStrategy:
|
|
||||||
type: OnDelete # Pods only update when manually deleted - no automatic restarts
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: scraper-worker
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: scraper-worker
|
|
||||||
spec:
|
|
||||||
terminationGracePeriodSeconds: 60
|
|
||||||
imagePullSecrets:
|
|
||||||
- name: regcred
|
|
||||||
containers:
|
|
||||||
- name: worker
|
|
||||||
image: code.cannabrands.app/creationshop/dispensary-scraper:latest
|
|
||||||
imagePullPolicy: Always
|
|
||||||
command: ["node"]
|
|
||||||
args: ["dist/tasks/task-worker.js"]
|
|
||||||
env:
|
|
||||||
- name: WORKER_MODE
|
|
||||||
value: "true"
|
|
||||||
- name: POD_NAME
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: metadata.name
|
|
||||||
- name: MAX_CONCURRENT_TASKS
|
|
||||||
value: "50"
|
|
||||||
- name: API_BASE_URL
|
|
||||||
value: http://scraper
|
|
||||||
- name: NODE_OPTIONS
|
|
||||||
value: --max-old-space-size=1500
|
|
||||||
envFrom:
|
|
||||||
- configMapRef:
|
|
||||||
name: scraper-config
|
|
||||||
- secretRef:
|
|
||||||
name: scraper-secrets
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
cpu: 100m
|
|
||||||
memory: 1Gi
|
|
||||||
limits:
|
|
||||||
cpu: 500m
|
|
||||||
memory: 2Gi
|
|
||||||
livenessProbe:
|
|
||||||
exec:
|
|
||||||
command:
|
|
||||||
- /bin/sh
|
|
||||||
- -c
|
|
||||||
- pgrep -f 'task-worker' > /dev/null
|
|
||||||
initialDelaySeconds: 10
|
|
||||||
periodSeconds: 30
|
|
||||||
failureThreshold: 3
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
-- Migration: Worker Commands Table
|
|
||||||
-- Purpose: Store commands for workers (decommission, etc.)
|
|
||||||
-- Workers poll this table after each task to check for commands
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS worker_commands (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
worker_id TEXT NOT NULL,
|
|
||||||
command TEXT NOT NULL, -- 'decommission', 'pause', 'resume'
|
|
||||||
reason TEXT,
|
|
||||||
issued_by TEXT,
|
|
||||||
issued_at TIMESTAMPTZ DEFAULT NOW(),
|
|
||||||
acknowledged_at TIMESTAMPTZ,
|
|
||||||
executed_at TIMESTAMPTZ,
|
|
||||||
status TEXT DEFAULT 'pending' -- 'pending', 'acknowledged', 'executed', 'cancelled'
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Index for worker lookups
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_worker_commands_worker_id ON worker_commands(worker_id);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_worker_commands_pending ON worker_commands(worker_id, status) WHERE status = 'pending';
|
|
||||||
|
|
||||||
-- Add decommission_requested column to worker_registry for quick checks
|
|
||||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested BOOLEAN DEFAULT FALSE;
|
|
||||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_reason TEXT;
|
|
||||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested_at TIMESTAMPTZ;
|
|
||||||
|
|
||||||
-- Comment
|
|
||||||
COMMENT ON TABLE worker_commands IS 'Commands issued to workers (decommission after task, pause, etc.)';
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
-- Migration 078: Add consecutive_403_count to proxies table
|
|
||||||
-- Per workflow-12102025.md: Track consecutive 403s per proxy
|
|
||||||
-- After 3 consecutive 403s with different fingerprints → disable proxy
|
|
||||||
|
|
||||||
ALTER TABLE proxies ADD COLUMN IF NOT EXISTS consecutive_403_count INTEGER DEFAULT 0;
|
|
||||||
|
|
||||||
-- Add comment explaining the column
|
|
||||||
COMMENT ON COLUMN proxies.consecutive_403_count IS 'Tracks consecutive 403 blocks. Reset to 0 on success. Proxy disabled at 3.';
|
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
-- Migration 079: Task Schedules for Database-Driven Scheduler
|
|
||||||
-- Per TASK_WORKFLOW_2024-12-10.md: Replaces node-cron with DB-driven scheduling
|
|
||||||
--
|
|
||||||
-- 2024-12-10: Created for reliable, multi-replica-safe task scheduling
|
|
||||||
|
|
||||||
-- task_schedules: Stores schedule definitions and state
|
|
||||||
CREATE TABLE IF NOT EXISTS task_schedules (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
name VARCHAR(100) NOT NULL UNIQUE,
|
|
||||||
role VARCHAR(50) NOT NULL, -- TaskRole: product_refresh, store_discovery, etc.
|
|
||||||
description TEXT,
|
|
||||||
|
|
||||||
-- Schedule configuration
|
|
||||||
enabled BOOLEAN DEFAULT TRUE,
|
|
||||||
interval_hours INTEGER NOT NULL DEFAULT 4,
|
|
||||||
priority INTEGER DEFAULT 0,
|
|
||||||
|
|
||||||
-- Optional scope filters
|
|
||||||
state_code VARCHAR(2), -- NULL = all states
|
|
||||||
platform VARCHAR(50), -- NULL = all platforms
|
|
||||||
|
|
||||||
-- Execution state (updated by scheduler)
|
|
||||||
last_run_at TIMESTAMPTZ,
|
|
||||||
next_run_at TIMESTAMPTZ,
|
|
||||||
last_task_count INTEGER DEFAULT 0,
|
|
||||||
last_error TEXT,
|
|
||||||
|
|
||||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
||||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Indexes for scheduler queries
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_task_schedules_enabled ON task_schedules(enabled) WHERE enabled = TRUE;
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_task_schedules_next_run ON task_schedules(next_run_at) WHERE enabled = TRUE;
|
|
||||||
|
|
||||||
-- Insert default schedules
|
|
||||||
INSERT INTO task_schedules (name, role, interval_hours, priority, description, next_run_at)
|
|
||||||
VALUES
|
|
||||||
('product_refresh_all', 'product_refresh', 4, 0, 'Generate product refresh tasks for all crawl-enabled stores every 4 hours', NOW()),
|
|
||||||
('store_discovery_dutchie', 'store_discovery', 24, 5, 'Discover new Dutchie stores daily', NOW()),
|
|
||||||
('analytics_refresh', 'analytics_refresh', 6, 0, 'Refresh analytics materialized views every 6 hours', NOW())
|
|
||||||
ON CONFLICT (name) DO NOTHING;
|
|
||||||
|
|
||||||
-- Comment for documentation
|
|
||||||
COMMENT ON TABLE task_schedules IS 'Database-driven task scheduler configuration. Per TASK_WORKFLOW_2024-12-10.md:
|
|
||||||
- Schedules persist in DB (survive restarts)
|
|
||||||
- Uses SELECT FOR UPDATE SKIP LOCKED for multi-replica safety
|
|
||||||
- Scheduler polls every 60s and executes due schedules
|
|
||||||
- Creates tasks in worker_tasks for task-worker.ts to process';
|
|
||||||
@@ -1,58 +0,0 @@
|
|||||||
-- Migration 080: Raw Crawl Payloads Metadata Table
|
|
||||||
-- Per TASK_WORKFLOW_2024-12-10.md: Store full GraphQL payloads for historical analysis
|
|
||||||
--
|
|
||||||
-- Design Pattern: Metadata/Payload Separation
|
|
||||||
-- - Metadata (this table): Small, indexed, queryable
|
|
||||||
-- - Payload (filesystem): Gzipped JSON at storage_path
|
|
||||||
--
|
|
||||||
-- Benefits:
|
|
||||||
-- - Compare any two crawls to see what changed
|
|
||||||
-- - Replay/re-normalize historical data if logic changes
|
|
||||||
-- - Debug issues by seeing exactly what the API returned
|
|
||||||
-- - DB stays small, backups stay fast
|
|
||||||
--
|
|
||||||
-- Storage location: /storage/payloads/{year}/{month}/{day}/store_{id}_{timestamp}.json.gz
|
|
||||||
-- Compression: ~90% reduction (1.5MB -> 150KB per crawl)
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS raw_crawl_payloads (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
|
|
||||||
-- Links to crawl tracking
|
|
||||||
crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,
|
|
||||||
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
|
||||||
|
|
||||||
-- File location (gzipped JSON)
|
|
||||||
storage_path TEXT NOT NULL,
|
|
||||||
|
|
||||||
-- Metadata for quick queries without loading file
|
|
||||||
product_count INTEGER NOT NULL DEFAULT 0,
|
|
||||||
size_bytes INTEGER, -- Compressed size
|
|
||||||
size_bytes_raw INTEGER, -- Uncompressed size
|
|
||||||
|
|
||||||
-- Timestamps
|
|
||||||
fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
||||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
||||||
|
|
||||||
-- Optional: checksum for integrity verification
|
|
||||||
checksum_sha256 VARCHAR(64)
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Indexes for common queries
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_dispensary
|
|
||||||
ON raw_crawl_payloads(dispensary_id);
|
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_dispensary_fetched
|
|
||||||
ON raw_crawl_payloads(dispensary_id, fetched_at DESC);
|
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_fetched
|
|
||||||
ON raw_crawl_payloads(fetched_at DESC);
|
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_crawl_run
|
|
||||||
ON raw_crawl_payloads(crawl_run_id)
|
|
||||||
WHERE crawl_run_id IS NOT NULL;
|
|
||||||
|
|
||||||
-- Comments
|
|
||||||
COMMENT ON TABLE raw_crawl_payloads IS 'Metadata for raw GraphQL payloads stored on filesystem. Per TASK_WORKFLOW_2024-12-10.md: Full payloads enable historical diffs and replay.';
|
|
||||||
COMMENT ON COLUMN raw_crawl_payloads.storage_path IS 'Path to gzipped JSON file, e.g. /storage/payloads/2024/12/10/store_123_1702234567.json.gz';
|
|
||||||
COMMENT ON COLUMN raw_crawl_payloads.size_bytes IS 'Compressed file size in bytes';
|
|
||||||
COMMENT ON COLUMN raw_crawl_payloads.size_bytes_raw IS 'Uncompressed payload size in bytes';
|
|
||||||
@@ -1,37 +0,0 @@
|
|||||||
-- Migration 081: Payload Fetch Columns
|
|
||||||
-- Per TASK_WORKFLOW_2024-12-10.md: Separates API fetch from data processing
|
|
||||||
--
|
|
||||||
-- New architecture:
|
|
||||||
-- - payload_fetch: Hits Dutchie API, saves raw payload to disk
|
|
||||||
-- - product_refresh: Reads local payload, normalizes, upserts to DB
|
|
||||||
--
|
|
||||||
-- This migration adds:
|
|
||||||
-- 1. payload column to worker_tasks (for task chaining data)
|
|
||||||
-- 2. processed_at column to raw_crawl_payloads (track when payload was processed)
|
|
||||||
-- 3. last_fetch_at column to dispensaries (track when last payload was fetched)
|
|
||||||
|
|
||||||
-- Add payload column to worker_tasks for task chaining
|
|
||||||
-- Used by payload_fetch to pass payload_id to product_refresh
|
|
||||||
ALTER TABLE worker_tasks
|
|
||||||
ADD COLUMN IF NOT EXISTS payload JSONB DEFAULT NULL;
|
|
||||||
|
|
||||||
COMMENT ON COLUMN worker_tasks.payload IS 'Per TASK_WORKFLOW_2024-12-10.md: Task chaining data (e.g., payload_id from payload_fetch to product_refresh)';
|
|
||||||
|
|
||||||
-- Add processed_at to raw_crawl_payloads
|
|
||||||
-- Tracks when the payload was processed by product_refresh
|
|
||||||
ALTER TABLE raw_crawl_payloads
|
|
||||||
ADD COLUMN IF NOT EXISTS processed_at TIMESTAMPTZ DEFAULT NULL;
|
|
||||||
|
|
||||||
COMMENT ON COLUMN raw_crawl_payloads.processed_at IS 'When this payload was processed by product_refresh handler';
|
|
||||||
|
|
||||||
-- Index for finding unprocessed payloads
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_unprocessed
|
|
||||||
ON raw_crawl_payloads(dispensary_id, fetched_at DESC)
|
|
||||||
WHERE processed_at IS NULL;
|
|
||||||
|
|
||||||
-- Add last_fetch_at to dispensaries
|
|
||||||
-- Tracks when the last payload was fetched (separate from last_crawl_at which is when processing completed)
|
|
||||||
ALTER TABLE dispensaries
|
|
||||||
ADD COLUMN IF NOT EXISTS last_fetch_at TIMESTAMPTZ DEFAULT NULL;
|
|
||||||
|
|
||||||
COMMENT ON COLUMN dispensaries.last_fetch_at IS 'Per TASK_WORKFLOW_2024-12-10.md: When last payload was fetched from API (separate from last_crawl_at which is when processing completed)';
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
-- Migration: 082_proxy_notification_trigger
|
|
||||||
-- Date: 2024-12-11
|
|
||||||
-- Description: Add PostgreSQL NOTIFY trigger to alert workers when proxies are added
|
|
||||||
|
|
||||||
-- Create function to notify workers when active proxy is added/activated
|
|
||||||
CREATE OR REPLACE FUNCTION notify_proxy_added()
|
|
||||||
RETURNS TRIGGER AS $$
|
|
||||||
BEGIN
|
|
||||||
-- Only notify if proxy is active
|
|
||||||
IF NEW.active = true THEN
|
|
||||||
PERFORM pg_notify('proxy_added', NEW.id::text);
|
|
||||||
END IF;
|
|
||||||
RETURN NEW;
|
|
||||||
END;
|
|
||||||
$$ LANGUAGE plpgsql;
|
|
||||||
|
|
||||||
-- Drop existing trigger if any
|
|
||||||
DROP TRIGGER IF EXISTS proxy_added_trigger ON proxies;
|
|
||||||
|
|
||||||
-- Create trigger on insert and update of active column
|
|
||||||
CREATE TRIGGER proxy_added_trigger
|
|
||||||
AFTER INSERT OR UPDATE OF active ON proxies
|
|
||||||
FOR EACH ROW
|
|
||||||
EXECUTE FUNCTION notify_proxy_added();
|
|
||||||
|
|
||||||
COMMENT ON FUNCTION notify_proxy_added() IS
|
|
||||||
'Sends PostgreSQL NOTIFY to proxy_added channel when an active proxy is added or activated. Workers LISTEN on this channel to wake up immediately.';
|
|
||||||
@@ -1,88 +0,0 @@
|
|||||||
-- Migration 083: Discovery Run Tracking
|
|
||||||
-- Tracks progress of store discovery runs step-by-step
|
|
||||||
|
|
||||||
-- Main discovery runs table
|
|
||||||
CREATE TABLE IF NOT EXISTS discovery_runs (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
platform VARCHAR(50) NOT NULL DEFAULT 'dutchie',
|
|
||||||
status VARCHAR(20) NOT NULL DEFAULT 'running', -- running, completed, failed
|
|
||||||
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
||||||
finished_at TIMESTAMPTZ,
|
|
||||||
task_id INTEGER REFERENCES worker_task_queue(id),
|
|
||||||
|
|
||||||
-- Totals
|
|
||||||
states_total INTEGER DEFAULT 0,
|
|
||||||
states_completed INTEGER DEFAULT 0,
|
|
||||||
locations_discovered INTEGER DEFAULT 0,
|
|
||||||
locations_promoted INTEGER DEFAULT 0,
|
|
||||||
new_store_ids INTEGER[] DEFAULT '{}',
|
|
||||||
|
|
||||||
-- Error info
|
|
||||||
error_message TEXT,
|
|
||||||
|
|
||||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Per-state progress within a run
|
|
||||||
CREATE TABLE IF NOT EXISTS discovery_run_states (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
|
|
||||||
state_code VARCHAR(2) NOT NULL,
|
|
||||||
status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending, running, completed, failed
|
|
||||||
started_at TIMESTAMPTZ,
|
|
||||||
finished_at TIMESTAMPTZ,
|
|
||||||
|
|
||||||
-- Results
|
|
||||||
cities_found INTEGER DEFAULT 0,
|
|
||||||
locations_found INTEGER DEFAULT 0,
|
|
||||||
locations_upserted INTEGER DEFAULT 0,
|
|
||||||
new_dispensary_ids INTEGER[] DEFAULT '{}',
|
|
||||||
|
|
||||||
-- Error info
|
|
||||||
error_message TEXT,
|
|
||||||
|
|
||||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
||||||
|
|
||||||
UNIQUE(run_id, state_code)
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Step-by-step log for detailed progress tracking
|
|
||||||
CREATE TABLE IF NOT EXISTS discovery_run_steps (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
|
|
||||||
state_code VARCHAR(2),
|
|
||||||
step_name VARCHAR(100) NOT NULL,
|
|
||||||
status VARCHAR(20) NOT NULL DEFAULT 'started', -- started, completed, failed
|
|
||||||
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
||||||
finished_at TIMESTAMPTZ,
|
|
||||||
|
|
||||||
-- Details (JSON for flexibility)
|
|
||||||
details JSONB DEFAULT '{}',
|
|
||||||
|
|
||||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Indexes for querying
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_status ON discovery_runs(status);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_platform ON discovery_runs(platform);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_started_at ON discovery_runs(started_at DESC);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_discovery_run_states_run_id ON discovery_run_states(run_id);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_discovery_run_steps_run_id ON discovery_run_steps(run_id);
|
|
||||||
|
|
||||||
-- View for latest run status per platform
|
|
||||||
CREATE OR REPLACE VIEW v_latest_discovery_runs AS
|
|
||||||
SELECT DISTINCT ON (platform)
|
|
||||||
id,
|
|
||||||
platform,
|
|
||||||
status,
|
|
||||||
started_at,
|
|
||||||
finished_at,
|
|
||||||
states_total,
|
|
||||||
states_completed,
|
|
||||||
locations_discovered,
|
|
||||||
locations_promoted,
|
|
||||||
array_length(new_store_ids, 1) as new_stores_count,
|
|
||||||
error_message,
|
|
||||||
EXTRACT(EPOCH FROM (COALESCE(finished_at, NOW()) - started_at)) as duration_seconds
|
|
||||||
FROM discovery_runs
|
|
||||||
ORDER BY platform, started_at DESC;
|
|
||||||
@@ -1,253 +0,0 @@
|
|||||||
-- Migration 084: Dual Transport Preflight System
|
|
||||||
-- Workers run both curl and http (Puppeteer) preflights on startup
|
|
||||||
-- Tasks can require a specific transport method
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 1: Add preflight columns to worker_registry
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- Preflight status for curl/axios transport (proxy-based)
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS preflight_curl_status VARCHAR(20) DEFAULT 'pending';
|
|
||||||
|
|
||||||
-- Preflight status for http/Puppeteer transport (browser-based)
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS preflight_http_status VARCHAR(20) DEFAULT 'pending';
|
|
||||||
|
|
||||||
-- Timestamps for when each preflight completed
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS preflight_curl_at TIMESTAMPTZ;
|
|
||||||
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS preflight_http_at TIMESTAMPTZ;
|
|
||||||
|
|
||||||
-- Error messages for failed preflights
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS preflight_curl_error TEXT;
|
|
||||||
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS preflight_http_error TEXT;
|
|
||||||
|
|
||||||
-- Response time for successful preflights (ms)
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS preflight_curl_ms INTEGER;
|
|
||||||
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS preflight_http_ms INTEGER;
|
|
||||||
|
|
||||||
-- Constraints for preflight status values
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
DROP CONSTRAINT IF EXISTS valid_preflight_curl_status;
|
|
||||||
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD CONSTRAINT valid_preflight_curl_status
|
|
||||||
CHECK (preflight_curl_status IN ('pending', 'passed', 'failed', 'skipped'));
|
|
||||||
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
DROP CONSTRAINT IF EXISTS valid_preflight_http_status;
|
|
||||||
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD CONSTRAINT valid_preflight_http_status
|
|
||||||
CHECK (preflight_http_status IN ('pending', 'passed', 'failed', 'skipped'));
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 2: Add method column to worker_tasks
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- Transport method requirement for the task
|
|
||||||
-- NULL = no preference (any worker can claim)
|
|
||||||
-- 'curl' = requires curl/axios transport (proxy-based, fast)
|
|
||||||
-- 'http' = requires http/Puppeteer transport (browser-based, anti-detect)
|
|
||||||
ALTER TABLE worker_tasks
|
|
||||||
ADD COLUMN IF NOT EXISTS method VARCHAR(10);
|
|
||||||
|
|
||||||
-- Constraint for valid method values
|
|
||||||
ALTER TABLE worker_tasks
|
|
||||||
DROP CONSTRAINT IF EXISTS valid_task_method;
|
|
||||||
|
|
||||||
ALTER TABLE worker_tasks
|
|
||||||
ADD CONSTRAINT valid_task_method
|
|
||||||
CHECK (method IS NULL OR method IN ('curl', 'http'));
|
|
||||||
|
|
||||||
-- Index for method-based task claiming
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_worker_tasks_method
|
|
||||||
ON worker_tasks(method)
|
|
||||||
WHERE status = 'pending';
|
|
||||||
|
|
||||||
-- Set default method for all existing pending tasks to 'http'
|
|
||||||
-- ALL current tasks require Puppeteer/browser-based transport
|
|
||||||
UPDATE worker_tasks
|
|
||||||
SET method = 'http'
|
|
||||||
WHERE method IS NULL;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 3: Update claim_task function for method compatibility
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION claim_task(
|
|
||||||
p_role VARCHAR(50),
|
|
||||||
p_worker_id VARCHAR(100),
|
|
||||||
p_curl_passed BOOLEAN DEFAULT TRUE,
|
|
||||||
p_http_passed BOOLEAN DEFAULT FALSE
|
|
||||||
) RETURNS worker_tasks AS $$
|
|
||||||
DECLARE
|
|
||||||
claimed_task worker_tasks;
|
|
||||||
BEGIN
|
|
||||||
UPDATE worker_tasks
|
|
||||||
SET
|
|
||||||
status = 'claimed',
|
|
||||||
worker_id = p_worker_id,
|
|
||||||
claimed_at = NOW(),
|
|
||||||
updated_at = NOW()
|
|
||||||
WHERE id = (
|
|
||||||
SELECT id FROM worker_tasks
|
|
||||||
WHERE role = p_role
|
|
||||||
AND status = 'pending'
|
|
||||||
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
|
|
||||||
-- Method compatibility: worker must have passed the required preflight
|
|
||||||
AND (
|
|
||||||
method IS NULL -- No preference, any worker can claim
|
|
||||||
OR (method = 'curl' AND p_curl_passed = TRUE)
|
|
||||||
OR (method = 'http' AND p_http_passed = TRUE)
|
|
||||||
)
|
|
||||||
-- Exclude stores that already have an active task
|
|
||||||
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
|
|
||||||
SELECT dispensary_id FROM worker_tasks
|
|
||||||
WHERE status IN ('claimed', 'running')
|
|
||||||
AND dispensary_id IS NOT NULL
|
|
||||||
))
|
|
||||||
ORDER BY priority DESC, created_at ASC
|
|
||||||
LIMIT 1
|
|
||||||
FOR UPDATE SKIP LOCKED
|
|
||||||
)
|
|
||||||
RETURNING * INTO claimed_task;
|
|
||||||
|
|
||||||
RETURN claimed_task;
|
|
||||||
END;
|
|
||||||
$$ LANGUAGE plpgsql;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 4: Update v_active_workers view
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
DROP VIEW IF EXISTS v_active_workers;
|
|
||||||
|
|
||||||
CREATE VIEW v_active_workers AS
|
|
||||||
SELECT
|
|
||||||
wr.id,
|
|
||||||
wr.worker_id,
|
|
||||||
wr.friendly_name,
|
|
||||||
wr.role,
|
|
||||||
wr.status,
|
|
||||||
wr.pod_name,
|
|
||||||
wr.hostname,
|
|
||||||
wr.started_at,
|
|
||||||
wr.last_heartbeat_at,
|
|
||||||
wr.last_task_at,
|
|
||||||
wr.tasks_completed,
|
|
||||||
wr.tasks_failed,
|
|
||||||
wr.current_task_id,
|
|
||||||
-- Preflight status
|
|
||||||
wr.preflight_curl_status,
|
|
||||||
wr.preflight_http_status,
|
|
||||||
wr.preflight_curl_at,
|
|
||||||
wr.preflight_http_at,
|
|
||||||
wr.preflight_curl_error,
|
|
||||||
wr.preflight_http_error,
|
|
||||||
wr.preflight_curl_ms,
|
|
||||||
wr.preflight_http_ms,
|
|
||||||
-- Computed fields
|
|
||||||
EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
|
|
||||||
CASE
|
|
||||||
WHEN wr.status = 'offline' THEN 'offline'
|
|
||||||
WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
|
|
||||||
WHEN wr.current_task_id IS NOT NULL THEN 'busy'
|
|
||||||
ELSE 'ready'
|
|
||||||
END as health_status,
|
|
||||||
-- Capability flags (can this worker handle curl/http tasks?)
|
|
||||||
(wr.preflight_curl_status = 'passed') as can_curl,
|
|
||||||
(wr.preflight_http_status = 'passed') as can_http
|
|
||||||
FROM worker_registry wr
|
|
||||||
WHERE wr.status != 'terminated'
|
|
||||||
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 5: View for task queue with method info
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
DROP VIEW IF EXISTS v_task_history;
|
|
||||||
|
|
||||||
CREATE VIEW v_task_history AS
|
|
||||||
SELECT
|
|
||||||
t.id,
|
|
||||||
t.role,
|
|
||||||
t.dispensary_id,
|
|
||||||
d.name as dispensary_name,
|
|
||||||
t.platform,
|
|
||||||
t.status,
|
|
||||||
t.priority,
|
|
||||||
t.method,
|
|
||||||
t.worker_id,
|
|
||||||
t.scheduled_for,
|
|
||||||
t.claimed_at,
|
|
||||||
t.started_at,
|
|
||||||
t.completed_at,
|
|
||||||
t.error_message,
|
|
||||||
t.retry_count,
|
|
||||||
t.created_at,
|
|
||||||
EXTRACT(EPOCH FROM (t.completed_at - t.started_at)) as duration_sec
|
|
||||||
FROM worker_tasks t
|
|
||||||
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
|
|
||||||
ORDER BY t.created_at DESC;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 6: Helper function to update worker preflight status
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION update_worker_preflight(
|
|
||||||
p_worker_id VARCHAR(100),
|
|
||||||
p_transport VARCHAR(10), -- 'curl' or 'http'
|
|
||||||
p_status VARCHAR(20), -- 'passed', 'failed', 'skipped'
|
|
||||||
p_response_ms INTEGER DEFAULT NULL,
|
|
||||||
p_error TEXT DEFAULT NULL
|
|
||||||
) RETURNS VOID AS $$
|
|
||||||
BEGIN
|
|
||||||
IF p_transport = 'curl' THEN
|
|
||||||
UPDATE worker_registry
|
|
||||||
SET
|
|
||||||
preflight_curl_status = p_status,
|
|
||||||
preflight_curl_at = NOW(),
|
|
||||||
preflight_curl_ms = p_response_ms,
|
|
||||||
preflight_curl_error = p_error,
|
|
||||||
updated_at = NOW()
|
|
||||||
WHERE worker_id = p_worker_id;
|
|
||||||
ELSIF p_transport = 'http' THEN
|
|
||||||
UPDATE worker_registry
|
|
||||||
SET
|
|
||||||
preflight_http_status = p_status,
|
|
||||||
preflight_http_at = NOW(),
|
|
||||||
preflight_http_ms = p_response_ms,
|
|
||||||
preflight_http_error = p_error,
|
|
||||||
updated_at = NOW()
|
|
||||||
WHERE worker_id = p_worker_id;
|
|
||||||
END IF;
|
|
||||||
END;
|
|
||||||
$$ LANGUAGE plpgsql;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- Comments
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
COMMENT ON COLUMN worker_registry.preflight_curl_status IS 'Status of curl/axios preflight: pending, passed, failed, skipped';
|
|
||||||
COMMENT ON COLUMN worker_registry.preflight_http_status IS 'Status of http/Puppeteer preflight: pending, passed, failed, skipped';
|
|
||||||
COMMENT ON COLUMN worker_registry.preflight_curl_at IS 'When curl preflight completed';
|
|
||||||
COMMENT ON COLUMN worker_registry.preflight_http_at IS 'When http preflight completed';
|
|
||||||
COMMENT ON COLUMN worker_registry.preflight_curl_error IS 'Error message if curl preflight failed';
|
|
||||||
COMMENT ON COLUMN worker_registry.preflight_http_error IS 'Error message if http preflight failed';
|
|
||||||
COMMENT ON COLUMN worker_registry.preflight_curl_ms IS 'Response time of successful curl preflight (ms)';
|
|
||||||
COMMENT ON COLUMN worker_registry.preflight_http_ms IS 'Response time of successful http preflight (ms)';
|
|
||||||
|
|
||||||
COMMENT ON COLUMN worker_tasks.method IS 'Transport method required: NULL=any, curl=proxy-based, http=browser-based';
|
|
||||||
|
|
||||||
COMMENT ON FUNCTION claim_task IS 'Atomically claim a task, respecting method requirements and per-store locking';
|
|
||||||
COMMENT ON FUNCTION update_worker_preflight IS 'Update a workers preflight status for a given transport';
|
|
||||||
@@ -1,168 +0,0 @@
|
|||||||
-- Migration 085: Add IP and fingerprint columns for preflight reporting
|
|
||||||
-- These columns were missing from migration 084
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 1: Add IP address columns to worker_registry
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- IP address detected during curl/axios preflight
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS curl_ip VARCHAR(45);
|
|
||||||
|
|
||||||
-- IP address detected during http/Puppeteer preflight
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS http_ip VARCHAR(45);
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 2: Add fingerprint data column
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- Browser fingerprint data captured during Puppeteer preflight
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS fingerprint_data JSONB;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 3: Add combined preflight status/timestamp for convenience
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
-- Overall preflight status (computed from both transports)
|
|
||||||
-- Values: 'pending', 'passed', 'partial', 'failed'
|
|
||||||
-- - 'pending': neither transport tested
|
|
||||||
-- - 'passed': both transports passed (or http passed for browser-only)
|
|
||||||
-- - 'partial': at least one passed
|
|
||||||
-- - 'failed': no transport passed
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS preflight_status VARCHAR(20) DEFAULT 'pending';
|
|
||||||
|
|
||||||
-- Most recent preflight completion timestamp
|
|
||||||
ALTER TABLE worker_registry
|
|
||||||
ADD COLUMN IF NOT EXISTS preflight_at TIMESTAMPTZ;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 4: Update function to set preflight status
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION update_worker_preflight(
|
|
||||||
p_worker_id VARCHAR(100),
|
|
||||||
p_transport VARCHAR(10), -- 'curl' or 'http'
|
|
||||||
p_status VARCHAR(20), -- 'passed', 'failed', 'skipped'
|
|
||||||
p_ip VARCHAR(45) DEFAULT NULL,
|
|
||||||
p_response_ms INTEGER DEFAULT NULL,
|
|
||||||
p_error TEXT DEFAULT NULL,
|
|
||||||
p_fingerprint JSONB DEFAULT NULL
|
|
||||||
) RETURNS VOID AS $$
|
|
||||||
DECLARE
|
|
||||||
v_curl_status VARCHAR(20);
|
|
||||||
v_http_status VARCHAR(20);
|
|
||||||
v_overall_status VARCHAR(20);
|
|
||||||
BEGIN
|
|
||||||
IF p_transport = 'curl' THEN
|
|
||||||
UPDATE worker_registry
|
|
||||||
SET
|
|
||||||
preflight_curl_status = p_status,
|
|
||||||
preflight_curl_at = NOW(),
|
|
||||||
preflight_curl_ms = p_response_ms,
|
|
||||||
preflight_curl_error = p_error,
|
|
||||||
curl_ip = p_ip,
|
|
||||||
updated_at = NOW()
|
|
||||||
WHERE worker_id = p_worker_id;
|
|
||||||
ELSIF p_transport = 'http' THEN
|
|
||||||
UPDATE worker_registry
|
|
||||||
SET
|
|
||||||
preflight_http_status = p_status,
|
|
||||||
preflight_http_at = NOW(),
|
|
||||||
preflight_http_ms = p_response_ms,
|
|
||||||
preflight_http_error = p_error,
|
|
||||||
http_ip = p_ip,
|
|
||||||
fingerprint_data = COALESCE(p_fingerprint, fingerprint_data),
|
|
||||||
updated_at = NOW()
|
|
||||||
WHERE worker_id = p_worker_id;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
-- Update overall preflight status
|
|
||||||
SELECT preflight_curl_status, preflight_http_status
|
|
||||||
INTO v_curl_status, v_http_status
|
|
||||||
FROM worker_registry
|
|
||||||
WHERE worker_id = p_worker_id;
|
|
||||||
|
|
||||||
-- Compute overall status
|
|
||||||
IF v_curl_status = 'passed' AND v_http_status = 'passed' THEN
|
|
||||||
v_overall_status := 'passed';
|
|
||||||
ELSIF v_curl_status = 'passed' OR v_http_status = 'passed' THEN
|
|
||||||
v_overall_status := 'partial';
|
|
||||||
ELSIF v_curl_status = 'failed' OR v_http_status = 'failed' THEN
|
|
||||||
v_overall_status := 'failed';
|
|
||||||
ELSE
|
|
||||||
v_overall_status := 'pending';
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
UPDATE worker_registry
|
|
||||||
SET
|
|
||||||
preflight_status = v_overall_status,
|
|
||||||
preflight_at = NOW()
|
|
||||||
WHERE worker_id = p_worker_id;
|
|
||||||
END;
|
|
||||||
$$ LANGUAGE plpgsql;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- PART 5: Update v_active_workers view
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
DROP VIEW IF EXISTS v_active_workers;
|
|
||||||
|
|
||||||
CREATE VIEW v_active_workers AS
|
|
||||||
SELECT
|
|
||||||
wr.id,
|
|
||||||
wr.worker_id,
|
|
||||||
wr.friendly_name,
|
|
||||||
wr.role,
|
|
||||||
wr.status,
|
|
||||||
wr.pod_name,
|
|
||||||
wr.hostname,
|
|
||||||
wr.started_at,
|
|
||||||
wr.last_heartbeat_at,
|
|
||||||
wr.last_task_at,
|
|
||||||
wr.tasks_completed,
|
|
||||||
wr.tasks_failed,
|
|
||||||
wr.current_task_id,
|
|
||||||
-- IP addresses from preflights
|
|
||||||
wr.curl_ip,
|
|
||||||
wr.http_ip,
|
|
||||||
-- Combined preflight status
|
|
||||||
wr.preflight_status,
|
|
||||||
wr.preflight_at,
|
|
||||||
-- Detailed preflight status per transport
|
|
||||||
wr.preflight_curl_status,
|
|
||||||
wr.preflight_http_status,
|
|
||||||
wr.preflight_curl_at,
|
|
||||||
wr.preflight_http_at,
|
|
||||||
wr.preflight_curl_error,
|
|
||||||
wr.preflight_http_error,
|
|
||||||
wr.preflight_curl_ms,
|
|
||||||
wr.preflight_http_ms,
|
|
||||||
-- Fingerprint data
|
|
||||||
wr.fingerprint_data,
|
|
||||||
-- Computed fields
|
|
||||||
EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
|
|
||||||
CASE
|
|
||||||
WHEN wr.status = 'offline' THEN 'offline'
|
|
||||||
WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
|
|
||||||
WHEN wr.current_task_id IS NOT NULL THEN 'busy'
|
|
||||||
ELSE 'ready'
|
|
||||||
END as health_status,
|
|
||||||
-- Capability flags (can this worker handle curl/http tasks?)
|
|
||||||
(wr.preflight_curl_status = 'passed') as can_curl,
|
|
||||||
(wr.preflight_http_status = 'passed') as can_http
|
|
||||||
FROM worker_registry wr
|
|
||||||
WHERE wr.status != 'terminated'
|
|
||||||
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;
|
|
||||||
|
|
||||||
-- ===================================================================
|
|
||||||
-- Comments
|
|
||||||
-- ===================================================================
|
|
||||||
|
|
||||||
COMMENT ON COLUMN worker_registry.curl_ip IS 'IP address detected during curl/axios preflight';
|
|
||||||
COMMENT ON COLUMN worker_registry.http_ip IS 'IP address detected during Puppeteer preflight';
|
|
||||||
COMMENT ON COLUMN worker_registry.fingerprint_data IS 'Browser fingerprint captured during Puppeteer preflight';
|
|
||||||
COMMENT ON COLUMN worker_registry.preflight_status IS 'Overall preflight status: pending, passed, partial, failed';
|
|
||||||
COMMENT ON COLUMN worker_registry.preflight_at IS 'Most recent preflight completion timestamp';
|
|
||||||
286
backend/node_modules/.package-lock.json
generated
vendored
286
backend/node_modules/.package-lock.json
generated
vendored
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "dutchie-menus-backend",
|
"name": "dutchie-menus-backend",
|
||||||
"version": "1.6.0",
|
"version": "1.5.1",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
@@ -46,97 +46,6 @@
|
|||||||
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
|
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
|
||||||
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
|
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
|
||||||
},
|
},
|
||||||
"node_modules/@jsep-plugin/assignment": {
|
|
||||||
"version": "1.3.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
|
|
||||||
"integrity": "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 10.16.0"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"jsep": "^0.4.0||^1.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@jsep-plugin/regex": {
|
|
||||||
"version": "1.0.4",
|
|
||||||
"resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz",
|
|
||||||
"integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 10.16.0"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"jsep": "^0.4.0||^1.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@kubernetes/client-node": {
|
|
||||||
"version": "1.4.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-1.4.0.tgz",
|
|
||||||
"integrity": "sha512-Zge3YvF7DJi264dU1b3wb/GmzR99JhUpqTvp+VGHfwZT+g7EOOYNScDJNZwXy9cszyIGPIs0VHr+kk8e95qqrA==",
|
|
||||||
"dependencies": {
|
|
||||||
"@types/js-yaml": "^4.0.1",
|
|
||||||
"@types/node": "^24.0.0",
|
|
||||||
"@types/node-fetch": "^2.6.13",
|
|
||||||
"@types/stream-buffers": "^3.0.3",
|
|
||||||
"form-data": "^4.0.0",
|
|
||||||
"hpagent": "^1.2.0",
|
|
||||||
"isomorphic-ws": "^5.0.0",
|
|
||||||
"js-yaml": "^4.1.0",
|
|
||||||
"jsonpath-plus": "^10.3.0",
|
|
||||||
"node-fetch": "^2.7.0",
|
|
||||||
"openid-client": "^6.1.3",
|
|
||||||
"rfc4648": "^1.3.0",
|
|
||||||
"socks-proxy-agent": "^8.0.4",
|
|
||||||
"stream-buffers": "^3.0.2",
|
|
||||||
"tar-fs": "^3.0.9",
|
|
||||||
"ws": "^8.18.2"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@kubernetes/client-node/node_modules/@types/node": {
|
|
||||||
"version": "24.10.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.3.tgz",
|
|
||||||
"integrity": "sha512-gqkrWUsS8hcm0r44yn7/xZeV1ERva/nLgrLxFRUGb7aoNMIJfZJ3AC261zDQuOAKC7MiXai1WCpYc48jAHoShQ==",
|
|
||||||
"dependencies": {
|
|
||||||
"undici-types": "~7.16.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@kubernetes/client-node/node_modules/tar-fs": {
|
|
||||||
"version": "3.1.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
|
|
||||||
"integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
|
|
||||||
"dependencies": {
|
|
||||||
"pump": "^3.0.0",
|
|
||||||
"tar-stream": "^3.1.5"
|
|
||||||
},
|
|
||||||
"optionalDependencies": {
|
|
||||||
"bare-fs": "^4.0.1",
|
|
||||||
"bare-path": "^3.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@kubernetes/client-node/node_modules/undici-types": {
|
|
||||||
"version": "7.16.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
|
||||||
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="
|
|
||||||
},
|
|
||||||
"node_modules/@kubernetes/client-node/node_modules/ws": {
|
|
||||||
"version": "8.18.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
|
|
||||||
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=10.0.0"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"bufferutil": "^4.0.1",
|
|
||||||
"utf-8-validate": ">=5.0.2"
|
|
||||||
},
|
|
||||||
"peerDependenciesMeta": {
|
|
||||||
"bufferutil": {
|
|
||||||
"optional": true
|
|
||||||
},
|
|
||||||
"utf-8-validate": {
|
|
||||||
"optional": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@mapbox/node-pre-gyp": {
|
"node_modules/@mapbox/node-pre-gyp": {
|
||||||
"version": "1.0.11",
|
"version": "1.0.11",
|
||||||
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
|
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
|
||||||
@@ -342,11 +251,6 @@
|
|||||||
"integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
|
"integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/@types/js-yaml": {
|
|
||||||
"version": "4.0.9",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
|
|
||||||
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
|
|
||||||
},
|
|
||||||
"node_modules/@types/jsonwebtoken": {
|
"node_modules/@types/jsonwebtoken": {
|
||||||
"version": "9.0.10",
|
"version": "9.0.10",
|
||||||
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
|
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
|
||||||
@@ -372,6 +276,7 @@
|
|||||||
"version": "20.19.25",
|
"version": "20.19.25",
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
|
||||||
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
|
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
|
||||||
|
"devOptional": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"undici-types": "~6.21.0"
|
"undici-types": "~6.21.0"
|
||||||
}
|
}
|
||||||
@@ -382,15 +287,6 @@
|
|||||||
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/@types/node-fetch": {
|
|
||||||
"version": "2.6.13",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
|
|
||||||
"integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
|
|
||||||
"dependencies": {
|
|
||||||
"@types/node": "*",
|
|
||||||
"form-data": "^4.0.4"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@types/pg": {
|
"node_modules/@types/pg": {
|
||||||
"version": "8.15.6",
|
"version": "8.15.6",
|
||||||
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
|
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
|
||||||
@@ -444,14 +340,6 @@
|
|||||||
"@types/node": "*"
|
"@types/node": "*"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/stream-buffers": {
|
|
||||||
"version": "3.0.8",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/stream-buffers/-/stream-buffers-3.0.8.tgz",
|
|
||||||
"integrity": "sha512-J+7VaHKNvlNPJPEJXX/fKa9DZtR/xPMwuIbe+yNOwp1YB+ApUOBv2aUpEoBJEi8nJgbgs1x8e73ttg0r1rSUdw==",
|
|
||||||
"dependencies": {
|
|
||||||
"@types/node": "*"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@types/uuid": {
|
"node_modules/@types/uuid": {
|
||||||
"version": "9.0.8",
|
"version": "9.0.8",
|
||||||
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
||||||
@@ -632,78 +520,6 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/bare-fs": {
|
|
||||||
"version": "4.5.2",
|
|
||||||
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
|
|
||||||
"integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"bare-events": "^2.5.4",
|
|
||||||
"bare-path": "^3.0.0",
|
|
||||||
"bare-stream": "^2.6.4",
|
|
||||||
"bare-url": "^2.2.2",
|
|
||||||
"fast-fifo": "^1.3.2"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"bare": ">=1.16.0"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"bare-buffer": "*"
|
|
||||||
},
|
|
||||||
"peerDependenciesMeta": {
|
|
||||||
"bare-buffer": {
|
|
||||||
"optional": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/bare-os": {
|
|
||||||
"version": "3.6.2",
|
|
||||||
"resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
|
|
||||||
"integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
|
|
||||||
"optional": true,
|
|
||||||
"engines": {
|
|
||||||
"bare": ">=1.14.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/bare-path": {
|
|
||||||
"version": "3.0.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
|
|
||||||
"integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"bare-os": "^3.0.1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/bare-stream": {
|
|
||||||
"version": "2.7.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
|
|
||||||
"integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"streamx": "^2.21.0"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"bare-buffer": "*",
|
|
||||||
"bare-events": "*"
|
|
||||||
},
|
|
||||||
"peerDependenciesMeta": {
|
|
||||||
"bare-buffer": {
|
|
||||||
"optional": true
|
|
||||||
},
|
|
||||||
"bare-events": {
|
|
||||||
"optional": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/bare-url": {
|
|
||||||
"version": "2.3.2",
|
|
||||||
"resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
|
|
||||||
"integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"bare-path": "^3.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/base64-js": {
|
"node_modules/base64-js": {
|
||||||
"version": "1.5.1",
|
"version": "1.5.1",
|
||||||
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
||||||
@@ -2203,14 +2019,6 @@
|
|||||||
"node": ">=16.0.0"
|
"node": ">=16.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/hpagent": {
|
|
||||||
"version": "1.2.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
|
|
||||||
"integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=14"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/htmlparser2": {
|
"node_modules/htmlparser2": {
|
||||||
"version": "10.0.0",
|
"version": "10.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
|
||||||
@@ -2574,22 +2382,6 @@
|
|||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/isomorphic-ws": {
|
|
||||||
"version": "5.0.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
|
|
||||||
"integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
|
|
||||||
"peerDependencies": {
|
|
||||||
"ws": "*"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/jose": {
|
|
||||||
"version": "6.1.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
|
|
||||||
"integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/panva"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/js-tokens": {
|
"node_modules/js-tokens": {
|
||||||
"version": "4.0.0",
|
"version": "4.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
||||||
@@ -2606,14 +2398,6 @@
|
|||||||
"js-yaml": "bin/js-yaml.js"
|
"js-yaml": "bin/js-yaml.js"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/jsep": {
|
|
||||||
"version": "1.4.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
|
|
||||||
"integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 10.16.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/json-parse-even-better-errors": {
|
"node_modules/json-parse-even-better-errors": {
|
||||||
"version": "2.3.1",
|
"version": "2.3.1",
|
||||||
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
|
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
|
||||||
@@ -2635,23 +2419,6 @@
|
|||||||
"graceful-fs": "^4.1.6"
|
"graceful-fs": "^4.1.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/jsonpath-plus": {
|
|
||||||
"version": "10.3.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
|
|
||||||
"integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
|
|
||||||
"dependencies": {
|
|
||||||
"@jsep-plugin/assignment": "^1.3.0",
|
|
||||||
"@jsep-plugin/regex": "^1.0.4",
|
|
||||||
"jsep": "^1.4.0"
|
|
||||||
},
|
|
||||||
"bin": {
|
|
||||||
"jsonpath": "bin/jsonpath-cli.js",
|
|
||||||
"jsonpath-plus": "bin/jsonpath-cli.js"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=18.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/jsonwebtoken": {
|
"node_modules/jsonwebtoken": {
|
||||||
"version": "9.0.2",
|
"version": "9.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
|
||||||
@@ -2726,11 +2493,6 @@
|
|||||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
||||||
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
|
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
|
||||||
},
|
},
|
||||||
"node_modules/lodash.clonedeep": {
|
|
||||||
"version": "4.5.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
|
|
||||||
"integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
|
|
||||||
},
|
|
||||||
"node_modules/lodash.defaults": {
|
"node_modules/lodash.defaults": {
|
||||||
"version": "4.2.0",
|
"version": "4.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
||||||
@@ -3180,14 +2942,6 @@
|
|||||||
"url": "https://github.com/fb55/nth-check?sponsor=1"
|
"url": "https://github.com/fb55/nth-check?sponsor=1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/oauth4webapi": {
|
|
||||||
"version": "3.8.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
|
|
||||||
"integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/panva"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/object-assign": {
|
"node_modules/object-assign": {
|
||||||
"version": "4.1.1",
|
"version": "4.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
||||||
@@ -3226,18 +2980,6 @@
|
|||||||
"wrappy": "1"
|
"wrappy": "1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/openid-client": {
|
|
||||||
"version": "6.8.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
|
|
||||||
"integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
|
|
||||||
"dependencies": {
|
|
||||||
"jose": "^6.1.0",
|
|
||||||
"oauth4webapi": "^3.8.2"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/panva"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/pac-proxy-agent": {
|
"node_modules/pac-proxy-agent": {
|
||||||
"version": "7.2.0",
|
"version": "7.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
||||||
@@ -4141,11 +3883,6 @@
|
|||||||
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/rfc4648": {
|
|
||||||
"version": "1.5.4",
|
|
||||||
"resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
|
|
||||||
"integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
|
|
||||||
},
|
|
||||||
"node_modules/rimraf": {
|
"node_modules/rimraf": {
|
||||||
"version": "3.0.2",
|
"version": "3.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
|
||||||
@@ -4576,14 +4313,6 @@
|
|||||||
"node": ">= 0.8"
|
"node": ">= 0.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/stream-buffers": {
|
|
||||||
"version": "3.0.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
|
|
||||||
"integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 0.10.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/streamx": {
|
"node_modules/streamx": {
|
||||||
"version": "2.23.0",
|
"version": "2.23.0",
|
||||||
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
|
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
|
||||||
@@ -4803,7 +4532,8 @@
|
|||||||
"node_modules/undici-types": {
|
"node_modules/undici-types": {
|
||||||
"version": "6.21.0",
|
"version": "6.21.0",
|
||||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
||||||
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="
|
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
|
||||||
|
"devOptional": true
|
||||||
},
|
},
|
||||||
"node_modules/universalify": {
|
"node_modules/universalify": {
|
||||||
"version": "2.0.1",
|
"version": "2.0.1",
|
||||||
@@ -4826,14 +4556,6 @@
|
|||||||
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
|
||||||
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
|
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
|
||||||
},
|
},
|
||||||
"node_modules/user-agents": {
|
|
||||||
"version": "1.1.669",
|
|
||||||
"resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
|
|
||||||
"integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
|
|
||||||
"dependencies": {
|
|
||||||
"lodash.clonedeep": "^4.5.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/util": {
|
"node_modules/util": {
|
||||||
"version": "0.12.5",
|
"version": "0.12.5",
|
||||||
"resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
|
"resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
|
||||||
|
|||||||
290
backend/package-lock.json
generated
290
backend/package-lock.json
generated
@@ -1,14 +1,13 @@
|
|||||||
{
|
{
|
||||||
"name": "dutchie-menus-backend",
|
"name": "dutchie-menus-backend",
|
||||||
"version": "1.6.0",
|
"version": "1.5.1",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "dutchie-menus-backend",
|
"name": "dutchie-menus-backend",
|
||||||
"version": "1.6.0",
|
"version": "1.5.1",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@kubernetes/client-node": "^1.4.0",
|
|
||||||
"@types/bcryptjs": "^3.0.0",
|
"@types/bcryptjs": "^3.0.0",
|
||||||
"axios": "^1.6.2",
|
"axios": "^1.6.2",
|
||||||
"bcrypt": "^5.1.1",
|
"bcrypt": "^5.1.1",
|
||||||
@@ -35,7 +34,6 @@
|
|||||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||||
"sharp": "^0.32.0",
|
"sharp": "^0.32.0",
|
||||||
"socks-proxy-agent": "^8.0.2",
|
"socks-proxy-agent": "^8.0.2",
|
||||||
"user-agents": "^1.1.669",
|
|
||||||
"uuid": "^9.0.1",
|
"uuid": "^9.0.1",
|
||||||
"zod": "^3.22.4"
|
"zod": "^3.22.4"
|
||||||
},
|
},
|
||||||
@@ -494,97 +492,6 @@
|
|||||||
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
|
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
|
||||||
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
|
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
|
||||||
},
|
},
|
||||||
"node_modules/@jsep-plugin/assignment": {
|
|
||||||
"version": "1.3.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
|
|
||||||
"integrity": "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 10.16.0"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"jsep": "^0.4.0||^1.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@jsep-plugin/regex": {
|
|
||||||
"version": "1.0.4",
|
|
||||||
"resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz",
|
|
||||||
"integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 10.16.0"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"jsep": "^0.4.0||^1.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@kubernetes/client-node": {
|
|
||||||
"version": "1.4.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-1.4.0.tgz",
|
|
||||||
"integrity": "sha512-Zge3YvF7DJi264dU1b3wb/GmzR99JhUpqTvp+VGHfwZT+g7EOOYNScDJNZwXy9cszyIGPIs0VHr+kk8e95qqrA==",
|
|
||||||
"dependencies": {
|
|
||||||
"@types/js-yaml": "^4.0.1",
|
|
||||||
"@types/node": "^24.0.0",
|
|
||||||
"@types/node-fetch": "^2.6.13",
|
|
||||||
"@types/stream-buffers": "^3.0.3",
|
|
||||||
"form-data": "^4.0.0",
|
|
||||||
"hpagent": "^1.2.0",
|
|
||||||
"isomorphic-ws": "^5.0.0",
|
|
||||||
"js-yaml": "^4.1.0",
|
|
||||||
"jsonpath-plus": "^10.3.0",
|
|
||||||
"node-fetch": "^2.7.0",
|
|
||||||
"openid-client": "^6.1.3",
|
|
||||||
"rfc4648": "^1.3.0",
|
|
||||||
"socks-proxy-agent": "^8.0.4",
|
|
||||||
"stream-buffers": "^3.0.2",
|
|
||||||
"tar-fs": "^3.0.9",
|
|
||||||
"ws": "^8.18.2"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@kubernetes/client-node/node_modules/@types/node": {
|
|
||||||
"version": "24.10.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.3.tgz",
|
|
||||||
"integrity": "sha512-gqkrWUsS8hcm0r44yn7/xZeV1ERva/nLgrLxFRUGb7aoNMIJfZJ3AC261zDQuOAKC7MiXai1WCpYc48jAHoShQ==",
|
|
||||||
"dependencies": {
|
|
||||||
"undici-types": "~7.16.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@kubernetes/client-node/node_modules/tar-fs": {
|
|
||||||
"version": "3.1.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
|
|
||||||
"integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
|
|
||||||
"dependencies": {
|
|
||||||
"pump": "^3.0.0",
|
|
||||||
"tar-stream": "^3.1.5"
|
|
||||||
},
|
|
||||||
"optionalDependencies": {
|
|
||||||
"bare-fs": "^4.0.1",
|
|
||||||
"bare-path": "^3.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@kubernetes/client-node/node_modules/undici-types": {
|
|
||||||
"version": "7.16.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
|
||||||
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="
|
|
||||||
},
|
|
||||||
"node_modules/@kubernetes/client-node/node_modules/ws": {
|
|
||||||
"version": "8.18.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
|
|
||||||
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=10.0.0"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"bufferutil": "^4.0.1",
|
|
||||||
"utf-8-validate": ">=5.0.2"
|
|
||||||
},
|
|
||||||
"peerDependenciesMeta": {
|
|
||||||
"bufferutil": {
|
|
||||||
"optional": true
|
|
||||||
},
|
|
||||||
"utf-8-validate": {
|
|
||||||
"optional": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@mapbox/node-pre-gyp": {
|
"node_modules/@mapbox/node-pre-gyp": {
|
||||||
"version": "1.0.11",
|
"version": "1.0.11",
|
||||||
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
|
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
|
||||||
@@ -850,11 +757,6 @@
|
|||||||
"integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
|
"integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/@types/js-yaml": {
|
|
||||||
"version": "4.0.9",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
|
|
||||||
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
|
|
||||||
},
|
|
||||||
"node_modules/@types/jsonwebtoken": {
|
"node_modules/@types/jsonwebtoken": {
|
||||||
"version": "9.0.10",
|
"version": "9.0.10",
|
||||||
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
|
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
|
||||||
@@ -880,6 +782,7 @@
|
|||||||
"version": "20.19.25",
|
"version": "20.19.25",
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
|
||||||
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
|
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
|
||||||
|
"devOptional": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"undici-types": "~6.21.0"
|
"undici-types": "~6.21.0"
|
||||||
}
|
}
|
||||||
@@ -890,15 +793,6 @@
|
|||||||
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/@types/node-fetch": {
|
|
||||||
"version": "2.6.13",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
|
|
||||||
"integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
|
|
||||||
"dependencies": {
|
|
||||||
"@types/node": "*",
|
|
||||||
"form-data": "^4.0.4"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@types/pg": {
|
"node_modules/@types/pg": {
|
||||||
"version": "8.15.6",
|
"version": "8.15.6",
|
||||||
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
|
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
|
||||||
@@ -952,14 +846,6 @@
|
|||||||
"@types/node": "*"
|
"@types/node": "*"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/stream-buffers": {
|
|
||||||
"version": "3.0.8",
|
|
||||||
"resolved": "https://registry.npmjs.org/@types/stream-buffers/-/stream-buffers-3.0.8.tgz",
|
|
||||||
"integrity": "sha512-J+7VaHKNvlNPJPEJXX/fKa9DZtR/xPMwuIbe+yNOwp1YB+ApUOBv2aUpEoBJEi8nJgbgs1x8e73ttg0r1rSUdw==",
|
|
||||||
"dependencies": {
|
|
||||||
"@types/node": "*"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/@types/uuid": {
|
"node_modules/@types/uuid": {
|
||||||
"version": "9.0.8",
|
"version": "9.0.8",
|
||||||
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
||||||
@@ -1140,78 +1026,6 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/bare-fs": {
|
|
||||||
"version": "4.5.2",
|
|
||||||
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
|
|
||||||
"integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"bare-events": "^2.5.4",
|
|
||||||
"bare-path": "^3.0.0",
|
|
||||||
"bare-stream": "^2.6.4",
|
|
||||||
"bare-url": "^2.2.2",
|
|
||||||
"fast-fifo": "^1.3.2"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"bare": ">=1.16.0"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"bare-buffer": "*"
|
|
||||||
},
|
|
||||||
"peerDependenciesMeta": {
|
|
||||||
"bare-buffer": {
|
|
||||||
"optional": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/bare-os": {
|
|
||||||
"version": "3.6.2",
|
|
||||||
"resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
|
|
||||||
"integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
|
|
||||||
"optional": true,
|
|
||||||
"engines": {
|
|
||||||
"bare": ">=1.14.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/bare-path": {
|
|
||||||
"version": "3.0.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
|
|
||||||
"integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"bare-os": "^3.0.1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/bare-stream": {
|
|
||||||
"version": "2.7.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
|
|
||||||
"integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"streamx": "^2.21.0"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"bare-buffer": "*",
|
|
||||||
"bare-events": "*"
|
|
||||||
},
|
|
||||||
"peerDependenciesMeta": {
|
|
||||||
"bare-buffer": {
|
|
||||||
"optional": true
|
|
||||||
},
|
|
||||||
"bare-events": {
|
|
||||||
"optional": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/bare-url": {
|
|
||||||
"version": "2.3.2",
|
|
||||||
"resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
|
|
||||||
"integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"bare-path": "^3.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/base64-js": {
|
"node_modules/base64-js": {
|
||||||
"version": "1.5.1",
|
"version": "1.5.1",
|
||||||
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
||||||
@@ -2725,14 +2539,6 @@
|
|||||||
"node": ">=16.0.0"
|
"node": ">=16.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/hpagent": {
|
|
||||||
"version": "1.2.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
|
|
||||||
"integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=14"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/htmlparser2": {
|
"node_modules/htmlparser2": {
|
||||||
"version": "10.0.0",
|
"version": "10.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
|
||||||
@@ -3096,22 +2902,6 @@
|
|||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/isomorphic-ws": {
|
|
||||||
"version": "5.0.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
|
|
||||||
"integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
|
|
||||||
"peerDependencies": {
|
|
||||||
"ws": "*"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/jose": {
|
|
||||||
"version": "6.1.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
|
|
||||||
"integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/panva"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/js-tokens": {
|
"node_modules/js-tokens": {
|
||||||
"version": "4.0.0",
|
"version": "4.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
||||||
@@ -3128,14 +2918,6 @@
|
|||||||
"js-yaml": "bin/js-yaml.js"
|
"js-yaml": "bin/js-yaml.js"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/jsep": {
|
|
||||||
"version": "1.4.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
|
|
||||||
"integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 10.16.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/json-parse-even-better-errors": {
|
"node_modules/json-parse-even-better-errors": {
|
||||||
"version": "2.3.1",
|
"version": "2.3.1",
|
||||||
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
|
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
|
||||||
@@ -3157,23 +2939,6 @@
|
|||||||
"graceful-fs": "^4.1.6"
|
"graceful-fs": "^4.1.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/jsonpath-plus": {
|
|
||||||
"version": "10.3.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
|
|
||||||
"integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
|
|
||||||
"dependencies": {
|
|
||||||
"@jsep-plugin/assignment": "^1.3.0",
|
|
||||||
"@jsep-plugin/regex": "^1.0.4",
|
|
||||||
"jsep": "^1.4.0"
|
|
||||||
},
|
|
||||||
"bin": {
|
|
||||||
"jsonpath": "bin/jsonpath-cli.js",
|
|
||||||
"jsonpath-plus": "bin/jsonpath-cli.js"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=18.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/jsonwebtoken": {
|
"node_modules/jsonwebtoken": {
|
||||||
"version": "9.0.2",
|
"version": "9.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
|
||||||
@@ -3248,11 +3013,6 @@
|
|||||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
||||||
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
|
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
|
||||||
},
|
},
|
||||||
"node_modules/lodash.clonedeep": {
|
|
||||||
"version": "4.5.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
|
|
||||||
"integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
|
|
||||||
},
|
|
||||||
"node_modules/lodash.defaults": {
|
"node_modules/lodash.defaults": {
|
||||||
"version": "4.2.0",
|
"version": "4.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
||||||
@@ -3702,14 +3462,6 @@
|
|||||||
"url": "https://github.com/fb55/nth-check?sponsor=1"
|
"url": "https://github.com/fb55/nth-check?sponsor=1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/oauth4webapi": {
|
|
||||||
"version": "3.8.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
|
|
||||||
"integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/panva"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/object-assign": {
|
"node_modules/object-assign": {
|
||||||
"version": "4.1.1",
|
"version": "4.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
||||||
@@ -3748,18 +3500,6 @@
|
|||||||
"wrappy": "1"
|
"wrappy": "1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/openid-client": {
|
|
||||||
"version": "6.8.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
|
|
||||||
"integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
|
|
||||||
"dependencies": {
|
|
||||||
"jose": "^6.1.0",
|
|
||||||
"oauth4webapi": "^3.8.2"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/panva"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/pac-proxy-agent": {
|
"node_modules/pac-proxy-agent": {
|
||||||
"version": "7.2.0",
|
"version": "7.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
||||||
@@ -4676,11 +4416,6 @@
|
|||||||
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/rfc4648": {
|
|
||||||
"version": "1.5.4",
|
|
||||||
"resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
|
|
||||||
"integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
|
|
||||||
},
|
|
||||||
"node_modules/rimraf": {
|
"node_modules/rimraf": {
|
||||||
"version": "3.0.2",
|
"version": "3.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
|
||||||
@@ -5111,14 +4846,6 @@
|
|||||||
"node": ">= 0.8"
|
"node": ">= 0.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/stream-buffers": {
|
|
||||||
"version": "3.0.3",
|
|
||||||
"resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
|
|
||||||
"integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 0.10.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/streamx": {
|
"node_modules/streamx": {
|
||||||
"version": "2.23.0",
|
"version": "2.23.0",
|
||||||
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
|
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
|
||||||
@@ -5338,7 +5065,8 @@
|
|||||||
"node_modules/undici-types": {
|
"node_modules/undici-types": {
|
||||||
"version": "6.21.0",
|
"version": "6.21.0",
|
||||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
||||||
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="
|
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
|
||||||
|
"devOptional": true
|
||||||
},
|
},
|
||||||
"node_modules/universalify": {
|
"node_modules/universalify": {
|
||||||
"version": "2.0.1",
|
"version": "2.0.1",
|
||||||
@@ -5361,14 +5089,6 @@
|
|||||||
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
|
||||||
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
|
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
|
||||||
},
|
},
|
||||||
"node_modules/user-agents": {
|
|
||||||
"version": "1.1.669",
|
|
||||||
"resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
|
|
||||||
"integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
|
|
||||||
"dependencies": {
|
|
||||||
"lodash.clonedeep": "^4.5.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/util": {
|
"node_modules/util": {
|
||||||
"version": "0.12.5",
|
"version": "0.12.5",
|
||||||
"resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
|
"resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "dutchie-menus-backend",
|
"name": "dutchie-menus-backend",
|
||||||
"version": "1.6.0",
|
"version": "1.5.1",
|
||||||
"description": "Backend API for Dutchie Menus scraper and management",
|
"description": "Backend API for Dutchie Menus scraper and management",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
@@ -22,7 +22,6 @@
|
|||||||
"seed:dt:cities:bulk": "tsx src/scripts/seed-dt-cities-bulk.ts"
|
"seed:dt:cities:bulk": "tsx src/scripts/seed-dt-cities-bulk.ts"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@kubernetes/client-node": "^1.4.0",
|
|
||||||
"@types/bcryptjs": "^3.0.0",
|
"@types/bcryptjs": "^3.0.0",
|
||||||
"axios": "^1.6.2",
|
"axios": "^1.6.2",
|
||||||
"bcrypt": "^5.1.1",
|
"bcrypt": "^5.1.1",
|
||||||
@@ -49,7 +48,6 @@
|
|||||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||||
"sharp": "^0.32.0",
|
"sharp": "^0.32.0",
|
||||||
"socks-proxy-agent": "^8.0.2",
|
"socks-proxy-agent": "^8.0.2",
|
||||||
"user-agents": "^1.1.669",
|
|
||||||
"uuid": "^9.0.1",
|
"uuid": "^9.0.1",
|
||||||
"zod": "^3.22.4"
|
"zod": "^3.22.4"
|
||||||
},
|
},
|
||||||
|
|||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
cannaiq-menus-1.6.0.zip
|
|
||||||
@@ -1,46 +0,0 @@
|
|||||||
# DEPRECATED CODE - DO NOT USE
|
|
||||||
|
|
||||||
**These directories contain OLD, ABANDONED code.**
|
|
||||||
|
|
||||||
## What's Here
|
|
||||||
|
|
||||||
| Directory | What It Was | Why Deprecated |
|
|
||||||
|-----------|-------------|----------------|
|
|
||||||
| `hydration/` | Old pipeline for processing crawl data | Replaced by `src/tasks/handlers/` |
|
|
||||||
| `scraper-v2/` | Old Puppeteer-based scraper engine | Replaced by curl-based `src/platforms/dutchie/client.ts` |
|
|
||||||
| `canonical-hydration/` | Intermediate step toward canonical schema | Merged into task handlers |
|
|
||||||
|
|
||||||
## What to Use Instead
|
|
||||||
|
|
||||||
| Old (DONT USE) | New (USE THIS) |
|
|
||||||
|----------------|----------------|
|
|
||||||
| `hydration/normalizers/dutchie.ts` | `src/tasks/handlers/product-refresh.ts` |
|
|
||||||
| `hydration/producer.ts` | `src/tasks/handlers/payload-fetch.ts` |
|
|
||||||
| `scraper-v2/engine.ts` | `src/platforms/dutchie/client.ts` |
|
|
||||||
| `scraper-v2/scheduler.ts` | `src/services/task-scheduler.ts` |
|
|
||||||
|
|
||||||
## Why Keep This Code?
|
|
||||||
|
|
||||||
- Historical reference only
|
|
||||||
- Some patterns may be useful for debugging
|
|
||||||
- Will be deleted once confirmed not needed
|
|
||||||
|
|
||||||
## Claude Instructions
|
|
||||||
|
|
||||||
**IF YOU ARE CLAUDE:**
|
|
||||||
|
|
||||||
1. NEVER import from `src/_deprecated/`
|
|
||||||
2. NEVER reference these files as examples
|
|
||||||
3. NEVER try to "fix" or "update" code in here
|
|
||||||
4. If you see imports from these directories, suggest replacing them
|
|
||||||
|
|
||||||
**Correct imports:**
|
|
||||||
```typescript
|
|
||||||
// GOOD
|
|
||||||
import { executeGraphQL } from '../platforms/dutchie/client';
|
|
||||||
import { pool } from '../db/pool';
|
|
||||||
|
|
||||||
// BAD - DO NOT USE
|
|
||||||
import { something } from '../_deprecated/hydration/...';
|
|
||||||
import { something } from '../_deprecated/scraper-v2/...';
|
|
||||||
```
|
|
||||||
@@ -1,584 +0,0 @@
|
|||||||
/**
|
|
||||||
* System API Routes
|
|
||||||
*
|
|
||||||
* Provides REST API endpoints for system monitoring and control:
|
|
||||||
* - /api/system/sync/* - Sync orchestrator
|
|
||||||
* - /api/system/dlq/* - Dead-letter queue
|
|
||||||
* - /api/system/integrity/* - Integrity checks
|
|
||||||
* - /api/system/fix/* - Auto-fix routines
|
|
||||||
* - /api/system/alerts/* - System alerts
|
|
||||||
* - /metrics - Prometheus metrics
|
|
||||||
*
|
|
||||||
* Phase 5: Full Production Sync + Monitoring
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { Router, Request, Response } from 'express';
|
|
||||||
import { Pool } from 'pg';
|
|
||||||
import {
|
|
||||||
SyncOrchestrator,
|
|
||||||
MetricsService,
|
|
||||||
DLQService,
|
|
||||||
AlertService,
|
|
||||||
IntegrityService,
|
|
||||||
AutoFixService,
|
|
||||||
} from '../services';
|
|
||||||
|
|
||||||
export function createSystemRouter(pool: Pool): Router {
|
|
||||||
const router = Router();
|
|
||||||
|
|
||||||
// Initialize services
|
|
||||||
const metrics = new MetricsService(pool);
|
|
||||||
const dlq = new DLQService(pool);
|
|
||||||
const alerts = new AlertService(pool);
|
|
||||||
const integrity = new IntegrityService(pool, alerts);
|
|
||||||
const autoFix = new AutoFixService(pool, alerts);
|
|
||||||
const orchestrator = new SyncOrchestrator(pool, metrics, dlq, alerts);
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// SYNC ORCHESTRATOR ENDPOINTS
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/sync/status
|
|
||||||
* Get current sync status
|
|
||||||
*/
|
|
||||||
router.get('/sync/status', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const status = await orchestrator.getStatus();
|
|
||||||
res.json(status);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Sync status error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get sync status' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/sync/run
|
|
||||||
* Trigger a sync run
|
|
||||||
*/
|
|
||||||
router.post('/sync/run', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const triggeredBy = req.body.triggeredBy || 'api';
|
|
||||||
const result = await orchestrator.runSync();
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
triggeredBy,
|
|
||||||
metrics: result,
|
|
||||||
});
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Sync run error:', error);
|
|
||||||
res.status(500).json({
|
|
||||||
success: false,
|
|
||||||
error: error instanceof Error ? error.message : 'Sync run failed',
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/sync/queue-depth
|
|
||||||
* Get queue depth information
|
|
||||||
*/
|
|
||||||
router.get('/sync/queue-depth', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const depth = await orchestrator.getQueueDepth();
|
|
||||||
res.json(depth);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Queue depth error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get queue depth' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/sync/health
|
|
||||||
* Get sync health status
|
|
||||||
*/
|
|
||||||
router.get('/sync/health', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const health = await orchestrator.getHealth();
|
|
||||||
res.status(health.healthy ? 200 : 503).json(health);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Health check error:', error);
|
|
||||||
res.status(500).json({ healthy: false, error: 'Health check failed' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/sync/pause
|
|
||||||
* Pause the orchestrator
|
|
||||||
*/
|
|
||||||
router.post('/sync/pause', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const reason = req.body.reason || 'Manual pause';
|
|
||||||
await orchestrator.pause(reason);
|
|
||||||
res.json({ success: true, message: 'Orchestrator paused' });
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Pause error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to pause orchestrator' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/sync/resume
|
|
||||||
* Resume the orchestrator
|
|
||||||
*/
|
|
||||||
router.post('/sync/resume', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
await orchestrator.resume();
|
|
||||||
res.json({ success: true, message: 'Orchestrator resumed' });
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Resume error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to resume orchestrator' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// DLQ ENDPOINTS
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/dlq
|
|
||||||
* List DLQ payloads
|
|
||||||
*/
|
|
||||||
router.get('/dlq', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const options = {
|
|
||||||
status: req.query.status as string,
|
|
||||||
errorType: req.query.errorType as string,
|
|
||||||
dispensaryId: req.query.dispensaryId ? parseInt(req.query.dispensaryId as string) : undefined,
|
|
||||||
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
|
|
||||||
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
const result = await dlq.listPayloads(options);
|
|
||||||
res.json(result);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] DLQ list error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to list DLQ payloads' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/dlq/stats
|
|
||||||
* Get DLQ statistics
|
|
||||||
*/
|
|
||||||
router.get('/dlq/stats', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const stats = await dlq.getStats();
|
|
||||||
res.json(stats);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] DLQ stats error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get DLQ stats' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/dlq/summary
|
|
||||||
* Get DLQ summary by error type
|
|
||||||
*/
|
|
||||||
router.get('/dlq/summary', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const summary = await dlq.getSummary();
|
|
||||||
res.json(summary);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] DLQ summary error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get DLQ summary' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/dlq/:id
|
|
||||||
* Get a specific DLQ payload
|
|
||||||
*/
|
|
||||||
router.get('/dlq/:id', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const payload = await dlq.getPayload(req.params.id);
|
|
||||||
if (!payload) {
|
|
||||||
return res.status(404).json({ error: 'Payload not found' });
|
|
||||||
}
|
|
||||||
res.json(payload);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] DLQ get error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get DLQ payload' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/dlq/:id/retry
|
|
||||||
* Retry a DLQ payload
|
|
||||||
*/
|
|
||||||
router.post('/dlq/:id/retry', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const result = await dlq.retryPayload(req.params.id);
|
|
||||||
if (result.success) {
|
|
||||||
res.json(result);
|
|
||||||
} else {
|
|
||||||
res.status(400).json(result);
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] DLQ retry error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to retry payload' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/dlq/:id/abandon
|
|
||||||
* Abandon a DLQ payload
|
|
||||||
*/
|
|
||||||
router.post('/dlq/:id/abandon', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const reason = req.body.reason || 'Manually abandoned';
|
|
||||||
const abandonedBy = req.body.abandonedBy || 'api';
|
|
||||||
const success = await dlq.abandonPayload(req.params.id, reason, abandonedBy);
|
|
||||||
res.json({ success });
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] DLQ abandon error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to abandon payload' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/dlq/bulk-retry
|
|
||||||
* Bulk retry payloads by error type
|
|
||||||
*/
|
|
||||||
router.post('/dlq/bulk-retry', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { errorType } = req.body;
|
|
||||||
if (!errorType) {
|
|
||||||
return res.status(400).json({ error: 'errorType is required' });
|
|
||||||
}
|
|
||||||
const result = await dlq.bulkRetryByErrorType(errorType);
|
|
||||||
res.json(result);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] DLQ bulk retry error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to bulk retry' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// INTEGRITY CHECK ENDPOINTS
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/integrity/run
|
|
||||||
* Run all integrity checks
|
|
||||||
*/
|
|
||||||
router.post('/integrity/run', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const triggeredBy = req.body.triggeredBy || 'api';
|
|
||||||
const result = await integrity.runAllChecks(triggeredBy);
|
|
||||||
res.json(result);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Integrity run error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to run integrity checks' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/integrity/runs
|
|
||||||
* Get recent integrity check runs
|
|
||||||
*/
|
|
||||||
router.get('/integrity/runs', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const limit = req.query.limit ? parseInt(req.query.limit as string) : 10;
|
|
||||||
const runs = await integrity.getRecentRuns(limit);
|
|
||||||
res.json(runs);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Integrity runs error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get integrity runs' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/integrity/runs/:runId
|
|
||||||
* Get results for a specific integrity run
|
|
||||||
*/
|
|
||||||
router.get('/integrity/runs/:runId', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const results = await integrity.getRunResults(req.params.runId);
|
|
||||||
res.json(results);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Integrity run results error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get run results' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// AUTO-FIX ENDPOINTS
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/fix/routines
|
|
||||||
* Get available fix routines
|
|
||||||
*/
|
|
||||||
router.get('/fix/routines', (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const routines = autoFix.getAvailableRoutines();
|
|
||||||
res.json(routines);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Get routines error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get routines' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/fix/:routine
|
|
||||||
* Run a fix routine
|
|
||||||
*/
|
|
||||||
router.post('/fix/:routine', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const routineName = req.params.routine;
|
|
||||||
const dryRun = req.body.dryRun === true;
|
|
||||||
const triggeredBy = req.body.triggeredBy || 'api';
|
|
||||||
|
|
||||||
const result = await autoFix.runRoutine(routineName as any, triggeredBy, { dryRun });
|
|
||||||
res.json(result);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Fix routine error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to run fix routine' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/fix/runs
|
|
||||||
* Get recent fix runs
|
|
||||||
*/
|
|
||||||
router.get('/fix/runs', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const limit = req.query.limit ? parseInt(req.query.limit as string) : 20;
|
|
||||||
const runs = await autoFix.getRecentRuns(limit);
|
|
||||||
res.json(runs);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Fix runs error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get fix runs' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// ALERTS ENDPOINTS
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/alerts
|
|
||||||
* List alerts
|
|
||||||
*/
|
|
||||||
router.get('/alerts', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const options = {
|
|
||||||
status: req.query.status as any,
|
|
||||||
severity: req.query.severity as any,
|
|
||||||
type: req.query.type as string,
|
|
||||||
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
|
|
||||||
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
const result = await alerts.listAlerts(options);
|
|
||||||
res.json(result);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Alerts list error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to list alerts' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/alerts/active
|
|
||||||
* Get active alerts
|
|
||||||
*/
|
|
||||||
router.get('/alerts/active', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const activeAlerts = await alerts.getActiveAlerts();
|
|
||||||
res.json(activeAlerts);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Active alerts error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get active alerts' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/alerts/summary
|
|
||||||
* Get alert summary
|
|
||||||
*/
|
|
||||||
router.get('/alerts/summary', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const summary = await alerts.getSummary();
|
|
||||||
res.json(summary);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Alerts summary error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get alerts summary' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/alerts/:id/acknowledge
|
|
||||||
* Acknowledge an alert
|
|
||||||
*/
|
|
||||||
router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const alertId = parseInt(req.params.id);
|
|
||||||
const acknowledgedBy = req.body.acknowledgedBy || 'api';
|
|
||||||
const success = await alerts.acknowledgeAlert(alertId, acknowledgedBy);
|
|
||||||
res.json({ success });
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Acknowledge alert error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to acknowledge alert' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/alerts/:id/resolve
|
|
||||||
* Resolve an alert
|
|
||||||
*/
|
|
||||||
router.post('/alerts/:id/resolve', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const alertId = parseInt(req.params.id);
|
|
||||||
const resolvedBy = req.body.resolvedBy || 'api';
|
|
||||||
const success = await alerts.resolveAlert(alertId, resolvedBy);
|
|
||||||
res.json({ success });
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Resolve alert error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to resolve alert' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/alerts/bulk-acknowledge
|
|
||||||
* Bulk acknowledge alerts
|
|
||||||
*/
|
|
||||||
router.post('/alerts/bulk-acknowledge', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { ids, acknowledgedBy } = req.body;
|
|
||||||
if (!ids || !Array.isArray(ids)) {
|
|
||||||
return res.status(400).json({ error: 'ids array is required' });
|
|
||||||
}
|
|
||||||
const count = await alerts.bulkAcknowledge(ids, acknowledgedBy || 'api');
|
|
||||||
res.json({ acknowledged: count });
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Bulk acknowledge error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to bulk acknowledge' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// METRICS ENDPOINTS
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/metrics
|
|
||||||
* Get all current metrics
|
|
||||||
*/
|
|
||||||
router.get('/metrics', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const allMetrics = await metrics.getAllMetrics();
|
|
||||||
res.json(allMetrics);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Metrics error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get metrics' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/metrics/:name
|
|
||||||
* Get a specific metric
|
|
||||||
*/
|
|
||||||
router.get('/metrics/:name', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const metric = await metrics.getMetric(req.params.name);
|
|
||||||
if (!metric) {
|
|
||||||
return res.status(404).json({ error: 'Metric not found' });
|
|
||||||
}
|
|
||||||
res.json(metric);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Metric error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get metric' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/metrics/:name/history
|
|
||||||
* Get metric time series
|
|
||||||
*/
|
|
||||||
router.get('/metrics/:name/history', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const hours = req.query.hours ? parseInt(req.query.hours as string) : 24;
|
|
||||||
const history = await metrics.getMetricHistory(req.params.name, hours);
|
|
||||||
res.json(history);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Metric history error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get metric history' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/errors
|
|
||||||
* Get error summary
|
|
||||||
*/
|
|
||||||
router.get('/errors', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const summary = await metrics.getErrorSummary();
|
|
||||||
res.json(summary);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Error summary error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get error summary' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/system/errors/recent
|
|
||||||
* Get recent errors
|
|
||||||
*/
|
|
||||||
router.get('/errors/recent', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const limit = req.query.limit ? parseInt(req.query.limit as string) : 50;
|
|
||||||
const errorType = req.query.type as string;
|
|
||||||
const errors = await metrics.getRecentErrors(limit, errorType);
|
|
||||||
res.json(errors);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Recent errors error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to get recent errors' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/system/errors/acknowledge
|
|
||||||
* Acknowledge errors
|
|
||||||
*/
|
|
||||||
router.post('/errors/acknowledge', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { ids, acknowledgedBy } = req.body;
|
|
||||||
if (!ids || !Array.isArray(ids)) {
|
|
||||||
return res.status(400).json({ error: 'ids array is required' });
|
|
||||||
}
|
|
||||||
const count = await metrics.acknowledgeErrors(ids, acknowledgedBy || 'api');
|
|
||||||
res.json({ acknowledged: count });
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[System] Acknowledge errors error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to acknowledge errors' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
return router;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create Prometheus metrics endpoint (standalone)
|
|
||||||
*/
|
|
||||||
export function createPrometheusRouter(pool: Pool): Router {
|
|
||||||
const router = Router();
|
|
||||||
const metrics = new MetricsService(pool);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /metrics
|
|
||||||
* Prometheus-compatible metrics endpoint
|
|
||||||
*/
|
|
||||||
router.get('/', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const prometheusOutput = await metrics.getPrometheusMetrics();
|
|
||||||
res.set('Content-Type', 'text/plain; version=0.0.4');
|
|
||||||
res.send(prometheusOutput);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[Prometheus] Metrics error:', error);
|
|
||||||
res.status(500).send('# Error generating metrics');
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
return router;
|
|
||||||
}
|
|
||||||
@@ -32,7 +32,6 @@ const TRUSTED_ORIGINS = [
|
|||||||
// Pattern-based trusted origins (wildcards)
|
// Pattern-based trusted origins (wildcards)
|
||||||
const TRUSTED_ORIGIN_PATTERNS = [
|
const TRUSTED_ORIGIN_PATTERNS = [
|
||||||
/^https:\/\/.*\.cannabrands\.app$/, // *.cannabrands.app
|
/^https:\/\/.*\.cannabrands\.app$/, // *.cannabrands.app
|
||||||
/^https:\/\/.*\.cannaiq\.co$/, // *.cannaiq.co
|
|
||||||
];
|
];
|
||||||
|
|
||||||
// Trusted IPs for internal pod-to-pod communication
|
// Trusted IPs for internal pod-to-pod communication
|
||||||
@@ -153,53 +152,7 @@ export async function authenticateUser(email: string, password: string): Promise
|
|||||||
}
|
}
|
||||||
|
|
||||||
export async function authMiddleware(req: AuthRequest, res: Response, next: NextFunction) {
|
export async function authMiddleware(req: AuthRequest, res: Response, next: NextFunction) {
|
||||||
const authHeader = req.headers.authorization;
|
// Allow trusted origins/IPs to bypass auth (internal services, same-origin)
|
||||||
|
|
||||||
// If a Bearer token is provided, always try to use it first (logged-in user)
|
|
||||||
if (authHeader && authHeader.startsWith('Bearer ')) {
|
|
||||||
const token = authHeader.substring(7);
|
|
||||||
|
|
||||||
// Try JWT first
|
|
||||||
const jwtUser = verifyToken(token);
|
|
||||||
|
|
||||||
if (jwtUser) {
|
|
||||||
req.user = jwtUser;
|
|
||||||
return next();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If JWT fails, try API token
|
|
||||||
try {
|
|
||||||
const result = await pool.query(`
|
|
||||||
SELECT id, name, rate_limit, active, expires_at, allowed_endpoints
|
|
||||||
FROM api_tokens
|
|
||||||
WHERE token = $1
|
|
||||||
`, [token]);
|
|
||||||
|
|
||||||
if (result.rows.length > 0) {
|
|
||||||
const apiToken = result.rows[0];
|
|
||||||
if (!apiToken.active) {
|
|
||||||
return res.status(401).json({ error: 'API token is inactive' });
|
|
||||||
}
|
|
||||||
if (apiToken.expires_at && new Date(apiToken.expires_at) < new Date()) {
|
|
||||||
return res.status(401).json({ error: 'API token has expired' });
|
|
||||||
}
|
|
||||||
req.user = {
|
|
||||||
id: 0,
|
|
||||||
email: `api:${apiToken.name}`,
|
|
||||||
role: 'api_token'
|
|
||||||
};
|
|
||||||
req.apiToken = apiToken;
|
|
||||||
return next();
|
|
||||||
}
|
|
||||||
} catch (err) {
|
|
||||||
console.error('API token lookup error:', err);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Token provided but invalid
|
|
||||||
return res.status(401).json({ error: 'Invalid token' });
|
|
||||||
}
|
|
||||||
|
|
||||||
// No token provided - check trusted origins for API access (WordPress, etc.)
|
|
||||||
if (isTrustedRequest(req)) {
|
if (isTrustedRequest(req)) {
|
||||||
req.user = {
|
req.user = {
|
||||||
id: 0,
|
id: 0,
|
||||||
@@ -209,9 +162,79 @@ export async function authMiddleware(req: AuthRequest, res: Response, next: Next
|
|||||||
return next();
|
return next();
|
||||||
}
|
}
|
||||||
|
|
||||||
return res.status(401).json({ error: 'No token provided' });
|
const authHeader = req.headers.authorization;
|
||||||
}
|
|
||||||
|
|
||||||
|
if (!authHeader || !authHeader.startsWith('Bearer ')) {
|
||||||
|
return res.status(401).json({ error: 'No token provided' });
|
||||||
|
}
|
||||||
|
|
||||||
|
const token = authHeader.substring(7);
|
||||||
|
|
||||||
|
// Try JWT first
|
||||||
|
const jwtUser = verifyToken(token);
|
||||||
|
|
||||||
|
if (jwtUser) {
|
||||||
|
req.user = jwtUser;
|
||||||
|
return next();
|
||||||
|
}
|
||||||
|
|
||||||
|
// If JWT fails, try API token
|
||||||
|
try {
|
||||||
|
const result = await pool.query(`
|
||||||
|
SELECT id, name, rate_limit, active, expires_at, allowed_endpoints
|
||||||
|
FROM api_tokens
|
||||||
|
WHERE token = $1
|
||||||
|
`, [token]);
|
||||||
|
|
||||||
|
if (result.rows.length === 0) {
|
||||||
|
return res.status(401).json({ error: 'Invalid token' });
|
||||||
|
}
|
||||||
|
|
||||||
|
const apiToken = result.rows[0];
|
||||||
|
|
||||||
|
// Check if token is active
|
||||||
|
if (!apiToken.active) {
|
||||||
|
return res.status(401).json({ error: 'Token is disabled' });
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if token is expired
|
||||||
|
if (apiToken.expires_at && new Date(apiToken.expires_at) < new Date()) {
|
||||||
|
return res.status(401).json({ error: 'Token has expired' });
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check allowed endpoints
|
||||||
|
if (apiToken.allowed_endpoints && apiToken.allowed_endpoints.length > 0) {
|
||||||
|
const isAllowed = apiToken.allowed_endpoints.some((pattern: string) => {
|
||||||
|
// Simple wildcard matching
|
||||||
|
const regex = new RegExp('^' + pattern.replace('*', '.*') + '$');
|
||||||
|
return regex.test(req.path);
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!isAllowed) {
|
||||||
|
return res.status(403).json({ error: 'Endpoint not allowed for this token' });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set API token on request for tracking
|
||||||
|
req.apiToken = {
|
||||||
|
id: apiToken.id,
|
||||||
|
name: apiToken.name,
|
||||||
|
rate_limit: apiToken.rate_limit
|
||||||
|
};
|
||||||
|
|
||||||
|
// Set a generic user for compatibility with existing code
|
||||||
|
req.user = {
|
||||||
|
id: apiToken.id,
|
||||||
|
email: `api-token-${apiToken.id}@system`,
|
||||||
|
role: 'api'
|
||||||
|
};
|
||||||
|
|
||||||
|
next();
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error verifying API token:', error);
|
||||||
|
return res.status(500).json({ error: 'Authentication failed' });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Require specific role(s) to access endpoint.
|
* Require specific role(s) to access endpoint.
|
||||||
|
|||||||
@@ -172,9 +172,6 @@ export async function runFullDiscovery(
|
|||||||
console.log(`Errors: ${totalErrors}`);
|
console.log(`Errors: ${totalErrors}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
|
|
||||||
let newDispensaryIds: number[] = [];
|
|
||||||
|
|
||||||
// Step 4: Auto-validate and promote discovered locations
|
// Step 4: Auto-validate and promote discovered locations
|
||||||
if (!dryRun && totalLocationsUpserted > 0) {
|
if (!dryRun && totalLocationsUpserted > 0) {
|
||||||
console.log('\n[Discovery] Step 4: Auto-promoting discovered locations...');
|
console.log('\n[Discovery] Step 4: Auto-promoting discovered locations...');
|
||||||
@@ -183,13 +180,6 @@ export async function runFullDiscovery(
|
|||||||
console.log(` Created: ${promotionResult.created} new dispensaries`);
|
console.log(` Created: ${promotionResult.created} new dispensaries`);
|
||||||
console.log(` Updated: ${promotionResult.updated} existing dispensaries`);
|
console.log(` Updated: ${promotionResult.updated} existing dispensaries`);
|
||||||
console.log(` Rejected: ${promotionResult.rejected} (validation failed)`);
|
console.log(` Rejected: ${promotionResult.rejected} (validation failed)`);
|
||||||
|
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Capture new IDs for task chaining
|
|
||||||
newDispensaryIds = promotionResult.newDispensaryIds;
|
|
||||||
if (newDispensaryIds.length > 0) {
|
|
||||||
console.log(` New store IDs for crawl: [${newDispensaryIds.join(', ')}]`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (promotionResult.rejectedRecords.length > 0) {
|
if (promotionResult.rejectedRecords.length > 0) {
|
||||||
console.log(` Rejection reasons:`);
|
console.log(` Rejection reasons:`);
|
||||||
promotionResult.rejectedRecords.slice(0, 5).forEach(r => {
|
promotionResult.rejectedRecords.slice(0, 5).forEach(r => {
|
||||||
@@ -224,8 +214,6 @@ export async function runFullDiscovery(
|
|||||||
totalLocationsFound,
|
totalLocationsFound,
|
||||||
totalLocationsUpserted,
|
totalLocationsUpserted,
|
||||||
durationMs,
|
durationMs,
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
|
|
||||||
newDispensaryIds,
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -127,8 +127,6 @@ export interface PromotionSummary {
|
|||||||
errors: string[];
|
errors: string[];
|
||||||
}>;
|
}>;
|
||||||
durationMs: number;
|
durationMs: number;
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
|
|
||||||
newDispensaryIds: number[];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -471,8 +469,6 @@ export async function promoteDiscoveredLocations(
|
|||||||
|
|
||||||
const results: PromotionResult[] = [];
|
const results: PromotionResult[] = [];
|
||||||
const rejectedRecords: PromotionSummary['rejectedRecords'] = [];
|
const rejectedRecords: PromotionSummary['rejectedRecords'] = [];
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
|
|
||||||
const newDispensaryIds: number[] = [];
|
|
||||||
let created = 0;
|
let created = 0;
|
||||||
let updated = 0;
|
let updated = 0;
|
||||||
let skipped = 0;
|
let skipped = 0;
|
||||||
@@ -529,8 +525,6 @@ export async function promoteDiscoveredLocations(
|
|||||||
|
|
||||||
if (promotionResult.action === 'created') {
|
if (promotionResult.action === 'created') {
|
||||||
created++;
|
created++;
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Track new IDs for task chaining
|
|
||||||
newDispensaryIds.push(promotionResult.dispensaryId);
|
|
||||||
} else {
|
} else {
|
||||||
updated++;
|
updated++;
|
||||||
}
|
}
|
||||||
@@ -554,8 +548,6 @@ export async function promoteDiscoveredLocations(
|
|||||||
results,
|
results,
|
||||||
rejectedRecords,
|
rejectedRecords,
|
||||||
durationMs: Date.now() - startTime,
|
durationMs: Date.now() - startTime,
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
|
|
||||||
newDispensaryIds,
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -211,8 +211,6 @@ export interface FullDiscoveryResult {
|
|||||||
totalLocationsFound: number;
|
totalLocationsFound: number;
|
||||||
totalLocationsUpserted: number;
|
totalLocationsUpserted: number;
|
||||||
durationMs: number;
|
durationMs: number;
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
|
|
||||||
newDispensaryIds?: number[];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ export async function upsertStoreProducts(
|
|||||||
name_raw, brand_name_raw, category_raw, subcategory_raw,
|
name_raw, brand_name_raw, category_raw, subcategory_raw,
|
||||||
price_rec, price_med, price_rec_special, price_med_special,
|
price_rec, price_med, price_rec_special, price_med_special,
|
||||||
is_on_special, discount_percent,
|
is_on_special, discount_percent,
|
||||||
is_in_stock, stock_status, stock_quantity, total_quantity_available,
|
is_in_stock, stock_status,
|
||||||
thc_percent, cbd_percent,
|
thc_percent, cbd_percent,
|
||||||
image_url,
|
image_url,
|
||||||
first_seen_at, last_seen_at, updated_at
|
first_seen_at, last_seen_at, updated_at
|
||||||
@@ -99,9 +99,9 @@ export async function upsertStoreProducts(
|
|||||||
$5, $6, $7, $8,
|
$5, $6, $7, $8,
|
||||||
$9, $10, $11, $12,
|
$9, $10, $11, $12,
|
||||||
$13, $14,
|
$13, $14,
|
||||||
$15, $16, $17, $17,
|
$15, $16,
|
||||||
$18, $19,
|
$17, $18,
|
||||||
$20,
|
$19,
|
||||||
NOW(), NOW(), NOW()
|
NOW(), NOW(), NOW()
|
||||||
)
|
)
|
||||||
ON CONFLICT (dispensary_id, provider, provider_product_id)
|
ON CONFLICT (dispensary_id, provider, provider_product_id)
|
||||||
@@ -118,8 +118,6 @@ export async function upsertStoreProducts(
|
|||||||
discount_percent = EXCLUDED.discount_percent,
|
discount_percent = EXCLUDED.discount_percent,
|
||||||
is_in_stock = EXCLUDED.is_in_stock,
|
is_in_stock = EXCLUDED.is_in_stock,
|
||||||
stock_status = EXCLUDED.stock_status,
|
stock_status = EXCLUDED.stock_status,
|
||||||
stock_quantity = EXCLUDED.stock_quantity,
|
|
||||||
total_quantity_available = EXCLUDED.total_quantity_available,
|
|
||||||
thc_percent = EXCLUDED.thc_percent,
|
thc_percent = EXCLUDED.thc_percent,
|
||||||
cbd_percent = EXCLUDED.cbd_percent,
|
cbd_percent = EXCLUDED.cbd_percent,
|
||||||
image_url = EXCLUDED.image_url,
|
image_url = EXCLUDED.image_url,
|
||||||
@@ -143,7 +141,6 @@ export async function upsertStoreProducts(
|
|||||||
productPricing?.discountPercent,
|
productPricing?.discountPercent,
|
||||||
productAvailability?.inStock ?? true,
|
productAvailability?.inStock ?? true,
|
||||||
productAvailability?.stockStatus || 'unknown',
|
productAvailability?.stockStatus || 'unknown',
|
||||||
productAvailability?.quantity ?? null, // stock_quantity and total_quantity_available
|
|
||||||
// Clamp THC/CBD to valid percentage range (0-100) - some products report mg as %
|
// Clamp THC/CBD to valid percentage range (0-100) - some products report mg as %
|
||||||
product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null,
|
product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null,
|
||||||
product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null,
|
product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null,
|
||||||
|
|||||||
@@ -6,8 +6,6 @@ import { initializeMinio, isMinioEnabled } from './utils/minio';
|
|||||||
import { initializeImageStorage } from './utils/image-storage';
|
import { initializeImageStorage } from './utils/image-storage';
|
||||||
import { logger } from './services/logger';
|
import { logger } from './services/logger';
|
||||||
import { cleanupOrphanedJobs } from './services/proxyTestQueue';
|
import { cleanupOrphanedJobs } from './services/proxyTestQueue';
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Database-driven task scheduler
|
|
||||||
import { taskScheduler } from './services/task-scheduler';
|
|
||||||
import { runAutoMigrations } from './db/auto-migrate';
|
import { runAutoMigrations } from './db/auto-migrate';
|
||||||
import { getPool } from './db/pool';
|
import { getPool } from './db/pool';
|
||||||
import healthRoutes from './routes/health';
|
import healthRoutes from './routes/health';
|
||||||
@@ -109,7 +107,7 @@ import scraperMonitorRoutes from './routes/scraper-monitor';
|
|||||||
import apiTokensRoutes from './routes/api-tokens';
|
import apiTokensRoutes from './routes/api-tokens';
|
||||||
import apiPermissionsRoutes from './routes/api-permissions';
|
import apiPermissionsRoutes from './routes/api-permissions';
|
||||||
import parallelScrapeRoutes from './routes/parallel-scrape';
|
import parallelScrapeRoutes from './routes/parallel-scrape';
|
||||||
// crawler-sandbox moved to _deprecated
|
import crawlerSandboxRoutes from './routes/crawler-sandbox';
|
||||||
import versionRoutes from './routes/version';
|
import versionRoutes from './routes/version';
|
||||||
import deployStatusRoutes from './routes/deploy-status';
|
import deployStatusRoutes from './routes/deploy-status';
|
||||||
import publicApiRoutes from './routes/public-api';
|
import publicApiRoutes from './routes/public-api';
|
||||||
@@ -131,6 +129,7 @@ import { createStatesRouter } from './routes/states';
|
|||||||
import { createAnalyticsV2Router } from './routes/analytics-v2';
|
import { createAnalyticsV2Router } from './routes/analytics-v2';
|
||||||
import { createDiscoveryRoutes } from './discovery';
|
import { createDiscoveryRoutes } from './discovery';
|
||||||
import pipelineRoutes from './routes/pipeline';
|
import pipelineRoutes from './routes/pipeline';
|
||||||
|
import { getPool } from './db/pool';
|
||||||
|
|
||||||
// Consumer API routes (findadispo.com, findagram.co)
|
// Consumer API routes (findadispo.com, findagram.co)
|
||||||
import consumerAuthRoutes from './routes/consumer-auth';
|
import consumerAuthRoutes from './routes/consumer-auth';
|
||||||
@@ -144,9 +143,6 @@ import seoRoutes from './routes/seo';
|
|||||||
import priceAnalyticsRoutes from './routes/price-analytics';
|
import priceAnalyticsRoutes from './routes/price-analytics';
|
||||||
import tasksRoutes from './routes/tasks';
|
import tasksRoutes from './routes/tasks';
|
||||||
import workerRegistryRoutes from './routes/worker-registry';
|
import workerRegistryRoutes from './routes/worker-registry';
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Raw payload access API
|
|
||||||
import payloadsRoutes from './routes/payloads';
|
|
||||||
import k8sRoutes from './routes/k8s';
|
|
||||||
|
|
||||||
// Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com)
|
// Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com)
|
||||||
// These domains can access the API without authentication
|
// These domains can access the API without authentication
|
||||||
@@ -187,7 +183,7 @@ app.use('/api/scraper-monitor', scraperMonitorRoutes);
|
|||||||
app.use('/api/api-tokens', apiTokensRoutes);
|
app.use('/api/api-tokens', apiTokensRoutes);
|
||||||
app.use('/api/api-permissions', apiPermissionsRoutes);
|
app.use('/api/api-permissions', apiPermissionsRoutes);
|
||||||
app.use('/api/parallel-scrape', parallelScrapeRoutes);
|
app.use('/api/parallel-scrape', parallelScrapeRoutes);
|
||||||
// crawler-sandbox moved to _deprecated
|
app.use('/api/crawler-sandbox', crawlerSandboxRoutes);
|
||||||
app.use('/api/version', versionRoutes);
|
app.use('/api/version', versionRoutes);
|
||||||
app.use('/api/admin/deploy-status', deployStatusRoutes);
|
app.use('/api/admin/deploy-status', deployStatusRoutes);
|
||||||
console.log('[DeployStatus] Routes registered at /api/admin/deploy-status');
|
console.log('[DeployStatus] Routes registered at /api/admin/deploy-status');
|
||||||
@@ -227,14 +223,6 @@ console.log('[Tasks] Routes registered at /api/tasks');
|
|||||||
app.use('/api/worker-registry', workerRegistryRoutes);
|
app.use('/api/worker-registry', workerRegistryRoutes);
|
||||||
console.log('[WorkerRegistry] Routes registered at /api/worker-registry');
|
console.log('[WorkerRegistry] Routes registered at /api/worker-registry');
|
||||||
|
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Raw payload access API
|
|
||||||
app.use('/api/payloads', payloadsRoutes);
|
|
||||||
console.log('[Payloads] Routes registered at /api/payloads');
|
|
||||||
|
|
||||||
// K8s control routes - worker scaling from admin UI
|
|
||||||
app.use('/api/k8s', k8sRoutes);
|
|
||||||
console.log('[K8s] Routes registered at /api/k8s');
|
|
||||||
|
|
||||||
// Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation
|
// Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation
|
||||||
try {
|
try {
|
||||||
const analyticsV2Router = createAnalyticsV2Router(getPool());
|
const analyticsV2Router = createAnalyticsV2Router(getPool());
|
||||||
@@ -339,17 +327,6 @@ async function startServer() {
|
|||||||
// Clean up any orphaned proxy test jobs from previous server runs
|
// Clean up any orphaned proxy test jobs from previous server runs
|
||||||
await cleanupOrphanedJobs();
|
await cleanupOrphanedJobs();
|
||||||
|
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Start database-driven task scheduler
|
|
||||||
// This replaces node-cron - schedules are stored in DB and survive restarts
|
|
||||||
// Uses SELECT FOR UPDATE SKIP LOCKED for multi-replica safety
|
|
||||||
try {
|
|
||||||
await taskScheduler.start();
|
|
||||||
logger.info('system', 'Task scheduler started');
|
|
||||||
} catch (err: any) {
|
|
||||||
// Non-fatal - scheduler can recover on next poll
|
|
||||||
logger.warn('system', `Task scheduler startup warning: ${err.message}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
app.listen(PORT, () => {
|
app.listen(PORT, () => {
|
||||||
logger.info('system', `Server running on port ${PORT}`);
|
logger.info('system', `Server running on port ${PORT}`);
|
||||||
console.log(`🚀 Server running on port ${PORT}`);
|
console.log(`🚀 Server running on port ${PORT}`);
|
||||||
|
|||||||
@@ -5,8 +5,8 @@ import { Request, Response, NextFunction } from 'express';
|
|||||||
* These are our own frontends that should have unrestricted access.
|
* These are our own frontends that should have unrestricted access.
|
||||||
*/
|
*/
|
||||||
const TRUSTED_DOMAINS = [
|
const TRUSTED_DOMAINS = [
|
||||||
'*.cannaiq.co',
|
'cannaiq.co',
|
||||||
'*.cannabrands.app',
|
'www.cannaiq.co',
|
||||||
'findagram.co',
|
'findagram.co',
|
||||||
'www.findagram.co',
|
'www.findagram.co',
|
||||||
'findadispo.com',
|
'findadispo.com',
|
||||||
@@ -32,24 +32,6 @@ function extractDomain(header: string): string | null {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks if a domain matches any trusted domain (supports *.domain.com wildcards)
|
|
||||||
*/
|
|
||||||
function isTrustedDomain(domain: string): boolean {
|
|
||||||
for (const trusted of TRUSTED_DOMAINS) {
|
|
||||||
if (trusted.startsWith('*.')) {
|
|
||||||
// Wildcard: *.example.com matches example.com and any subdomain
|
|
||||||
const baseDomain = trusted.slice(2);
|
|
||||||
if (domain === baseDomain || domain.endsWith('.' + baseDomain)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} else if (domain === trusted) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if the request comes from a trusted domain
|
* Checks if the request comes from a trusted domain
|
||||||
*/
|
*/
|
||||||
@@ -60,7 +42,7 @@ function isRequestFromTrustedDomain(req: Request): boolean {
|
|||||||
// Check Origin header first (preferred for CORS requests)
|
// Check Origin header first (preferred for CORS requests)
|
||||||
if (origin) {
|
if (origin) {
|
||||||
const domain = extractDomain(origin);
|
const domain = extractDomain(origin);
|
||||||
if (domain && isTrustedDomain(domain)) {
|
if (domain && TRUSTED_DOMAINS.includes(domain)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -68,7 +50,7 @@ function isRequestFromTrustedDomain(req: Request): boolean {
|
|||||||
// Fallback to Referer header
|
// Fallback to Referer header
|
||||||
if (referer) {
|
if (referer) {
|
||||||
const domain = extractDomain(referer);
|
const domain = extractDomain(referer);
|
||||||
if (domain && isTrustedDomain(domain)) {
|
if (domain && TRUSTED_DOMAINS.includes(domain)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -702,10 +702,12 @@ export class StateQueryService {
|
|||||||
async getNationalSummary(): Promise<NationalSummary> {
|
async getNationalSummary(): Promise<NationalSummary> {
|
||||||
const stateMetrics = await this.getAllStateMetrics();
|
const stateMetrics = await this.getAllStateMetrics();
|
||||||
|
|
||||||
// Get all states count and aggregate metrics
|
|
||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
COUNT(DISTINCT s.code) AS total_states,
|
COUNT(DISTINCT s.code) AS total_states,
|
||||||
|
COUNT(DISTINCT CASE WHEN EXISTS (
|
||||||
|
SELECT 1 FROM dispensaries d WHERE d.state = s.code AND d.menu_type IS NOT NULL
|
||||||
|
) THEN s.code END) AS active_states,
|
||||||
(SELECT COUNT(*) FROM dispensaries WHERE state IS NOT NULL) AS total_stores,
|
(SELECT COUNT(*) FROM dispensaries WHERE state IS NOT NULL) AS total_stores,
|
||||||
(SELECT COUNT(*) FROM store_products sp
|
(SELECT COUNT(*) FROM store_products sp
|
||||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||||
@@ -723,7 +725,7 @@ export class StateQueryService {
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
totalStates: parseInt(data.total_states),
|
totalStates: parseInt(data.total_states),
|
||||||
activeStates: parseInt(data.total_states), // Same as totalStates - all states shown
|
activeStates: parseInt(data.active_states),
|
||||||
totalStores: parseInt(data.total_stores),
|
totalStores: parseInt(data.total_stores),
|
||||||
totalProducts: parseInt(data.total_products),
|
totalProducts: parseInt(data.total_products),
|
||||||
totalBrands: parseInt(data.total_brands),
|
totalBrands: parseInt(data.total_brands),
|
||||||
|
|||||||
@@ -5,35 +5,22 @@
|
|||||||
*
|
*
|
||||||
* DO NOT MODIFY THIS FILE WITHOUT EXPLICIT AUTHORIZATION.
|
* DO NOT MODIFY THIS FILE WITHOUT EXPLICIT AUTHORIZATION.
|
||||||
*
|
*
|
||||||
* Updated: 2025-12-10 per workflow-12102025.md
|
* This is the canonical HTTP client for all Dutchie communication.
|
||||||
*
|
* All Dutchie workers (Alice, Bella, etc.) MUST use this client.
|
||||||
* KEY BEHAVIORS (per workflow-12102025.md):
|
|
||||||
* 1. startSession() gets identity from PROXY LOCATION, not task params
|
|
||||||
* 2. On 403: immediately get new IP + new fingerprint, then retry
|
|
||||||
* 3. After 3 consecutive 403s on same proxy → disable it (burned)
|
|
||||||
* 4. Language is always English (en-US)
|
|
||||||
*
|
*
|
||||||
* IMPLEMENTATION:
|
* IMPLEMENTATION:
|
||||||
* - Uses curl via child_process.execSync (bypasses TLS fingerprinting)
|
* - Uses curl via child_process.execSync (bypasses TLS fingerprinting)
|
||||||
* - NO Puppeteer, NO axios, NO fetch
|
* - NO Puppeteer, NO axios, NO fetch
|
||||||
* - Uses intoli/user-agents via CrawlRotator for realistic fingerprints
|
* - Fingerprint rotation on 403
|
||||||
* - Residential IP compatible
|
* - Residential IP compatible
|
||||||
*
|
*
|
||||||
* USAGE:
|
* USAGE:
|
||||||
* import { curlPost, curlGet, executeGraphQL, startSession } from '@dutchie/client';
|
* import { curlPost, curlGet, executeGraphQL } from '@dutchie/client';
|
||||||
*
|
*
|
||||||
* ============================================================
|
* ============================================================
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { execSync } from 'child_process';
|
import { execSync } from 'child_process';
|
||||||
import {
|
|
||||||
buildOrderedHeaders,
|
|
||||||
buildRefererFromMenuUrl,
|
|
||||||
getCurlBinary,
|
|
||||||
isCurlImpersonateAvailable,
|
|
||||||
HeaderContext,
|
|
||||||
BrowserType,
|
|
||||||
} from '../../services/http-fingerprint';
|
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// TYPES
|
// TYPES
|
||||||
@@ -45,8 +32,6 @@ export interface CurlResponse {
|
|||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Per workflow-12102025.md: fingerprint comes from CrawlRotator's BrowserFingerprint
|
|
||||||
// We keep a simplified interface here for header building
|
|
||||||
export interface Fingerprint {
|
export interface Fingerprint {
|
||||||
userAgent: string;
|
userAgent: string;
|
||||||
acceptLanguage: string;
|
acceptLanguage: string;
|
||||||
@@ -72,13 +57,15 @@ export const DUTCHIE_CONFIG = {
|
|||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// PROXY SUPPORT
|
// PROXY SUPPORT
|
||||||
// Per workflow-12102025.md:
|
// ============================================================
|
||||||
// - On 403: recordBlock() → increment consecutive_403_count
|
// Integrates with the CrawlRotator system from proxy-rotator.ts
|
||||||
// - After 3 consecutive 403s → proxy disabled
|
// On 403 errors:
|
||||||
// - Immediately rotate to new IP + new fingerprint on 403
|
// 1. Record failure on current proxy
|
||||||
|
// 2. Rotate to next proxy
|
||||||
|
// 3. Retry with new proxy
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
import type { CrawlRotator, BrowserFingerprint } from '../../services/crawl-rotator';
|
import type { CrawlRotator, Proxy } from '../../services/crawl-rotator';
|
||||||
|
|
||||||
let currentProxy: string | null = null;
|
let currentProxy: string | null = null;
|
||||||
let crawlRotator: CrawlRotator | null = null;
|
let crawlRotator: CrawlRotator | null = null;
|
||||||
@@ -105,12 +92,13 @@ export function getProxy(): string | null {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Set CrawlRotator for proxy rotation on 403s
|
* Set CrawlRotator for proxy rotation on 403s
|
||||||
* Per workflow-12102025.md: enables automatic rotation when blocked
|
* This enables automatic proxy rotation when blocked
|
||||||
*/
|
*/
|
||||||
export function setCrawlRotator(rotator: CrawlRotator | null): void {
|
export function setCrawlRotator(rotator: CrawlRotator | null): void {
|
||||||
crawlRotator = rotator;
|
crawlRotator = rotator;
|
||||||
if (rotator) {
|
if (rotator) {
|
||||||
console.log('[Dutchie Client] CrawlRotator attached - proxy rotation enabled');
|
console.log('[Dutchie Client] CrawlRotator attached - proxy rotation enabled');
|
||||||
|
// Set initial proxy from rotator
|
||||||
const proxy = rotator.proxy.getCurrent();
|
const proxy = rotator.proxy.getCurrent();
|
||||||
if (proxy) {
|
if (proxy) {
|
||||||
currentProxy = rotator.proxy.getProxyUrl(proxy);
|
currentProxy = rotator.proxy.getProxyUrl(proxy);
|
||||||
@@ -127,41 +115,30 @@ export function getCrawlRotator(): CrawlRotator | null {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handle 403 block - per workflow-12102025.md:
|
* Rotate to next proxy (called on 403)
|
||||||
* 1. Record block on current proxy (increments consecutive_403_count)
|
|
||||||
* 2. Immediately rotate to new proxy (new IP)
|
|
||||||
* 3. Rotate fingerprint
|
|
||||||
* Returns false if no more proxies available
|
|
||||||
*/
|
*/
|
||||||
async function handle403Block(): Promise<boolean> {
|
async function rotateProxyOn403(error?: string): Promise<boolean> {
|
||||||
if (!crawlRotator) {
|
if (!crawlRotator) {
|
||||||
console.warn('[Dutchie Client] No CrawlRotator - cannot handle 403');
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Per workflow-12102025.md: record block (tracks consecutive 403s)
|
// Record failure on current proxy
|
||||||
const wasDisabled = await crawlRotator.recordBlock();
|
await crawlRotator.recordFailure(error || '403 Forbidden');
|
||||||
if (wasDisabled) {
|
|
||||||
console.log('[Dutchie Client] Current proxy was disabled (3 consecutive 403s)');
|
|
||||||
}
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: immediately get new IP + new fingerprint
|
|
||||||
const { proxy: nextProxy, fingerprint } = crawlRotator.rotateBoth();
|
|
||||||
|
|
||||||
|
// Rotate to next proxy
|
||||||
|
const nextProxy = crawlRotator.rotateProxy();
|
||||||
if (nextProxy) {
|
if (nextProxy) {
|
||||||
currentProxy = crawlRotator.proxy.getProxyUrl(nextProxy);
|
currentProxy = crawlRotator.proxy.getProxyUrl(nextProxy);
|
||||||
console.log(`[Dutchie Client] Rotated to new proxy: ${currentProxy.replace(/:[^:@]+@/, ':***@')}`);
|
console.log(`[Dutchie Client] Rotated proxy: ${currentProxy.replace(/:[^:@]+@/, ':***@')}`);
|
||||||
console.log(`[Dutchie Client] New fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
console.error('[Dutchie Client] No more proxies available!');
|
console.warn('[Dutchie Client] No more proxies available');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Record success on current proxy
|
* Record success on current proxy
|
||||||
* Per workflow-12102025.md: resets consecutive_403_count
|
|
||||||
*/
|
*/
|
||||||
async function recordProxySuccess(responseTimeMs?: number): Promise<void> {
|
async function recordProxySuccess(responseTimeMs?: number): Promise<void> {
|
||||||
if (crawlRotator) {
|
if (crawlRotator) {
|
||||||
@@ -185,69 +162,163 @@ export const GRAPHQL_HASHES = {
|
|||||||
GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
|
GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// FINGERPRINTS - Browser profiles for anti-detect
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
const FINGERPRINTS: Fingerprint[] = [
|
||||||
|
// Chrome Windows (latest) - typical residential user, use first
|
||||||
|
{
|
||||||
|
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
||||||
|
acceptLanguage: 'en-US,en;q=0.9',
|
||||||
|
secChUa: '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
|
||||||
|
secChUaPlatform: '"Windows"',
|
||||||
|
secChUaMobile: '?0',
|
||||||
|
},
|
||||||
|
// Chrome Mac (latest)
|
||||||
|
{
|
||||||
|
userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
||||||
|
acceptLanguage: 'en-US,en;q=0.9',
|
||||||
|
secChUa: '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
|
||||||
|
secChUaPlatform: '"macOS"',
|
||||||
|
secChUaMobile: '?0',
|
||||||
|
},
|
||||||
|
// Chrome Windows (120)
|
||||||
|
{
|
||||||
|
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||||
|
acceptLanguage: 'en-US,en;q=0.9',
|
||||||
|
secChUa: '"Chromium";v="120", "Google Chrome";v="120", "Not-A.Brand";v="99"',
|
||||||
|
secChUaPlatform: '"Windows"',
|
||||||
|
secChUaMobile: '?0',
|
||||||
|
},
|
||||||
|
// Firefox Windows
|
||||||
|
{
|
||||||
|
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
|
||||||
|
acceptLanguage: 'en-US,en;q=0.5',
|
||||||
|
},
|
||||||
|
// Safari Mac
|
||||||
|
{
|
||||||
|
userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
|
||||||
|
acceptLanguage: 'en-US,en;q=0.9',
|
||||||
|
},
|
||||||
|
// Edge Windows
|
||||||
|
{
|
||||||
|
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
|
||||||
|
acceptLanguage: 'en-US,en;q=0.9',
|
||||||
|
secChUa: '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
|
||||||
|
secChUaPlatform: '"Windows"',
|
||||||
|
secChUaMobile: '?0',
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
let currentFingerprintIndex = 0;
|
||||||
|
|
||||||
|
// Forward declaration for session (actual CrawlSession interface defined later)
|
||||||
|
let currentSession: {
|
||||||
|
sessionId: string;
|
||||||
|
fingerprint: Fingerprint;
|
||||||
|
proxyUrl: string | null;
|
||||||
|
stateCode?: string;
|
||||||
|
timezone?: string;
|
||||||
|
startedAt: Date;
|
||||||
|
} | null = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get current fingerprint - returns session fingerprint if active, otherwise default
|
||||||
|
*/
|
||||||
|
export function getFingerprint(): Fingerprint {
|
||||||
|
// Use session fingerprint if a session is active
|
||||||
|
if (currentSession) {
|
||||||
|
return currentSession.fingerprint;
|
||||||
|
}
|
||||||
|
return FINGERPRINTS[currentFingerprintIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
export function rotateFingerprint(): Fingerprint {
|
||||||
|
currentFingerprintIndex = (currentFingerprintIndex + 1) % FINGERPRINTS.length;
|
||||||
|
const fp = FINGERPRINTS[currentFingerprintIndex];
|
||||||
|
console.log(`[Dutchie Client] Rotated to fingerprint: ${fp.userAgent.slice(0, 50)}...`);
|
||||||
|
return fp;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function resetFingerprint(): void {
|
||||||
|
currentFingerprintIndex = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a random fingerprint from the pool
|
||||||
|
*/
|
||||||
|
export function getRandomFingerprint(): Fingerprint {
|
||||||
|
const index = Math.floor(Math.random() * FINGERPRINTS.length);
|
||||||
|
return FINGERPRINTS[index];
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// SESSION MANAGEMENT
|
// SESSION MANAGEMENT
|
||||||
// Per workflow-12102025.md:
|
// Per-session fingerprint rotation for stealth
|
||||||
// - Session identity comes from PROXY LOCATION
|
|
||||||
// - NOT from task params (no stateCode/timezone params)
|
|
||||||
// - Language is always English
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
export interface CrawlSession {
|
export interface CrawlSession {
|
||||||
sessionId: string;
|
sessionId: string;
|
||||||
fingerprint: BrowserFingerprint;
|
fingerprint: Fingerprint;
|
||||||
proxyUrl: string | null;
|
proxyUrl: string | null;
|
||||||
proxyTimezone?: string;
|
stateCode?: string;
|
||||||
proxyState?: string;
|
timezone?: string;
|
||||||
startedAt: Date;
|
startedAt: Date;
|
||||||
// Per workflow-12102025.md: Dynamic Referer per dispensary
|
|
||||||
menuUrl?: string;
|
|
||||||
referer: string;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let currentSession: CrawlSession | null = null;
|
// Note: currentSession variable declared earlier in file for proper scoping
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start a new crawl session
|
* Timezone to Accept-Language mapping
|
||||||
*
|
* US timezones all use en-US but this can be extended for international
|
||||||
* Per workflow-12102025.md:
|
|
||||||
* - NO state/timezone params - identity comes from proxy location
|
|
||||||
* - Gets fingerprint from CrawlRotator (uses intoli/user-agents)
|
|
||||||
* - Language is always English (en-US)
|
|
||||||
* - Dynamic Referer per dispensary (from menuUrl)
|
|
||||||
*
|
|
||||||
* @param menuUrl - The dispensary's menu URL for dynamic Referer header
|
|
||||||
*/
|
*/
|
||||||
export function startSession(menuUrl?: string): CrawlSession {
|
const TIMEZONE_TO_LOCALE: Record<string, string> = {
|
||||||
if (!crawlRotator) {
|
'America/Phoenix': 'en-US,en;q=0.9',
|
||||||
throw new Error('[Dutchie Client] Cannot start session without CrawlRotator');
|
'America/Los_Angeles': 'en-US,en;q=0.9',
|
||||||
}
|
'America/Denver': 'en-US,en;q=0.9',
|
||||||
|
'America/Chicago': 'en-US,en;q=0.9',
|
||||||
|
'America/New_York': 'en-US,en;q=0.9',
|
||||||
|
'America/Detroit': 'en-US,en;q=0.9',
|
||||||
|
'America/Anchorage': 'en-US,en;q=0.9',
|
||||||
|
'Pacific/Honolulu': 'en-US,en;q=0.9',
|
||||||
|
};
|
||||||
|
|
||||||
// Per workflow-12102025.md: get identity from proxy location
|
/**
|
||||||
const proxyLocation = crawlRotator.getProxyLocation();
|
* Get Accept-Language header for a given timezone
|
||||||
const fingerprint = crawlRotator.userAgent.getCurrent();
|
*/
|
||||||
|
export function getLocaleForTimezone(timezone?: string): string {
|
||||||
|
if (!timezone) return 'en-US,en;q=0.9';
|
||||||
|
return TIMEZONE_TO_LOCALE[timezone] || 'en-US,en;q=0.9';
|
||||||
|
}
|
||||||
|
|
||||||
// Per workflow-12102025.md: Dynamic Referer per dispensary
|
/**
|
||||||
const referer = buildRefererFromMenuUrl(menuUrl);
|
* Start a new crawl session with a random fingerprint
|
||||||
|
* Call this before crawling a store to get a fresh identity
|
||||||
|
*/
|
||||||
|
export function startSession(stateCode?: string, timezone?: string): CrawlSession {
|
||||||
|
const baseFp = getRandomFingerprint();
|
||||||
|
|
||||||
|
// Override Accept-Language based on timezone for geographic consistency
|
||||||
|
const fingerprint: Fingerprint = {
|
||||||
|
...baseFp,
|
||||||
|
acceptLanguage: getLocaleForTimezone(timezone),
|
||||||
|
};
|
||||||
|
|
||||||
currentSession = {
|
currentSession = {
|
||||||
sessionId: `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
|
sessionId: `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
|
||||||
fingerprint,
|
fingerprint,
|
||||||
proxyUrl: currentProxy,
|
proxyUrl: currentProxy,
|
||||||
proxyTimezone: proxyLocation?.timezone,
|
stateCode,
|
||||||
proxyState: proxyLocation?.state,
|
timezone,
|
||||||
startedAt: new Date(),
|
startedAt: new Date(),
|
||||||
menuUrl,
|
|
||||||
referer,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
console.log(`[Dutchie Client] Started session ${currentSession.sessionId}`);
|
console.log(`[Dutchie Client] Started session ${currentSession.sessionId}`);
|
||||||
console.log(`[Dutchie Client] Browser: ${fingerprint.browserName} (${fingerprint.deviceCategory})`);
|
console.log(`[Dutchie Client] Fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`);
|
||||||
console.log(`[Dutchie Client] DNT: ${fingerprint.httpFingerprint.hasDNT ? 'enabled' : 'disabled'}`);
|
console.log(`[Dutchie Client] Accept-Language: ${fingerprint.acceptLanguage}`);
|
||||||
console.log(`[Dutchie Client] TLS: ${fingerprint.httpFingerprint.curlImpersonateBinary}`);
|
if (timezone) {
|
||||||
console.log(`[Dutchie Client] Referer: ${referer}`);
|
console.log(`[Dutchie Client] Timezone: ${timezone}`);
|
||||||
if (proxyLocation?.timezone) {
|
|
||||||
console.log(`[Dutchie Client] Proxy: ${proxyLocation.state || 'unknown'} (${proxyLocation.timezone})`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return currentSession;
|
return currentSession;
|
||||||
@@ -276,80 +347,48 @@ export function getCurrentSession(): CrawlSession | null {
|
|||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Per workflow-12102025.md: Build headers using HTTP fingerprint system
|
* Build headers for Dutchie requests
|
||||||
* Returns headers in browser-specific order with all natural variations
|
|
||||||
*/
|
*/
|
||||||
export function buildHeaders(isPost: boolean, contentLength?: number): { headers: Record<string, string>; orderedHeaders: string[] } {
|
export function buildHeaders(refererPath: string, fingerprint?: Fingerprint): Record<string, string> {
|
||||||
if (!currentSession || !crawlRotator) {
|
const fp = fingerprint || getFingerprint();
|
||||||
throw new Error('[Dutchie Client] Cannot build headers without active session');
|
const refererUrl = `https://dutchie.com${refererPath}`;
|
||||||
}
|
|
||||||
|
|
||||||
const fp = currentSession.fingerprint;
|
const headers: Record<string, string> = {
|
||||||
const httpFp = fp.httpFingerprint;
|
'accept': 'application/json, text/plain, */*',
|
||||||
|
'accept-language': fp.acceptLanguage,
|
||||||
// Per workflow-12102025.md: Build context for ordered headers
|
'content-type': 'application/json',
|
||||||
const context: HeaderContext = {
|
'origin': 'https://dutchie.com',
|
||||||
userAgent: fp.userAgent,
|
'referer': refererUrl,
|
||||||
secChUa: fp.secChUa,
|
'user-agent': fp.userAgent,
|
||||||
secChUaPlatform: fp.secChUaPlatform,
|
'apollographql-client-name': 'Marketplace (production)',
|
||||||
secChUaMobile: fp.secChUaMobile,
|
|
||||||
referer: currentSession.referer,
|
|
||||||
isPost,
|
|
||||||
contentLength,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Per workflow-12102025.md: Get ordered headers from HTTP fingerprint service
|
if (fp.secChUa) {
|
||||||
return buildOrderedHeaders(httpFp, context);
|
headers['sec-ch-ua'] = fp.secChUa;
|
||||||
|
headers['sec-ch-ua-mobile'] = fp.secChUaMobile || '?0';
|
||||||
|
headers['sec-ch-ua-platform'] = fp.secChUaPlatform || '"Windows"';
|
||||||
|
headers['sec-fetch-dest'] = 'empty';
|
||||||
|
headers['sec-fetch-mode'] = 'cors';
|
||||||
|
headers['sec-fetch-site'] = 'same-site';
|
||||||
|
}
|
||||||
|
|
||||||
|
return headers;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Per workflow-12102025.md: Get curl binary for current session's browser
|
* Execute HTTP POST using curl (bypasses TLS fingerprinting)
|
||||||
* Uses curl-impersonate for TLS fingerprint matching
|
|
||||||
*/
|
*/
|
||||||
function getCurlBinaryForSession(): string {
|
export function curlPost(url: string, body: any, headers: Record<string, string>, timeout = 30000): CurlResponse {
|
||||||
if (!currentSession) {
|
const filteredHeaders = Object.entries(headers)
|
||||||
return 'curl'; // Fallback to standard curl
|
.filter(([k]) => k.toLowerCase() !== 'accept-encoding')
|
||||||
}
|
.map(([k, v]) => `-H '${k}: ${v}'`)
|
||||||
|
|
||||||
const browserType = currentSession.fingerprint.browserName as BrowserType;
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: Check if curl-impersonate is available
|
|
||||||
if (isCurlImpersonateAvailable(browserType)) {
|
|
||||||
return getCurlBinary(browserType);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback to standard curl with warning
|
|
||||||
console.warn(`[Dutchie Client] curl-impersonate not available for ${browserType}, using standard curl`);
|
|
||||||
return 'curl';
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Execute HTTP POST using curl/curl-impersonate
|
|
||||||
* - Uses browser-specific TLS fingerprint via curl-impersonate
|
|
||||||
* - Headers sent in browser-specific order
|
|
||||||
* - Dynamic Referer per dispensary
|
|
||||||
*/
|
|
||||||
export function curlPost(url: string, body: any, timeout = 30000): CurlResponse {
|
|
||||||
const bodyJson = JSON.stringify(body);
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: Build ordered headers for POST request
|
|
||||||
const { headers, orderedHeaders } = buildHeaders(true, bodyJson.length);
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: Build header args in browser-specific order
|
|
||||||
const headerArgs = orderedHeaders
|
|
||||||
.filter(h => h !== 'Host' && h !== 'Content-Length') // curl handles these
|
|
||||||
.map(h => `-H '${h}: ${headers[h]}'`)
|
|
||||||
.join(' ');
|
.join(' ');
|
||||||
|
|
||||||
const bodyEscaped = bodyJson.replace(/'/g, "'\\''");
|
const bodyJson = JSON.stringify(body).replace(/'/g, "'\\''");
|
||||||
const timeoutSec = Math.ceil(timeout / 1000);
|
const timeoutSec = Math.ceil(timeout / 1000);
|
||||||
const separator = '___HTTP_STATUS___';
|
const separator = '___HTTP_STATUS___';
|
||||||
const proxyArg = getProxyArg();
|
const proxyArg = getProxyArg();
|
||||||
|
const cmd = `curl -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${filteredHeaders} -d '${bodyJson}' '${url}'`;
|
||||||
// Per workflow-12102025.md: Use curl-impersonate for TLS fingerprint matching
|
|
||||||
const curlBinary = getCurlBinaryForSession();
|
|
||||||
|
|
||||||
const cmd = `${curlBinary} -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${headerArgs} -d '${bodyEscaped}' '${url}'`;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const output = execSync(cmd, {
|
const output = execSync(cmd, {
|
||||||
@@ -388,29 +427,19 @@ export function curlPost(url: string, body: any, timeout = 30000): CurlResponse
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Per workflow-12102025.md: Execute HTTP GET using curl/curl-impersonate
|
* Execute HTTP GET using curl (bypasses TLS fingerprinting)
|
||||||
* - Uses browser-specific TLS fingerprint via curl-impersonate
|
* Returns HTML or JSON depending on response content-type
|
||||||
* - Headers sent in browser-specific order
|
|
||||||
* - Dynamic Referer per dispensary
|
|
||||||
*/
|
*/
|
||||||
export function curlGet(url: string, timeout = 30000): CurlResponse {
|
export function curlGet(url: string, headers: Record<string, string>, timeout = 30000): CurlResponse {
|
||||||
// Per workflow-12102025.md: Build ordered headers for GET request
|
const filteredHeaders = Object.entries(headers)
|
||||||
const { headers, orderedHeaders } = buildHeaders(false);
|
.filter(([k]) => k.toLowerCase() !== 'accept-encoding')
|
||||||
|
.map(([k, v]) => `-H '${k}: ${v}'`)
|
||||||
// Per workflow-12102025.md: Build header args in browser-specific order
|
|
||||||
const headerArgs = orderedHeaders
|
|
||||||
.filter(h => h !== 'Host' && h !== 'Content-Length') // curl handles these
|
|
||||||
.map(h => `-H '${h}: ${headers[h]}'`)
|
|
||||||
.join(' ');
|
.join(' ');
|
||||||
|
|
||||||
const timeoutSec = Math.ceil(timeout / 1000);
|
const timeoutSec = Math.ceil(timeout / 1000);
|
||||||
const separator = '___HTTP_STATUS___';
|
const separator = '___HTTP_STATUS___';
|
||||||
const proxyArg = getProxyArg();
|
const proxyArg = getProxyArg();
|
||||||
|
const cmd = `curl -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${filteredHeaders} '${url}'`;
|
||||||
// Per workflow-12102025.md: Use curl-impersonate for TLS fingerprint matching
|
|
||||||
const curlBinary = getCurlBinaryForSession();
|
|
||||||
|
|
||||||
const cmd = `${curlBinary} -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${headerArgs} '${url}'`;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const output = execSync(cmd, {
|
const output = execSync(cmd, {
|
||||||
@@ -430,6 +459,7 @@ export function curlGet(url: string, timeout = 30000): CurlResponse {
|
|||||||
const responseBody = output.slice(0, separatorIndex);
|
const responseBody = output.slice(0, separatorIndex);
|
||||||
const statusCode = parseInt(output.slice(separatorIndex + separator.length).trim(), 10);
|
const statusCode = parseInt(output.slice(separatorIndex + separator.length).trim(), 10);
|
||||||
|
|
||||||
|
// Try to parse as JSON, otherwise return as string (HTML)
|
||||||
try {
|
try {
|
||||||
return { status: statusCode, data: JSON.parse(responseBody) };
|
return { status: statusCode, data: JSON.parse(responseBody) };
|
||||||
} catch {
|
} catch {
|
||||||
@@ -446,22 +476,16 @@ export function curlGet(url: string, timeout = 30000): CurlResponse {
|
|||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// GRAPHQL EXECUTION
|
// GRAPHQL EXECUTION
|
||||||
// Per workflow-12102025.md:
|
|
||||||
// - On 403: immediately rotate IP + fingerprint (no delay first)
|
|
||||||
// - Then retry
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
export interface ExecuteGraphQLOptions {
|
export interface ExecuteGraphQLOptions {
|
||||||
maxRetries?: number;
|
maxRetries?: number;
|
||||||
retryOn403?: boolean;
|
retryOn403?: boolean;
|
||||||
cName?: string;
|
cName?: string; // Optional - used for Referer header, defaults to 'cities'
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Per workflow-12102025.md: Execute GraphQL query with curl/curl-impersonate
|
* Execute GraphQL query with curl (bypasses TLS fingerprinting)
|
||||||
* - Uses browser-specific TLS fingerprint
|
|
||||||
* - Headers in browser-specific order
|
|
||||||
* - On 403: immediately rotate IP + fingerprint, then retry
|
|
||||||
*/
|
*/
|
||||||
export async function executeGraphQL(
|
export async function executeGraphQL(
|
||||||
operationName: string,
|
operationName: string,
|
||||||
@@ -469,12 +493,7 @@ export async function executeGraphQL(
|
|||||||
hash: string,
|
hash: string,
|
||||||
options: ExecuteGraphQLOptions
|
options: ExecuteGraphQLOptions
|
||||||
): Promise<any> {
|
): Promise<any> {
|
||||||
const { maxRetries = 3, retryOn403 = true } = options;
|
const { maxRetries = 3, retryOn403 = true, cName = 'cities' } = options;
|
||||||
|
|
||||||
// Per workflow-12102025.md: Session must be active for requests
|
|
||||||
if (!currentSession) {
|
|
||||||
throw new Error('[Dutchie Client] Cannot execute GraphQL without active session - call startSession() first');
|
|
||||||
}
|
|
||||||
|
|
||||||
const body = {
|
const body = {
|
||||||
operationName,
|
operationName,
|
||||||
@@ -488,14 +507,14 @@ export async function executeGraphQL(
|
|||||||
let attempt = 0;
|
let attempt = 0;
|
||||||
|
|
||||||
while (attempt <= maxRetries) {
|
while (attempt <= maxRetries) {
|
||||||
|
const fingerprint = getFingerprint();
|
||||||
|
const headers = buildHeaders(`/embedded-menu/${cName}`, fingerprint);
|
||||||
|
|
||||||
console.log(`[Dutchie Client] curl POST ${operationName} (attempt ${attempt + 1}/${maxRetries + 1})`);
|
console.log(`[Dutchie Client] curl POST ${operationName} (attempt ${attempt + 1}/${maxRetries + 1})`);
|
||||||
|
|
||||||
const startTime = Date.now();
|
const response = curlPost(DUTCHIE_CONFIG.graphqlEndpoint, body, headers, DUTCHIE_CONFIG.timeout);
|
||||||
// Per workflow-12102025.md: curlPost now uses ordered headers and curl-impersonate
|
|
||||||
const response = curlPost(DUTCHIE_CONFIG.graphqlEndpoint, body, DUTCHIE_CONFIG.timeout);
|
|
||||||
const responseTime = Date.now() - startTime;
|
|
||||||
|
|
||||||
console.log(`[Dutchie Client] Response status: ${response.status} (${responseTime}ms)`);
|
console.log(`[Dutchie Client] Response status: ${response.status}`);
|
||||||
|
|
||||||
if (response.error) {
|
if (response.error) {
|
||||||
console.error(`[Dutchie Client] curl error: ${response.error}`);
|
console.error(`[Dutchie Client] curl error: ${response.error}`);
|
||||||
@@ -508,9 +527,6 @@ export async function executeGraphQL(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
// Per workflow-12102025.md: success resets consecutive 403 count
|
|
||||||
await recordProxySuccess(responseTime);
|
|
||||||
|
|
||||||
if (response.data?.errors?.length > 0) {
|
if (response.data?.errors?.length > 0) {
|
||||||
console.warn(`[Dutchie Client] GraphQL errors: ${JSON.stringify(response.data.errors[0])}`);
|
console.warn(`[Dutchie Client] GraphQL errors: ${JSON.stringify(response.data.errors[0])}`);
|
||||||
}
|
}
|
||||||
@@ -518,20 +534,10 @@ export async function executeGraphQL(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (response.status === 403 && retryOn403) {
|
if (response.status === 403 && retryOn403) {
|
||||||
// Per workflow-12102025.md: immediately rotate IP + fingerprint
|
console.warn(`[Dutchie Client] 403 blocked - rotating fingerprint...`);
|
||||||
console.warn(`[Dutchie Client] 403 blocked - immediately rotating proxy + fingerprint...`);
|
rotateFingerprint();
|
||||||
const hasMoreProxies = await handle403Block();
|
|
||||||
|
|
||||||
if (!hasMoreProxies) {
|
|
||||||
throw new Error('All proxies exhausted - no more IPs available');
|
|
||||||
}
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: Update session referer after rotation
|
|
||||||
currentSession.referer = buildRefererFromMenuUrl(currentSession.menuUrl);
|
|
||||||
|
|
||||||
attempt++;
|
attempt++;
|
||||||
// Per workflow-12102025.md: small backoff after rotation
|
await sleep(1000 * attempt);
|
||||||
await sleep(500);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -560,10 +566,8 @@ export interface FetchPageOptions {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Per workflow-12102025.md: Fetch HTML page from Dutchie
|
* Fetch HTML page from Dutchie (for city pages, dispensary pages, etc.)
|
||||||
* - Uses browser-specific TLS fingerprint
|
* Returns raw HTML string
|
||||||
* - Headers in browser-specific order
|
|
||||||
* - Same 403 handling as GraphQL
|
|
||||||
*/
|
*/
|
||||||
export async function fetchPage(
|
export async function fetchPage(
|
||||||
path: string,
|
path: string,
|
||||||
@@ -572,22 +576,32 @@ export async function fetchPage(
|
|||||||
const { maxRetries = 3, retryOn403 = true } = options;
|
const { maxRetries = 3, retryOn403 = true } = options;
|
||||||
const url = `${DUTCHIE_CONFIG.baseUrl}${path}`;
|
const url = `${DUTCHIE_CONFIG.baseUrl}${path}`;
|
||||||
|
|
||||||
// Per workflow-12102025.md: Session must be active for requests
|
|
||||||
if (!currentSession) {
|
|
||||||
throw new Error('[Dutchie Client] Cannot fetch page without active session - call startSession() first');
|
|
||||||
}
|
|
||||||
|
|
||||||
let attempt = 0;
|
let attempt = 0;
|
||||||
|
|
||||||
while (attempt <= maxRetries) {
|
while (attempt <= maxRetries) {
|
||||||
// Per workflow-12102025.md: curlGet now uses ordered headers and curl-impersonate
|
const fingerprint = getFingerprint();
|
||||||
|
const headers: Record<string, string> = {
|
||||||
|
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
||||||
|
'accept-language': fingerprint.acceptLanguage,
|
||||||
|
'user-agent': fingerprint.userAgent,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (fingerprint.secChUa) {
|
||||||
|
headers['sec-ch-ua'] = fingerprint.secChUa;
|
||||||
|
headers['sec-ch-ua-mobile'] = fingerprint.secChUaMobile || '?0';
|
||||||
|
headers['sec-ch-ua-platform'] = fingerprint.secChUaPlatform || '"Windows"';
|
||||||
|
headers['sec-fetch-dest'] = 'document';
|
||||||
|
headers['sec-fetch-mode'] = 'navigate';
|
||||||
|
headers['sec-fetch-site'] = 'none';
|
||||||
|
headers['sec-fetch-user'] = '?1';
|
||||||
|
headers['upgrade-insecure-requests'] = '1';
|
||||||
|
}
|
||||||
|
|
||||||
console.log(`[Dutchie Client] curl GET ${path} (attempt ${attempt + 1}/${maxRetries + 1})`);
|
console.log(`[Dutchie Client] curl GET ${path} (attempt ${attempt + 1}/${maxRetries + 1})`);
|
||||||
|
|
||||||
const startTime = Date.now();
|
const response = curlGet(url, headers, DUTCHIE_CONFIG.timeout);
|
||||||
const response = curlGet(url, DUTCHIE_CONFIG.timeout);
|
|
||||||
const responseTime = Date.now() - startTime;
|
|
||||||
|
|
||||||
console.log(`[Dutchie Client] Response status: ${response.status} (${responseTime}ms)`);
|
console.log(`[Dutchie Client] Response status: ${response.status}`);
|
||||||
|
|
||||||
if (response.error) {
|
if (response.error) {
|
||||||
console.error(`[Dutchie Client] curl error: ${response.error}`);
|
console.error(`[Dutchie Client] curl error: ${response.error}`);
|
||||||
@@ -599,26 +613,14 @@ export async function fetchPage(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
// Per workflow-12102025.md: success resets consecutive 403 count
|
|
||||||
await recordProxySuccess(responseTime);
|
|
||||||
return { html: response.data, status: response.status };
|
return { html: response.data, status: response.status };
|
||||||
}
|
}
|
||||||
|
|
||||||
if (response.status === 403 && retryOn403) {
|
if (response.status === 403 && retryOn403) {
|
||||||
// Per workflow-12102025.md: immediately rotate IP + fingerprint
|
console.warn(`[Dutchie Client] 403 blocked - rotating fingerprint...`);
|
||||||
console.warn(`[Dutchie Client] 403 blocked - immediately rotating proxy + fingerprint...`);
|
rotateFingerprint();
|
||||||
const hasMoreProxies = await handle403Block();
|
|
||||||
|
|
||||||
if (!hasMoreProxies) {
|
|
||||||
throw new Error('All proxies exhausted - no more IPs available');
|
|
||||||
}
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: Update session after rotation
|
|
||||||
currentSession.referer = buildRefererFromMenuUrl(currentSession.menuUrl);
|
|
||||||
|
|
||||||
attempt++;
|
attempt++;
|
||||||
// Per workflow-12102025.md: small backoff after rotation
|
await sleep(1000 * attempt);
|
||||||
await sleep(500);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,17 +6,22 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
export {
|
export {
|
||||||
// HTTP Client (per workflow-12102025.md: uses curl-impersonate + ordered headers)
|
// HTTP Client
|
||||||
curlPost,
|
curlPost,
|
||||||
curlGet,
|
curlGet,
|
||||||
executeGraphQL,
|
executeGraphQL,
|
||||||
fetchPage,
|
fetchPage,
|
||||||
extractNextData,
|
extractNextData,
|
||||||
|
|
||||||
// Headers (per workflow-12102025.md: browser-specific ordering)
|
// Headers & Fingerprints
|
||||||
buildHeaders,
|
buildHeaders,
|
||||||
|
getFingerprint,
|
||||||
|
rotateFingerprint,
|
||||||
|
resetFingerprint,
|
||||||
|
getRandomFingerprint,
|
||||||
|
getLocaleForTimezone,
|
||||||
|
|
||||||
// Session Management (per workflow-12102025.md: menuUrl for dynamic Referer)
|
// Session Management (per-store fingerprint rotation)
|
||||||
startSession,
|
startSession,
|
||||||
endSession,
|
endSession,
|
||||||
getCurrentSession,
|
getCurrentSession,
|
||||||
|
|||||||
@@ -7,23 +7,15 @@
|
|||||||
* Routes are prefixed with /api/analytics/v2
|
* Routes are prefixed with /api/analytics/v2
|
||||||
*
|
*
|
||||||
* Phase 3: Analytics Engine + Rec/Med by State
|
* Phase 3: Analytics Engine + Rec/Med by State
|
||||||
*
|
|
||||||
* SECURITY: All routes require authentication via authMiddleware.
|
|
||||||
* Access is granted to:
|
|
||||||
* - Trusted origins (cannaiq.co, findadispo.com, etc.)
|
|
||||||
* - Trusted IPs (localhost, internal pods)
|
|
||||||
* - Valid JWT or API tokens
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { Router, Request, Response } from 'express';
|
import { Router, Request, Response } from 'express';
|
||||||
import { Pool } from 'pg';
|
import { Pool } from 'pg';
|
||||||
import { authMiddleware } from '../auth/middleware';
|
|
||||||
import { PriceAnalyticsService } from '../services/analytics/PriceAnalyticsService';
|
import { PriceAnalyticsService } from '../services/analytics/PriceAnalyticsService';
|
||||||
import { BrandPenetrationService } from '../services/analytics/BrandPenetrationService';
|
import { BrandPenetrationService } from '../services/analytics/BrandPenetrationService';
|
||||||
import { CategoryAnalyticsService } from '../services/analytics/CategoryAnalyticsService';
|
import { CategoryAnalyticsService } from '../services/analytics/CategoryAnalyticsService';
|
||||||
import { StoreAnalyticsService } from '../services/analytics/StoreAnalyticsService';
|
import { StoreAnalyticsService } from '../services/analytics/StoreAnalyticsService';
|
||||||
import { StateAnalyticsService } from '../services/analytics/StateAnalyticsService';
|
import { StateAnalyticsService } from '../services/analytics/StateAnalyticsService';
|
||||||
import { BrandIntelligenceService } from '../services/analytics/BrandIntelligenceService';
|
|
||||||
import { TimeWindow, LegalType } from '../services/analytics/types';
|
import { TimeWindow, LegalType } from '../services/analytics/types';
|
||||||
|
|
||||||
function parseTimeWindow(window?: string): TimeWindow {
|
function parseTimeWindow(window?: string): TimeWindow {
|
||||||
@@ -43,17 +35,12 @@ function parseLegalType(legalType?: string): LegalType {
|
|||||||
export function createAnalyticsV2Router(pool: Pool): Router {
|
export function createAnalyticsV2Router(pool: Pool): Router {
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// SECURITY: Apply auth middleware to ALL routes
|
|
||||||
// This gate ensures only authenticated requests can access analytics data
|
|
||||||
router.use(authMiddleware);
|
|
||||||
|
|
||||||
// Initialize services
|
// Initialize services
|
||||||
const priceService = new PriceAnalyticsService(pool);
|
const priceService = new PriceAnalyticsService(pool);
|
||||||
const brandService = new BrandPenetrationService(pool);
|
const brandService = new BrandPenetrationService(pool);
|
||||||
const categoryService = new CategoryAnalyticsService(pool);
|
const categoryService = new CategoryAnalyticsService(pool);
|
||||||
const storeService = new StoreAnalyticsService(pool);
|
const storeService = new StoreAnalyticsService(pool);
|
||||||
const stateService = new StateAnalyticsService(pool);
|
const stateService = new StateAnalyticsService(pool);
|
||||||
const brandIntelligenceService = new BrandIntelligenceService(pool);
|
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// PRICE ANALYTICS
|
// PRICE ANALYTICS
|
||||||
@@ -244,76 +231,6 @@ export function createAnalyticsV2Router(pool: Pool): Router {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /brand/:name/promotions
|
|
||||||
* Get brand promotional history - tracks specials, discounts, duration, and sales estimates
|
|
||||||
*
|
|
||||||
* Query params:
|
|
||||||
* - window: 7d|30d|90d (default: 90d)
|
|
||||||
* - state: state code filter (e.g., AZ)
|
|
||||||
* - category: category filter (e.g., Flower)
|
|
||||||
*/
|
|
||||||
router.get('/brand/:name/promotions', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const brandName = decodeURIComponent(req.params.name);
|
|
||||||
const window = parseTimeWindow(req.query.window as string) || '90d';
|
|
||||||
const stateCode = req.query.state as string | undefined;
|
|
||||||
const category = req.query.category as string | undefined;
|
|
||||||
|
|
||||||
const result = await brandService.getBrandPromotionalHistory(brandName, {
|
|
||||||
window,
|
|
||||||
stateCode,
|
|
||||||
category,
|
|
||||||
});
|
|
||||||
res.json(result);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[AnalyticsV2] Brand promotions error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to fetch brand promotional history' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /brand/:name/intelligence
|
|
||||||
* Get comprehensive B2B brand intelligence dashboard data
|
|
||||||
*
|
|
||||||
* Returns all brand metrics in a single unified response:
|
|
||||||
* - Performance Snapshot (active SKUs, revenue, stores, market share)
|
|
||||||
* - Alerts/Slippage (lost stores, delisted SKUs, competitor takeovers)
|
|
||||||
* - Product Velocity (daily rates, velocity status)
|
|
||||||
* - Retail Footprint (penetration, whitespace opportunities)
|
|
||||||
* - Competitive Landscape (price position, market share trend)
|
|
||||||
* - Inventory Health (days of stock, risk levels)
|
|
||||||
* - Promotion Effectiveness (baseline vs promo velocity, ROI)
|
|
||||||
*
|
|
||||||
* Query params:
|
|
||||||
* - window: 7d|30d|90d (default: 30d)
|
|
||||||
* - state: state code filter (e.g., AZ)
|
|
||||||
* - category: category filter (e.g., Flower)
|
|
||||||
*/
|
|
||||||
router.get('/brand/:name/intelligence', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const brandName = decodeURIComponent(req.params.name);
|
|
||||||
const window = parseTimeWindow(req.query.window as string);
|
|
||||||
const stateCode = req.query.state as string | undefined;
|
|
||||||
const category = req.query.category as string | undefined;
|
|
||||||
|
|
||||||
const result = await brandIntelligenceService.getBrandIntelligence(brandName, {
|
|
||||||
window,
|
|
||||||
stateCode,
|
|
||||||
category,
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!result) {
|
|
||||||
return res.status(404).json({ error: 'Brand not found' });
|
|
||||||
}
|
|
||||||
|
|
||||||
res.json(result);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[AnalyticsV2] Brand intelligence error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to fetch brand intelligence' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// CATEGORY ANALYTICS
|
// CATEGORY ANALYTICS
|
||||||
// ============================================================
|
// ============================================================
|
||||||
@@ -483,31 +400,6 @@ export function createAnalyticsV2Router(pool: Pool): Router {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /store/:id/quantity-changes
|
|
||||||
* Get quantity changes for a store (increases/decreases)
|
|
||||||
* Useful for estimating sales (decreases) or restocks (increases)
|
|
||||||
*
|
|
||||||
* Query params:
|
|
||||||
* - window: 7d|30d|90d (default: 7d)
|
|
||||||
* - direction: increase|decrease|all (default: all)
|
|
||||||
* - limit: number (default: 100)
|
|
||||||
*/
|
|
||||||
router.get('/store/:id/quantity-changes', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const dispensaryId = parseInt(req.params.id);
|
|
||||||
const window = parseTimeWindow(req.query.window as string);
|
|
||||||
const direction = (req.query.direction as 'increase' | 'decrease' | 'all') || 'all';
|
|
||||||
const limit = req.query.limit ? parseInt(req.query.limit as string) : 100;
|
|
||||||
|
|
||||||
const result = await storeService.getQuantityChanges(dispensaryId, { window, direction, limit });
|
|
||||||
res.json(result);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[AnalyticsV2] Store quantity changes error:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to fetch store quantity changes' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /store/:id/inventory
|
* GET /store/:id/inventory
|
||||||
* Get store inventory composition
|
* Get store inventory composition
|
||||||
|
|||||||
@@ -47,27 +47,4 @@ router.post('/refresh', authMiddleware, async (req: AuthRequest, res) => {
|
|||||||
res.json({ token });
|
res.json({ token });
|
||||||
});
|
});
|
||||||
|
|
||||||
// Verify password for sensitive actions (requires current user to be authenticated)
|
|
||||||
router.post('/verify-password', authMiddleware, async (req: AuthRequest, res) => {
|
|
||||||
try {
|
|
||||||
const { password } = req.body;
|
|
||||||
|
|
||||||
if (!password) {
|
|
||||||
return res.status(400).json({ error: 'Password required' });
|
|
||||||
}
|
|
||||||
|
|
||||||
// Re-authenticate the current user with the provided password
|
|
||||||
const user = await authenticateUser(req.user!.email, password);
|
|
||||||
|
|
||||||
if (!user) {
|
|
||||||
return res.status(401).json({ error: 'Invalid password', verified: false });
|
|
||||||
}
|
|
||||||
|
|
||||||
res.json({ verified: true });
|
|
||||||
} catch (error) {
|
|
||||||
console.error('Password verification error:', error);
|
|
||||||
res.status(500).json({ error: 'Internal server error' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
export default router;
|
export default router;
|
||||||
|
|||||||
@@ -14,56 +14,35 @@ router.use(authMiddleware);
|
|||||||
/**
|
/**
|
||||||
* GET /api/admin/intelligence/brands
|
* GET /api/admin/intelligence/brands
|
||||||
* List all brands with state presence, store counts, and pricing
|
* List all brands with state presence, store counts, and pricing
|
||||||
* Query params:
|
|
||||||
* - state: Filter by state (e.g., "AZ")
|
|
||||||
* - limit: Max results (default 500)
|
|
||||||
* - offset: Pagination offset
|
|
||||||
*/
|
*/
|
||||||
router.get('/brands', async (req: Request, res: Response) => {
|
router.get('/brands', async (req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
const { limit = '500', offset = '0', state } = req.query;
|
const { limit = '500', offset = '0' } = req.query;
|
||||||
const limitNum = Math.min(parseInt(limit as string, 10), 1000);
|
const limitNum = Math.min(parseInt(limit as string, 10), 1000);
|
||||||
const offsetNum = parseInt(offset as string, 10);
|
const offsetNum = parseInt(offset as string, 10);
|
||||||
|
|
||||||
// Build WHERE clause based on state filter
|
|
||||||
let stateFilter = '';
|
|
||||||
const params: any[] = [limitNum, offsetNum];
|
|
||||||
if (state && state !== 'all') {
|
|
||||||
stateFilter = 'AND d.state = $3';
|
|
||||||
params.push(state);
|
|
||||||
}
|
|
||||||
|
|
||||||
const { rows } = await pool.query(`
|
const { rows } = await pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
sp.brand_name_raw as brand_name,
|
sp.brand_name_raw as brand_name,
|
||||||
array_agg(DISTINCT d.state) FILTER (WHERE d.state IS NOT NULL) as states,
|
array_agg(DISTINCT d.state) FILTER (WHERE d.state IS NOT NULL) as states,
|
||||||
COUNT(DISTINCT d.id) as store_count,
|
COUNT(DISTINCT d.id) as store_count,
|
||||||
COUNT(DISTINCT sp.id) as sku_count,
|
COUNT(DISTINCT sp.id) as sku_count,
|
||||||
ROUND(AVG(sp.price_rec) FILTER (WHERE sp.price_rec > 0)::numeric, 2) as avg_price_rec,
|
ROUND(AVG(sp.price_rec)::numeric, 2) FILTER (WHERE sp.price_rec > 0) as avg_price_rec,
|
||||||
ROUND(AVG(sp.price_med) FILTER (WHERE sp.price_med > 0)::numeric, 2) as avg_price_med
|
ROUND(AVG(sp.price_med)::numeric, 2) FILTER (WHERE sp.price_med > 0) as avg_price_med
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||||
WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != ''
|
WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != ''
|
||||||
${stateFilter}
|
|
||||||
GROUP BY sp.brand_name_raw
|
GROUP BY sp.brand_name_raw
|
||||||
ORDER BY store_count DESC, sku_count DESC
|
ORDER BY store_count DESC, sku_count DESC
|
||||||
LIMIT $1 OFFSET $2
|
LIMIT $1 OFFSET $2
|
||||||
`, params);
|
`, [limitNum, offsetNum]);
|
||||||
|
|
||||||
// Get total count with same state filter
|
// Get total count
|
||||||
const countParams: any[] = [];
|
|
||||||
let countStateFilter = '';
|
|
||||||
if (state && state !== 'all') {
|
|
||||||
countStateFilter = 'AND d.state = $1';
|
|
||||||
countParams.push(state);
|
|
||||||
}
|
|
||||||
const { rows: countRows } = await pool.query(`
|
const { rows: countRows } = await pool.query(`
|
||||||
SELECT COUNT(DISTINCT sp.brand_name_raw) as total
|
SELECT COUNT(DISTINCT brand_name_raw) as total
|
||||||
FROM store_products sp
|
FROM store_products
|
||||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
WHERE brand_name_raw IS NOT NULL AND brand_name_raw != ''
|
||||||
WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != ''
|
`);
|
||||||
${countStateFilter}
|
|
||||||
`, countParams);
|
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
brands: rows.map((r: any) => ({
|
brands: rows.map((r: any) => ({
|
||||||
@@ -168,63 +147,29 @@ router.get('/brands/:brandName/penetration', async (req: Request, res: Response)
|
|||||||
/**
|
/**
|
||||||
* GET /api/admin/intelligence/pricing
|
* GET /api/admin/intelligence/pricing
|
||||||
* Get pricing analytics by category
|
* Get pricing analytics by category
|
||||||
* Query params:
|
|
||||||
* - state: Filter by state (e.g., "AZ")
|
|
||||||
*/
|
*/
|
||||||
router.get('/pricing', async (req: Request, res: Response) => {
|
router.get('/pricing', async (req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
const { state } = req.query;
|
const { rows: categoryRows } = await pool.query(`
|
||||||
|
SELECT
|
||||||
|
sp.category_raw as category,
|
||||||
|
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||||
|
MIN(sp.price_rec) FILTER (WHERE sp.price_rec > 0) as min_price,
|
||||||
|
MAX(sp.price_rec) as max_price,
|
||||||
|
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2)
|
||||||
|
FILTER (WHERE sp.price_rec > 0) as median_price,
|
||||||
|
COUNT(*) as product_count
|
||||||
|
FROM store_products sp
|
||||||
|
WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0
|
||||||
|
GROUP BY sp.category_raw
|
||||||
|
ORDER BY product_count DESC
|
||||||
|
`);
|
||||||
|
|
||||||
// Build WHERE clause based on state filter
|
|
||||||
let stateFilter = '';
|
|
||||||
const categoryParams: any[] = [];
|
|
||||||
const stateQueryParams: any[] = [];
|
|
||||||
const overallParams: any[] = [];
|
|
||||||
|
|
||||||
if (state && state !== 'all') {
|
|
||||||
stateFilter = 'AND d.state = $1';
|
|
||||||
categoryParams.push(state);
|
|
||||||
overallParams.push(state);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Category pricing with optional state filter
|
|
||||||
const categoryQuery = state && state !== 'all'
|
|
||||||
? `
|
|
||||||
SELECT
|
|
||||||
sp.category_raw as category,
|
|
||||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
|
||||||
MIN(sp.price_rec) as min_price,
|
|
||||||
MAX(sp.price_rec) as max_price,
|
|
||||||
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) as median_price,
|
|
||||||
COUNT(*) as product_count
|
|
||||||
FROM store_products sp
|
|
||||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
|
||||||
WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0 ${stateFilter}
|
|
||||||
GROUP BY sp.category_raw
|
|
||||||
ORDER BY product_count DESC
|
|
||||||
`
|
|
||||||
: `
|
|
||||||
SELECT
|
|
||||||
sp.category_raw as category,
|
|
||||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
|
||||||
MIN(sp.price_rec) as min_price,
|
|
||||||
MAX(sp.price_rec) as max_price,
|
|
||||||
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) as median_price,
|
|
||||||
COUNT(*) as product_count
|
|
||||||
FROM store_products sp
|
|
||||||
WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0
|
|
||||||
GROUP BY sp.category_raw
|
|
||||||
ORDER BY product_count DESC
|
|
||||||
`;
|
|
||||||
|
|
||||||
const { rows: categoryRows } = await pool.query(categoryQuery, categoryParams);
|
|
||||||
|
|
||||||
// State pricing
|
|
||||||
const { rows: stateRows } = await pool.query(`
|
const { rows: stateRows } = await pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
d.state,
|
d.state,
|
||||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||||
MIN(sp.price_rec) as min_price,
|
MIN(sp.price_rec) FILTER (WHERE sp.price_rec > 0) as min_price,
|
||||||
MAX(sp.price_rec) as max_price,
|
MAX(sp.price_rec) as max_price,
|
||||||
COUNT(DISTINCT sp.id) as product_count
|
COUNT(DISTINCT sp.id) as product_count
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
@@ -234,31 +179,6 @@ router.get('/pricing', async (req: Request, res: Response) => {
|
|||||||
ORDER BY avg_price DESC
|
ORDER BY avg_price DESC
|
||||||
`);
|
`);
|
||||||
|
|
||||||
// Overall stats with optional state filter
|
|
||||||
const overallQuery = state && state !== 'all'
|
|
||||||
? `
|
|
||||||
SELECT
|
|
||||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
|
||||||
MIN(sp.price_rec) as min_price,
|
|
||||||
MAX(sp.price_rec) as max_price,
|
|
||||||
COUNT(*) as total_products
|
|
||||||
FROM store_products sp
|
|
||||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
|
||||||
WHERE sp.price_rec > 0 ${stateFilter}
|
|
||||||
`
|
|
||||||
: `
|
|
||||||
SELECT
|
|
||||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
|
||||||
MIN(sp.price_rec) as min_price,
|
|
||||||
MAX(sp.price_rec) as max_price,
|
|
||||||
COUNT(*) as total_products
|
|
||||||
FROM store_products sp
|
|
||||||
WHERE sp.price_rec > 0
|
|
||||||
`;
|
|
||||||
|
|
||||||
const { rows: overallRows } = await pool.query(overallQuery, overallParams);
|
|
||||||
const overall = overallRows[0];
|
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
byCategory: categoryRows.map((r: any) => ({
|
byCategory: categoryRows.map((r: any) => ({
|
||||||
category: r.category,
|
category: r.category,
|
||||||
@@ -275,12 +195,6 @@ router.get('/pricing', async (req: Request, res: Response) => {
|
|||||||
maxPrice: r.max_price ? parseFloat(r.max_price) : null,
|
maxPrice: r.max_price ? parseFloat(r.max_price) : null,
|
||||||
productCount: parseInt(r.product_count, 10),
|
productCount: parseInt(r.product_count, 10),
|
||||||
})),
|
})),
|
||||||
overall: {
|
|
||||||
avgPrice: overall?.avg_price ? parseFloat(overall.avg_price) : null,
|
|
||||||
minPrice: overall?.min_price ? parseFloat(overall.min_price) : null,
|
|
||||||
maxPrice: overall?.max_price ? parseFloat(overall.max_price) : null,
|
|
||||||
totalProducts: parseInt(overall?.total_products || '0', 10),
|
|
||||||
},
|
|
||||||
});
|
});
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
console.error('[Intelligence] Error fetching pricing:', error.message);
|
console.error('[Intelligence] Error fetching pricing:', error.message);
|
||||||
@@ -291,23 +205,9 @@ router.get('/pricing', async (req: Request, res: Response) => {
|
|||||||
/**
|
/**
|
||||||
* GET /api/admin/intelligence/stores
|
* GET /api/admin/intelligence/stores
|
||||||
* Get store intelligence summary
|
* Get store intelligence summary
|
||||||
* Query params:
|
|
||||||
* - state: Filter by state (e.g., "AZ")
|
|
||||||
* - limit: Max results (default 200)
|
|
||||||
*/
|
*/
|
||||||
router.get('/stores', async (req: Request, res: Response) => {
|
router.get('/stores', async (req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
const { state, limit = '200' } = req.query;
|
|
||||||
const limitNum = Math.min(parseInt(limit as string, 10), 500);
|
|
||||||
|
|
||||||
// Build WHERE clause based on state filter
|
|
||||||
let stateFilter = '';
|
|
||||||
const params: any[] = [limitNum];
|
|
||||||
if (state && state !== 'all') {
|
|
||||||
stateFilter = 'AND d.state = $2';
|
|
||||||
params.push(state);
|
|
||||||
}
|
|
||||||
|
|
||||||
const { rows: storeRows } = await pool.query(`
|
const { rows: storeRows } = await pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
d.id,
|
d.id,
|
||||||
@@ -317,22 +217,17 @@ router.get('/stores', async (req: Request, res: Response) => {
|
|||||||
d.state,
|
d.state,
|
||||||
d.menu_type,
|
d.menu_type,
|
||||||
d.crawl_enabled,
|
d.crawl_enabled,
|
||||||
c.name as chain_name,
|
COUNT(DISTINCT sp.id) as product_count,
|
||||||
COUNT(DISTINCT sp.id) as sku_count,
|
|
||||||
COUNT(DISTINCT sp.brand_name_raw) as brand_count,
|
COUNT(DISTINCT sp.brand_name_raw) as brand_count,
|
||||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||||
MAX(sp.updated_at) as last_crawl,
|
MAX(sp.updated_at) as last_product_update
|
||||||
(SELECT COUNT(*) FROM store_product_snapshots sps
|
|
||||||
WHERE sps.store_product_id IN (SELECT id FROM store_products WHERE dispensary_id = d.id)) as snapshot_count
|
|
||||||
FROM dispensaries d
|
FROM dispensaries d
|
||||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
|
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
|
||||||
LEFT JOIN chains c ON d.chain_id = c.id
|
WHERE d.state IS NOT NULL
|
||||||
WHERE d.state IS NOT NULL AND d.crawl_enabled = true
|
GROUP BY d.id, d.name, d.dba_name, d.city, d.state, d.menu_type, d.crawl_enabled
|
||||||
${stateFilter}
|
ORDER BY product_count DESC
|
||||||
GROUP BY d.id, d.name, d.dba_name, d.city, d.state, d.menu_type, d.crawl_enabled, c.name
|
LIMIT 200
|
||||||
ORDER BY sku_count DESC
|
`);
|
||||||
LIMIT $1
|
|
||||||
`, params);
|
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
stores: storeRows.map((r: any) => ({
|
stores: storeRows.map((r: any) => ({
|
||||||
@@ -343,13 +238,10 @@ router.get('/stores', async (req: Request, res: Response) => {
|
|||||||
state: r.state,
|
state: r.state,
|
||||||
menuType: r.menu_type,
|
menuType: r.menu_type,
|
||||||
crawlEnabled: r.crawl_enabled,
|
crawlEnabled: r.crawl_enabled,
|
||||||
chainName: r.chain_name || null,
|
productCount: parseInt(r.product_count || '0', 10),
|
||||||
skuCount: parseInt(r.sku_count || '0', 10),
|
|
||||||
snapshotCount: parseInt(r.snapshot_count || '0', 10),
|
|
||||||
brandCount: parseInt(r.brand_count || '0', 10),
|
brandCount: parseInt(r.brand_count || '0', 10),
|
||||||
avgPrice: r.avg_price ? parseFloat(r.avg_price) : null,
|
avgPrice: r.avg_price ? parseFloat(r.avg_price) : null,
|
||||||
lastCrawl: r.last_crawl,
|
lastProductUpdate: r.last_product_update,
|
||||||
crawlFrequencyHours: 4, // Default crawl frequency
|
|
||||||
})),
|
})),
|
||||||
total: storeRows.length,
|
total: storeRows.length,
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -15,14 +15,9 @@
|
|||||||
|
|
||||||
import { Router, Request, Response } from 'express';
|
import { Router, Request, Response } from 'express';
|
||||||
import { pool } from '../db/pool';
|
import { pool } from '../db/pool';
|
||||||
import { authMiddleware, requireRole } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// All job-queue routes require authentication and admin role
|
|
||||||
router.use(authMiddleware);
|
|
||||||
router.use(requireRole('admin', 'superadmin'));
|
|
||||||
|
|
||||||
// In-memory queue state (would be in Redis in production)
|
// In-memory queue state (would be in Redis in production)
|
||||||
let queuePaused = false;
|
let queuePaused = false;
|
||||||
|
|
||||||
@@ -548,9 +543,6 @@ router.post('/bulk-priority', async (req: Request, res: Response) => {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* POST /api/job-queue/enqueue - Add a new job to the queue
|
* POST /api/job-queue/enqueue - Add a new job to the queue
|
||||||
*
|
|
||||||
* 2024-12-10: Rewired to use worker_tasks via taskService.
|
|
||||||
* Legacy dispensary_crawl_jobs code commented out below.
|
|
||||||
*/
|
*/
|
||||||
router.post('/enqueue', async (req: Request, res: Response) => {
|
router.post('/enqueue', async (req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
@@ -560,59 +552,6 @@ router.post('/enqueue', async (req: Request, res: Response) => {
|
|||||||
return res.status(400).json({ success: false, error: 'dispensary_id is required' });
|
return res.status(400).json({ success: false, error: 'dispensary_id is required' });
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2024-12-10: Map legacy job_type to new task role
|
|
||||||
const roleMap: Record<string, string> = {
|
|
||||||
'dutchie_product_crawl': 'product_refresh',
|
|
||||||
'menu_detection': 'entry_point_discovery',
|
|
||||||
'menu_detection_single': 'entry_point_discovery',
|
|
||||||
'product_discovery': 'product_discovery',
|
|
||||||
'store_discovery': 'store_discovery',
|
|
||||||
};
|
|
||||||
const role = roleMap[job_type] || 'product_refresh';
|
|
||||||
|
|
||||||
// 2024-12-10: Use taskService to create task in worker_tasks table
|
|
||||||
const { taskService } = await import('../tasks/task-service');
|
|
||||||
|
|
||||||
// Check if task already pending for this dispensary
|
|
||||||
const existingTasks = await taskService.listTasks({
|
|
||||||
dispensary_id,
|
|
||||||
role: role as any,
|
|
||||||
status: ['pending', 'claimed', 'running'],
|
|
||||||
limit: 1,
|
|
||||||
});
|
|
||||||
|
|
||||||
if (existingTasks.length > 0) {
|
|
||||||
return res.json({
|
|
||||||
success: true,
|
|
||||||
task_id: existingTasks[0].id,
|
|
||||||
message: 'Task already queued'
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const task = await taskService.createTask({
|
|
||||||
role: role as any,
|
|
||||||
dispensary_id,
|
|
||||||
priority,
|
|
||||||
});
|
|
||||||
|
|
||||||
res.json({ success: true, task_id: task.id, message: 'Task enqueued' });
|
|
||||||
} catch (error: any) {
|
|
||||||
console.error('[JobQueue] Error enqueuing task:', error);
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/*
|
|
||||||
* LEGACY CODE - 2024-12-10: Commented out, was using orphaned dispensary_crawl_jobs table
|
|
||||||
*
|
|
||||||
router.post('/enqueue', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { dispensary_id, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;
|
|
||||||
|
|
||||||
if (!dispensary_id) {
|
|
||||||
return res.status(400).json({ success: false, error: 'dispensary_id is required' });
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if job already pending for this dispensary
|
// Check if job already pending for this dispensary
|
||||||
const existing = await pool.query(`
|
const existing = await pool.query(`
|
||||||
SELECT id FROM dispensary_crawl_jobs
|
SELECT id FROM dispensary_crawl_jobs
|
||||||
@@ -646,7 +585,6 @@ router.post('/enqueue', async (req: Request, res: Response) => {
|
|||||||
res.status(500).json({ success: false, error: error.message });
|
res.status(500).json({ success: false, error: error.message });
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* POST /api/job-queue/pause - Pause queue processing
|
* POST /api/job-queue/pause - Pause queue processing
|
||||||
@@ -674,8 +612,6 @@ router.get('/paused', async (_req: Request, res: Response) => {
|
|||||||
/**
|
/**
|
||||||
* POST /api/job-queue/enqueue-batch - Queue multiple dispensaries at once
|
* POST /api/job-queue/enqueue-batch - Queue multiple dispensaries at once
|
||||||
* Body: { dispensary_ids: number[], job_type?: string, priority?: number }
|
* Body: { dispensary_ids: number[], job_type?: string, priority?: number }
|
||||||
*
|
|
||||||
* 2024-12-10: Rewired to use worker_tasks via taskService.
|
|
||||||
*/
|
*/
|
||||||
router.post('/enqueue-batch', async (req: Request, res: Response) => {
|
router.post('/enqueue-batch', async (req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
@@ -689,30 +625,35 @@ router.post('/enqueue-batch', async (req: Request, res: Response) => {
|
|||||||
return res.status(400).json({ success: false, error: 'Maximum 500 dispensaries per batch' });
|
return res.status(400).json({ success: false, error: 'Maximum 500 dispensaries per batch' });
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2024-12-10: Map legacy job_type to new task role
|
// Insert jobs, skipping duplicates
|
||||||
const roleMap: Record<string, string> = {
|
const { rows } = await pool.query(`
|
||||||
'dutchie_product_crawl': 'product_refresh',
|
INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type, status, created_at)
|
||||||
'menu_detection': 'entry_point_discovery',
|
SELECT
|
||||||
'product_discovery': 'product_discovery',
|
d.id,
|
||||||
};
|
$2::text,
|
||||||
const role = roleMap[job_type] || 'product_refresh';
|
$3::integer,
|
||||||
|
'api_batch',
|
||||||
// 2024-12-10: Use taskService to create tasks in worker_tasks table
|
'pending',
|
||||||
const { taskService } = await import('../tasks/task-service');
|
NOW()
|
||||||
|
FROM dispensaries d
|
||||||
const tasks = dispensary_ids.map(dispensary_id => ({
|
WHERE d.id = ANY($1::int[])
|
||||||
role: role as any,
|
AND d.crawl_enabled = true
|
||||||
dispensary_id,
|
AND d.platform_dispensary_id IS NOT NULL
|
||||||
priority,
|
AND NOT EXISTS (
|
||||||
}));
|
SELECT 1 FROM dispensary_crawl_jobs cj
|
||||||
|
WHERE cj.dispensary_id = d.id
|
||||||
const createdCount = await taskService.createTasks(tasks);
|
AND cj.job_type = $2::text
|
||||||
|
AND cj.status IN ('pending', 'running')
|
||||||
|
)
|
||||||
|
RETURNING id, dispensary_id
|
||||||
|
`, [dispensary_ids, job_type, priority]);
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
success: true,
|
success: true,
|
||||||
queued: createdCount,
|
queued: rows.length,
|
||||||
requested: dispensary_ids.length,
|
requested: dispensary_ids.length,
|
||||||
message: `Queued ${createdCount} of ${dispensary_ids.length} dispensaries`
|
job_ids: rows.map(r => r.id),
|
||||||
|
message: `Queued ${rows.length} of ${dispensary_ids.length} dispensaries`
|
||||||
});
|
});
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
console.error('[JobQueue] Error batch enqueuing:', error);
|
console.error('[JobQueue] Error batch enqueuing:', error);
|
||||||
@@ -723,8 +664,6 @@ router.post('/enqueue-batch', async (req: Request, res: Response) => {
|
|||||||
/**
|
/**
|
||||||
* POST /api/job-queue/enqueue-state - Queue all crawl-enabled dispensaries for a state
|
* POST /api/job-queue/enqueue-state - Queue all crawl-enabled dispensaries for a state
|
||||||
* Body: { state_code: string, job_type?: string, priority?: number, limit?: number }
|
* Body: { state_code: string, job_type?: string, priority?: number, limit?: number }
|
||||||
*
|
|
||||||
* 2024-12-10: Rewired to use worker_tasks via taskService.
|
|
||||||
*/
|
*/
|
||||||
router.post('/enqueue-state', async (req: Request, res: Response) => {
|
router.post('/enqueue-state', async (req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
@@ -734,55 +673,52 @@ router.post('/enqueue-state', async (req: Request, res: Response) => {
|
|||||||
return res.status(400).json({ success: false, error: 'state_code is required (e.g., "AZ")' });
|
return res.status(400).json({ success: false, error: 'state_code is required (e.g., "AZ")' });
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2024-12-10: Map legacy job_type to new task role
|
// Get state_id and queue jobs
|
||||||
const roleMap: Record<string, string> = {
|
const { rows } = await pool.query(`
|
||||||
'dutchie_product_crawl': 'product_refresh',
|
WITH target_state AS (
|
||||||
'menu_detection': 'entry_point_discovery',
|
SELECT id FROM states WHERE code = $1
|
||||||
'product_discovery': 'product_discovery',
|
)
|
||||||
};
|
INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type, status, created_at)
|
||||||
const role = roleMap[job_type] || 'product_refresh';
|
SELECT
|
||||||
|
d.id,
|
||||||
// Get dispensary IDs for the state
|
$2::text,
|
||||||
const dispensaryResult = await pool.query(`
|
$3::integer,
|
||||||
SELECT d.id
|
'api_state',
|
||||||
FROM dispensaries d
|
'pending',
|
||||||
JOIN states s ON s.id = d.state_id
|
NOW()
|
||||||
WHERE s.code = $1
|
FROM dispensaries d, target_state
|
||||||
|
WHERE d.state_id = target_state.id
|
||||||
AND d.crawl_enabled = true
|
AND d.crawl_enabled = true
|
||||||
AND d.platform_dispensary_id IS NOT NULL
|
AND d.platform_dispensary_id IS NOT NULL
|
||||||
LIMIT $2
|
AND NOT EXISTS (
|
||||||
`, [state_code.toUpperCase(), limit]);
|
SELECT 1 FROM dispensary_crawl_jobs cj
|
||||||
|
WHERE cj.dispensary_id = d.id
|
||||||
const dispensary_ids = dispensaryResult.rows.map((r: any) => r.id);
|
AND cj.job_type = $2::text
|
||||||
|
AND cj.status IN ('pending', 'running')
|
||||||
// 2024-12-10: Use taskService to create tasks in worker_tasks table
|
)
|
||||||
const { taskService } = await import('../tasks/task-service');
|
LIMIT $4::integer
|
||||||
|
RETURNING id, dispensary_id
|
||||||
const tasks = dispensary_ids.map((dispensary_id: number) => ({
|
`, [state_code.toUpperCase(), job_type, priority, limit]);
|
||||||
role: role as any,
|
|
||||||
dispensary_id,
|
|
||||||
priority,
|
|
||||||
}));
|
|
||||||
|
|
||||||
const createdCount = await taskService.createTasks(tasks);
|
|
||||||
|
|
||||||
// Get total available count
|
// Get total available count
|
||||||
const countResult = await pool.query(`
|
const countResult = await pool.query(`
|
||||||
|
WITH target_state AS (
|
||||||
|
SELECT id FROM states WHERE code = $1
|
||||||
|
)
|
||||||
SELECT COUNT(*) as total
|
SELECT COUNT(*) as total
|
||||||
FROM dispensaries d
|
FROM dispensaries d, target_state
|
||||||
JOIN states s ON s.id = d.state_id
|
WHERE d.state_id = target_state.id
|
||||||
WHERE s.code = $1
|
|
||||||
AND d.crawl_enabled = true
|
AND d.crawl_enabled = true
|
||||||
AND d.platform_dispensary_id IS NOT NULL
|
AND d.platform_dispensary_id IS NOT NULL
|
||||||
`, [state_code.toUpperCase()]);
|
`, [state_code.toUpperCase()]);
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
success: true,
|
success: true,
|
||||||
queued: createdCount,
|
queued: rows.length,
|
||||||
total_available: parseInt(countResult.rows[0].total),
|
total_available: parseInt(countResult.rows[0].total),
|
||||||
state: state_code.toUpperCase(),
|
state: state_code.toUpperCase(),
|
||||||
role,
|
job_type,
|
||||||
message: `Queued ${createdCount} dispensaries for ${state_code.toUpperCase()}`
|
message: `Queued ${rows.length} dispensaries for ${state_code.toUpperCase()}`
|
||||||
});
|
});
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
console.error('[JobQueue] Error enqueuing state:', error);
|
console.error('[JobQueue] Error enqueuing state:', error);
|
||||||
|
|||||||
@@ -1,145 +0,0 @@
|
|||||||
/**
|
|
||||||
* Kubernetes Control Routes
|
|
||||||
*
|
|
||||||
* Provides admin UI control over k8s resources like worker scaling.
|
|
||||||
* Uses in-cluster config when running in k8s, or kubeconfig locally.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { Router, Request, Response } from 'express';
|
|
||||||
import * as k8s from '@kubernetes/client-node';
|
|
||||||
import { authMiddleware, requireRole } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
|
||||||
|
|
||||||
// K8s control routes require authentication and admin role
|
|
||||||
router.use(authMiddleware);
|
|
||||||
router.use(requireRole('admin', 'superadmin'));
|
|
||||||
|
|
||||||
// K8s client setup - lazy initialization
|
|
||||||
let appsApi: k8s.AppsV1Api | null = null;
|
|
||||||
let k8sError: string | null = null;
|
|
||||||
|
|
||||||
function getK8sClient(): k8s.AppsV1Api | null {
|
|
||||||
if (appsApi) return appsApi;
|
|
||||||
if (k8sError) return null;
|
|
||||||
|
|
||||||
try {
|
|
||||||
const kc = new k8s.KubeConfig();
|
|
||||||
|
|
||||||
// Try in-cluster config first (when running in k8s)
|
|
||||||
try {
|
|
||||||
kc.loadFromCluster();
|
|
||||||
console.log('[K8s] Loaded in-cluster config');
|
|
||||||
} catch {
|
|
||||||
// Fall back to default kubeconfig (local dev)
|
|
||||||
try {
|
|
||||||
kc.loadFromDefault();
|
|
||||||
console.log('[K8s] Loaded default kubeconfig');
|
|
||||||
} catch (e) {
|
|
||||||
k8sError = 'No k8s config available';
|
|
||||||
console.log('[K8s] No config available - k8s routes disabled');
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
appsApi = kc.makeApiClient(k8s.AppsV1Api);
|
|
||||||
return appsApi;
|
|
||||||
} catch (e: any) {
|
|
||||||
k8sError = e.message;
|
|
||||||
console.error('[K8s] Failed to initialize client:', e.message);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
|
|
||||||
const WORKER_DEPLOYMENT = 'scraper-worker';
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/k8s/workers
|
|
||||||
* Get current worker deployment status
|
|
||||||
*/
|
|
||||||
router.get('/workers', async (_req: Request, res: Response) => {
|
|
||||||
const client = getK8sClient();
|
|
||||||
|
|
||||||
if (!client) {
|
|
||||||
return res.json({
|
|
||||||
success: true,
|
|
||||||
available: false,
|
|
||||||
error: k8sError || 'K8s not available',
|
|
||||||
replicas: 0,
|
|
||||||
readyReplicas: 0,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const deployment = await client.readNamespacedDeployment({
|
|
||||||
name: WORKER_DEPLOYMENT,
|
|
||||||
namespace: NAMESPACE,
|
|
||||||
});
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
available: true,
|
|
||||||
replicas: deployment.spec?.replicas || 0,
|
|
||||||
readyReplicas: deployment.status?.readyReplicas || 0,
|
|
||||||
availableReplicas: deployment.status?.availableReplicas || 0,
|
|
||||||
updatedReplicas: deployment.status?.updatedReplicas || 0,
|
|
||||||
});
|
|
||||||
} catch (e: any) {
|
|
||||||
console.error('[K8s] Error getting deployment:', e.message);
|
|
||||||
res.status(500).json({
|
|
||||||
success: false,
|
|
||||||
error: e.message,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/k8s/workers/scale
|
|
||||||
* Scale worker deployment
|
|
||||||
* Body: { replicas: number }
|
|
||||||
*/
|
|
||||||
router.post('/workers/scale', async (req: Request, res: Response) => {
|
|
||||||
const client = getK8sClient();
|
|
||||||
|
|
||||||
if (!client) {
|
|
||||||
return res.status(503).json({
|
|
||||||
success: false,
|
|
||||||
error: k8sError || 'K8s not available',
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const { replicas } = req.body;
|
|
||||||
|
|
||||||
if (typeof replicas !== 'number' || replicas < 0 || replicas > 50) {
|
|
||||||
return res.status(400).json({
|
|
||||||
success: false,
|
|
||||||
error: 'replicas must be a number between 0 and 50',
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Patch the deployment to set replicas
|
|
||||||
await client.patchNamespacedDeploymentScale({
|
|
||||||
name: WORKER_DEPLOYMENT,
|
|
||||||
namespace: NAMESPACE,
|
|
||||||
body: { spec: { replicas } },
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`[K8s] Scaled ${WORKER_DEPLOYMENT} to ${replicas} replicas`);
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
replicas,
|
|
||||||
message: `Scaled to ${replicas} workers`,
|
|
||||||
});
|
|
||||||
} catch (e: any) {
|
|
||||||
console.error('[K8s] Error scaling deployment:', e.message);
|
|
||||||
res.status(500).json({
|
|
||||||
success: false,
|
|
||||||
error: e.message,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
export default router;
|
|
||||||
@@ -291,107 +291,6 @@ router.get('/stores/:id/summary', async (req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/markets/stores/:id/crawl-history
|
|
||||||
* Get crawl history for a specific store
|
|
||||||
*/
|
|
||||||
router.get('/stores/:id/crawl-history', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { id } = req.params;
|
|
||||||
const { limit = '50' } = req.query;
|
|
||||||
const dispensaryId = parseInt(id, 10);
|
|
||||||
const limitNum = Math.min(parseInt(limit as string, 10), 100);
|
|
||||||
|
|
||||||
// Get crawl history from crawl_orchestration_traces
|
|
||||||
const { rows: historyRows } = await pool.query(`
|
|
||||||
SELECT
|
|
||||||
id,
|
|
||||||
run_id,
|
|
||||||
profile_key,
|
|
||||||
crawler_module,
|
|
||||||
state_at_start,
|
|
||||||
state_at_end,
|
|
||||||
total_steps,
|
|
||||||
duration_ms,
|
|
||||||
success,
|
|
||||||
error_message,
|
|
||||||
products_found,
|
|
||||||
started_at,
|
|
||||||
completed_at
|
|
||||||
FROM crawl_orchestration_traces
|
|
||||||
WHERE dispensary_id = $1
|
|
||||||
ORDER BY started_at DESC
|
|
||||||
LIMIT $2
|
|
||||||
`, [dispensaryId, limitNum]);
|
|
||||||
|
|
||||||
// Get next scheduled crawl if available
|
|
||||||
const { rows: scheduleRows } = await pool.query(`
|
|
||||||
SELECT
|
|
||||||
js.id as schedule_id,
|
|
||||||
js.job_name,
|
|
||||||
js.enabled,
|
|
||||||
js.base_interval_minutes,
|
|
||||||
js.jitter_minutes,
|
|
||||||
js.next_run_at,
|
|
||||||
js.last_run_at,
|
|
||||||
js.last_status
|
|
||||||
FROM job_schedules js
|
|
||||||
WHERE js.enabled = true
|
|
||||||
AND js.job_config->>'dispensaryId' = $1::text
|
|
||||||
ORDER BY js.next_run_at
|
|
||||||
LIMIT 1
|
|
||||||
`, [dispensaryId.toString()]);
|
|
||||||
|
|
||||||
// Get dispensary info for slug
|
|
||||||
const { rows: dispRows } = await pool.query(`
|
|
||||||
SELECT
|
|
||||||
id,
|
|
||||||
name,
|
|
||||||
dba_name,
|
|
||||||
slug,
|
|
||||||
state,
|
|
||||||
city,
|
|
||||||
menu_type,
|
|
||||||
platform_dispensary_id,
|
|
||||||
last_menu_scrape
|
|
||||||
FROM dispensaries
|
|
||||||
WHERE id = $1
|
|
||||||
`, [dispensaryId]);
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
dispensary: dispRows[0] || null,
|
|
||||||
history: historyRows.map(row => ({
|
|
||||||
id: row.id,
|
|
||||||
runId: row.run_id,
|
|
||||||
profileKey: row.profile_key,
|
|
||||||
crawlerModule: row.crawler_module,
|
|
||||||
stateAtStart: row.state_at_start,
|
|
||||||
stateAtEnd: row.state_at_end,
|
|
||||||
totalSteps: row.total_steps,
|
|
||||||
durationMs: row.duration_ms,
|
|
||||||
success: row.success,
|
|
||||||
errorMessage: row.error_message,
|
|
||||||
productsFound: row.products_found,
|
|
||||||
startedAt: row.started_at?.toISOString() || null,
|
|
||||||
completedAt: row.completed_at?.toISOString() || null,
|
|
||||||
})),
|
|
||||||
nextSchedule: scheduleRows[0] ? {
|
|
||||||
scheduleId: scheduleRows[0].schedule_id,
|
|
||||||
jobName: scheduleRows[0].job_name,
|
|
||||||
enabled: scheduleRows[0].enabled,
|
|
||||||
baseIntervalMinutes: scheduleRows[0].base_interval_minutes,
|
|
||||||
jitterMinutes: scheduleRows[0].jitter_minutes,
|
|
||||||
nextRunAt: scheduleRows[0].next_run_at?.toISOString() || null,
|
|
||||||
lastRunAt: scheduleRows[0].last_run_at?.toISOString() || null,
|
|
||||||
lastStatus: scheduleRows[0].last_status,
|
|
||||||
} : null,
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
console.error('[Markets] Error fetching crawl history:', error.message);
|
|
||||||
res.status(500).json({ error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/markets/stores/:id/products
|
* GET /api/markets/stores/:id/products
|
||||||
* Get products for a store with filtering and pagination
|
* Get products for a store with filtering and pagination
|
||||||
|
|||||||
@@ -11,14 +11,9 @@ import { getLatestTrace, getTracesForDispensary, getTraceById } from '../service
|
|||||||
import { getProviderDisplayName } from '../utils/provider-display';
|
import { getProviderDisplayName } from '../utils/provider-display';
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import { authMiddleware, requireRole } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// Orchestrator admin routes require authentication and admin role
|
|
||||||
router.use(authMiddleware);
|
|
||||||
router.use(requireRole('admin', 'superadmin'));
|
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// ORCHESTRATOR METRICS
|
// ORCHESTRATOR METRICS
|
||||||
// ============================================================
|
// ============================================================
|
||||||
@@ -83,14 +78,14 @@ router.get('/metrics', async (_req: Request, res: Response) => {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/admin/orchestrator/states
|
* GET /api/admin/orchestrator/states
|
||||||
* Returns array of states with at least one crawl-enabled dispensary
|
* Returns array of states with at least one known dispensary
|
||||||
*/
|
*/
|
||||||
router.get('/states', async (_req: Request, res: Response) => {
|
router.get('/states', async (_req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
const { rows } = await pool.query(`
|
const { rows } = await pool.query(`
|
||||||
SELECT DISTINCT state, COUNT(*) as store_count
|
SELECT DISTINCT state, COUNT(*) as store_count
|
||||||
FROM dispensaries
|
FROM dispensaries
|
||||||
WHERE state IS NOT NULL AND crawl_enabled = true
|
WHERE state IS NOT NULL
|
||||||
GROUP BY state
|
GROUP BY state
|
||||||
ORDER BY state
|
ORDER BY state
|
||||||
`);
|
`);
|
||||||
|
|||||||
@@ -1,338 +0,0 @@
|
|||||||
/**
|
|
||||||
* Payload Routes
|
|
||||||
*
|
|
||||||
* Per TASK_WORKFLOW_2024-12-10.md: API access to raw crawl payloads.
|
|
||||||
*
|
|
||||||
* Endpoints:
|
|
||||||
* - GET /api/payloads - List payload metadata (paginated)
|
|
||||||
* - GET /api/payloads/:id - Get payload metadata by ID
|
|
||||||
* - GET /api/payloads/:id/data - Get full payload JSON
|
|
||||||
* - GET /api/payloads/store/:dispensaryId - List payloads for a store
|
|
||||||
* - GET /api/payloads/store/:dispensaryId/latest - Get latest payload for a store
|
|
||||||
* - GET /api/payloads/store/:dispensaryId/diff - Diff two payloads
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { Router, Request, Response } from 'express';
|
|
||||||
import { getPool } from '../db/pool';
|
|
||||||
import {
|
|
||||||
loadRawPayloadById,
|
|
||||||
getLatestPayload,
|
|
||||||
getRecentPayloads,
|
|
||||||
listPayloadMetadata,
|
|
||||||
} from '../utils/payload-storage';
|
|
||||||
import { Pool } from 'pg';
|
|
||||||
import { authMiddleware } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
|
||||||
|
|
||||||
// All payload routes require authentication (trusted origins or API token)
|
|
||||||
router.use(authMiddleware);
|
|
||||||
|
|
||||||
// Get pool instance for queries
|
|
||||||
const getDbPool = (): Pool => getPool() as unknown as Pool;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/payloads
|
|
||||||
* List payload metadata (paginated)
|
|
||||||
*/
|
|
||||||
router.get('/', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const pool = getDbPool();
|
|
||||||
const limit = Math.min(parseInt(req.query.limit as string) || 50, 100);
|
|
||||||
const offset = parseInt(req.query.offset as string) || 0;
|
|
||||||
const dispensaryId = req.query.dispensary_id ? parseInt(req.query.dispensary_id as string) : undefined;
|
|
||||||
|
|
||||||
const payloads = await listPayloadMetadata(pool, {
|
|
||||||
dispensaryId,
|
|
||||||
limit,
|
|
||||||
offset,
|
|
||||||
});
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
payloads,
|
|
||||||
pagination: { limit, offset },
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
console.error('[Payloads] List error:', error.message);
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/payloads/:id
|
|
||||||
* Get payload metadata by ID
|
|
||||||
*/
|
|
||||||
router.get('/:id', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const pool = getDbPool();
|
|
||||||
const id = parseInt(req.params.id);
|
|
||||||
|
|
||||||
const result = await pool.query(`
|
|
||||||
SELECT
|
|
||||||
p.id,
|
|
||||||
p.dispensary_id,
|
|
||||||
p.crawl_run_id,
|
|
||||||
p.storage_path,
|
|
||||||
p.product_count,
|
|
||||||
p.size_bytes,
|
|
||||||
p.size_bytes_raw,
|
|
||||||
p.fetched_at,
|
|
||||||
p.processed_at,
|
|
||||||
p.checksum_sha256,
|
|
||||||
d.name as dispensary_name
|
|
||||||
FROM raw_crawl_payloads p
|
|
||||||
LEFT JOIN dispensaries d ON d.id = p.dispensary_id
|
|
||||||
WHERE p.id = $1
|
|
||||||
`, [id]);
|
|
||||||
|
|
||||||
if (result.rows.length === 0) {
|
|
||||||
return res.status(404).json({ success: false, error: 'Payload not found' });
|
|
||||||
}
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
payload: result.rows[0],
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
console.error('[Payloads] Get error:', error.message);
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/payloads/:id/data
|
|
||||||
* Get full payload JSON (decompressed from disk)
|
|
||||||
*/
|
|
||||||
router.get('/:id/data', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const pool = getDbPool();
|
|
||||||
const id = parseInt(req.params.id);
|
|
||||||
|
|
||||||
const result = await loadRawPayloadById(pool, id);
|
|
||||||
|
|
||||||
if (!result) {
|
|
||||||
return res.status(404).json({ success: false, error: 'Payload not found' });
|
|
||||||
}
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
metadata: result.metadata,
|
|
||||||
data: result.payload,
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
console.error('[Payloads] Get data error:', error.message);
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/payloads/store/:dispensaryId
|
|
||||||
* List payloads for a specific store
|
|
||||||
*/
|
|
||||||
router.get('/store/:dispensaryId', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const pool = getDbPool();
|
|
||||||
const dispensaryId = parseInt(req.params.dispensaryId);
|
|
||||||
const limit = Math.min(parseInt(req.query.limit as string) || 20, 100);
|
|
||||||
const offset = parseInt(req.query.offset as string) || 0;
|
|
||||||
|
|
||||||
const payloads = await listPayloadMetadata(pool, {
|
|
||||||
dispensaryId,
|
|
||||||
limit,
|
|
||||||
offset,
|
|
||||||
});
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
dispensaryId,
|
|
||||||
payloads,
|
|
||||||
pagination: { limit, offset },
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
console.error('[Payloads] Store list error:', error.message);
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/payloads/store/:dispensaryId/latest
|
|
||||||
* Get the latest payload for a store (with full data)
|
|
||||||
*/
|
|
||||||
router.get('/store/:dispensaryId/latest', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const pool = getDbPool();
|
|
||||||
const dispensaryId = parseInt(req.params.dispensaryId);
|
|
||||||
|
|
||||||
const result = await getLatestPayload(pool, dispensaryId);
|
|
||||||
|
|
||||||
if (!result) {
|
|
||||||
return res.status(404).json({
|
|
||||||
success: false,
|
|
||||||
error: `No payloads found for dispensary ${dispensaryId}`,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
metadata: result.metadata,
|
|
||||||
data: result.payload,
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
console.error('[Payloads] Latest error:', error.message);
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/payloads/store/:dispensaryId/diff
|
|
||||||
* Compare two payloads for a store
|
|
||||||
*
|
|
||||||
* Query params:
|
|
||||||
* - from: payload ID (older)
|
|
||||||
* - to: payload ID (newer) - optional, defaults to latest
|
|
||||||
*/
|
|
||||||
router.get('/store/:dispensaryId/diff', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const pool = getDbPool();
|
|
||||||
const dispensaryId = parseInt(req.params.dispensaryId);
|
|
||||||
const fromId = req.query.from ? parseInt(req.query.from as string) : undefined;
|
|
||||||
const toId = req.query.to ? parseInt(req.query.to as string) : undefined;
|
|
||||||
|
|
||||||
let fromPayload: any;
|
|
||||||
let toPayload: any;
|
|
||||||
|
|
||||||
if (fromId && toId) {
|
|
||||||
// Load specific payloads
|
|
||||||
const [from, to] = await Promise.all([
|
|
||||||
loadRawPayloadById(pool, fromId),
|
|
||||||
loadRawPayloadById(pool, toId),
|
|
||||||
]);
|
|
||||||
fromPayload = from;
|
|
||||||
toPayload = to;
|
|
||||||
} else {
|
|
||||||
// Load two most recent
|
|
||||||
const recent = await getRecentPayloads(pool, dispensaryId, 2);
|
|
||||||
if (recent.length < 2) {
|
|
||||||
return res.status(400).json({
|
|
||||||
success: false,
|
|
||||||
error: 'Need at least 2 payloads to diff. Only found ' + recent.length,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
toPayload = recent[0]; // Most recent
|
|
||||||
fromPayload = recent[1]; // Previous
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!fromPayload || !toPayload) {
|
|
||||||
return res.status(404).json({ success: false, error: 'One or both payloads not found' });
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build product maps by ID
|
|
||||||
const fromProducts = new Map<string, any>();
|
|
||||||
const toProducts = new Map<string, any>();
|
|
||||||
|
|
||||||
for (const p of fromPayload.payload.products || []) {
|
|
||||||
const id = p._id || p.id;
|
|
||||||
if (id) fromProducts.set(id, p);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const p of toPayload.payload.products || []) {
|
|
||||||
const id = p._id || p.id;
|
|
||||||
if (id) toProducts.set(id, p);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find differences
|
|
||||||
const added: any[] = [];
|
|
||||||
const removed: any[] = [];
|
|
||||||
const priceChanges: any[] = [];
|
|
||||||
const stockChanges: any[] = [];
|
|
||||||
|
|
||||||
// Products in "to" but not in "from" = added
|
|
||||||
for (const [id, product] of toProducts) {
|
|
||||||
if (!fromProducts.has(id)) {
|
|
||||||
added.push({
|
|
||||||
id,
|
|
||||||
name: product.name,
|
|
||||||
brand: product.brand?.name,
|
|
||||||
price: product.Prices?.[0]?.price,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Products in "from" but not in "to" = removed
|
|
||||||
for (const [id, product] of fromProducts) {
|
|
||||||
if (!toProducts.has(id)) {
|
|
||||||
removed.push({
|
|
||||||
id,
|
|
||||||
name: product.name,
|
|
||||||
brand: product.brand?.name,
|
|
||||||
price: product.Prices?.[0]?.price,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Products in both - check for changes
|
|
||||||
for (const [id, toProduct] of toProducts) {
|
|
||||||
const fromProduct = fromProducts.get(id);
|
|
||||||
if (!fromProduct) continue;
|
|
||||||
|
|
||||||
const fromPrice = fromProduct.Prices?.[0]?.price;
|
|
||||||
const toPrice = toProduct.Prices?.[0]?.price;
|
|
||||||
|
|
||||||
if (fromPrice !== toPrice) {
|
|
||||||
priceChanges.push({
|
|
||||||
id,
|
|
||||||
name: toProduct.name,
|
|
||||||
brand: toProduct.brand?.name,
|
|
||||||
oldPrice: fromPrice,
|
|
||||||
newPrice: toPrice,
|
|
||||||
change: toPrice && fromPrice ? toPrice - fromPrice : null,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const fromStock = fromProduct.Status || fromProduct.status;
|
|
||||||
const toStock = toProduct.Status || toProduct.status;
|
|
||||||
|
|
||||||
if (fromStock !== toStock) {
|
|
||||||
stockChanges.push({
|
|
||||||
id,
|
|
||||||
name: toProduct.name,
|
|
||||||
brand: toProduct.brand?.name,
|
|
||||||
oldStatus: fromStock,
|
|
||||||
newStatus: toStock,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
from: {
|
|
||||||
id: fromPayload.metadata.id,
|
|
||||||
fetchedAt: fromPayload.metadata.fetchedAt,
|
|
||||||
productCount: fromPayload.metadata.productCount,
|
|
||||||
},
|
|
||||||
to: {
|
|
||||||
id: toPayload.metadata.id,
|
|
||||||
fetchedAt: toPayload.metadata.fetchedAt,
|
|
||||||
productCount: toPayload.metadata.productCount,
|
|
||||||
},
|
|
||||||
diff: {
|
|
||||||
added: added.length,
|
|
||||||
removed: removed.length,
|
|
||||||
priceChanges: priceChanges.length,
|
|
||||||
stockChanges: stockChanges.length,
|
|
||||||
},
|
|
||||||
details: {
|
|
||||||
added,
|
|
||||||
removed,
|
|
||||||
priceChanges,
|
|
||||||
stockChanges,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
console.error('[Payloads] Diff error:', error.message);
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
export default router;
|
|
||||||
@@ -18,14 +18,9 @@
|
|||||||
|
|
||||||
import { Router, Request, Response } from 'express';
|
import { Router, Request, Response } from 'express';
|
||||||
import { pool } from '../db/pool';
|
import { pool } from '../db/pool';
|
||||||
import { authMiddleware, requireRole } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// Pipeline routes require authentication and admin role
|
|
||||||
router.use(authMiddleware);
|
|
||||||
router.use(requireRole('admin', 'superadmin'));
|
|
||||||
|
|
||||||
// Valid stages
|
// Valid stages
|
||||||
const STAGES = ['discovered', 'validated', 'promoted', 'sandbox', 'production', 'failing'] as const;
|
const STAGES = ['discovered', 'validated', 'promoted', 'sandbox', 'production', 'failing'] as const;
|
||||||
type Stage = typeof STAGES[number];
|
type Stage = typeof STAGES[number];
|
||||||
|
|||||||
@@ -183,8 +183,8 @@ router.post('/test-all', requireRole('superadmin', 'admin'), async (req, res) =>
|
|||||||
return res.status(400).json({ error: 'Concurrency must be between 1 and 50' });
|
return res.status(400).json({ error: 'Concurrency must be between 1 and 50' });
|
||||||
}
|
}
|
||||||
|
|
||||||
const { jobId, totalProxies } = await createProxyTestJob(mode, concurrency);
|
const jobId = await createProxyTestJob(mode, concurrency);
|
||||||
res.json({ jobId, total: totalProxies, mode, concurrency, message: `Proxy test job started (mode: ${mode}, concurrency: ${concurrency})` });
|
res.json({ jobId, mode, concurrency, message: `Proxy test job started (mode: ${mode}, concurrency: ${concurrency})` });
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
console.error('Error starting proxy test job:', error);
|
console.error('Error starting proxy test job:', error);
|
||||||
res.status(500).json({ error: error.message || 'Failed to start proxy test job' });
|
res.status(500).json({ error: error.message || 'Failed to start proxy test job' });
|
||||||
@@ -195,8 +195,8 @@ router.post('/test-all', requireRole('superadmin', 'admin'), async (req, res) =>
|
|||||||
router.post('/test-failed', requireRole('superadmin', 'admin'), async (req, res) => {
|
router.post('/test-failed', requireRole('superadmin', 'admin'), async (req, res) => {
|
||||||
try {
|
try {
|
||||||
const concurrency = parseInt(req.query.concurrency as string) || 10;
|
const concurrency = parseInt(req.query.concurrency as string) || 10;
|
||||||
const { jobId, totalProxies } = await createProxyTestJob('failed', concurrency);
|
const jobId = await createProxyTestJob('failed', concurrency);
|
||||||
res.json({ jobId, total: totalProxies, mode: 'failed', concurrency, message: 'Retesting failed proxies...' });
|
res.json({ jobId, mode: 'failed', concurrency, message: 'Retesting failed proxies...' });
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
console.error('Error starting failed proxy test:', error);
|
console.error('Error starting failed proxy test:', error);
|
||||||
res.status(500).json({ error: error.message || 'Failed to start proxy test job' });
|
res.status(500).json({ error: error.message || 'Failed to start proxy test job' });
|
||||||
@@ -278,7 +278,7 @@ router.post('/update-locations', requireRole('superadmin', 'admin'), async (req,
|
|||||||
|
|
||||||
// Run in background
|
// Run in background
|
||||||
updateAllProxyLocations().catch(err => {
|
updateAllProxyLocations().catch(err => {
|
||||||
console.error('Location update failed:', err);
|
console.error('❌ Location update failed:', err);
|
||||||
});
|
});
|
||||||
|
|
||||||
res.json({ message: 'Location update job started' });
|
res.json({ message: 'Location update job started' });
|
||||||
|
|||||||
@@ -130,12 +130,6 @@ const CONSUMER_TRUSTED_ORIGINS = [
|
|||||||
'http://localhost:3002',
|
'http://localhost:3002',
|
||||||
];
|
];
|
||||||
|
|
||||||
// Wildcard trusted origin patterns (*.domain.com)
|
|
||||||
const CONSUMER_TRUSTED_PATTERNS = [
|
|
||||||
/^https:\/\/([a-z0-9-]+\.)?cannaiq\.co$/,
|
|
||||||
/^https:\/\/([a-z0-9-]+\.)?cannabrands\.app$/,
|
|
||||||
];
|
|
||||||
|
|
||||||
// Trusted IPs for local development (bypass API key auth)
|
// Trusted IPs for local development (bypass API key auth)
|
||||||
const TRUSTED_IPS = ['127.0.0.1', '::1', '::ffff:127.0.0.1'];
|
const TRUSTED_IPS = ['127.0.0.1', '::1', '::ffff:127.0.0.1'];
|
||||||
|
|
||||||
@@ -156,17 +150,8 @@ function isConsumerTrustedRequest(req: Request): boolean {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
const origin = req.headers.origin;
|
const origin = req.headers.origin;
|
||||||
if (origin) {
|
if (origin && CONSUMER_TRUSTED_ORIGINS.includes(origin)) {
|
||||||
// Check exact matches
|
return true;
|
||||||
if (CONSUMER_TRUSTED_ORIGINS.includes(origin)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
// Check wildcard patterns
|
|
||||||
for (const pattern of CONSUMER_TRUSTED_PATTERNS) {
|
|
||||||
if (pattern.test(origin)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
const referer = req.headers.referer;
|
const referer = req.headers.referer;
|
||||||
if (referer) {
|
if (referer) {
|
||||||
@@ -175,18 +160,6 @@ function isConsumerTrustedRequest(req: Request): boolean {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Check wildcard patterns against referer origin
|
|
||||||
try {
|
|
||||||
const refererUrl = new URL(referer);
|
|
||||||
const refererOrigin = refererUrl.origin;
|
|
||||||
for (const pattern of CONSUMER_TRUSTED_PATTERNS) {
|
|
||||||
if (pattern.test(refererOrigin)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
// Invalid referer URL, ignore
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,20 +13,9 @@ import {
|
|||||||
TaskFilter,
|
TaskFilter,
|
||||||
} from '../tasks/task-service';
|
} from '../tasks/task-service';
|
||||||
import { pool } from '../db/pool';
|
import { pool } from '../db/pool';
|
||||||
import {
|
|
||||||
isTaskPoolPaused,
|
|
||||||
pauseTaskPool,
|
|
||||||
resumeTaskPool,
|
|
||||||
getTaskPoolStatus,
|
|
||||||
} from '../tasks/task-pool-state';
|
|
||||||
import { authMiddleware, requireRole } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// Task routes require authentication and admin role
|
|
||||||
router.use(authMiddleware);
|
|
||||||
router.use(requireRole('admin', 'superadmin'));
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/tasks
|
* GET /api/tasks
|
||||||
* List tasks with optional filters
|
* List tasks with optional filters
|
||||||
@@ -156,36 +145,6 @@ router.get('/:id', async (req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
/**
|
|
||||||
* DELETE /api/tasks/:id
|
|
||||||
* Delete a specific task by ID
|
|
||||||
* Only allows deletion of failed, completed, or pending tasks (not running)
|
|
||||||
*/
|
|
||||||
router.delete('/:id', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const taskId = parseInt(req.params.id, 10);
|
|
||||||
|
|
||||||
// First check if task exists and its status
|
|
||||||
const task = await taskService.getTask(taskId);
|
|
||||||
if (!task) {
|
|
||||||
return res.status(404).json({ error: 'Task not found' });
|
|
||||||
}
|
|
||||||
|
|
||||||
// Don't allow deleting running tasks
|
|
||||||
if (task.status === 'running' || task.status === 'claimed') {
|
|
||||||
return res.status(400).json({ error: 'Cannot delete a running or claimed task' });
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete the task
|
|
||||||
await pool.query('DELETE FROM worker_tasks WHERE id = $1', [taskId]);
|
|
||||||
|
|
||||||
res.json({ success: true, message: `Task ${taskId} deleted` });
|
|
||||||
} catch (error: unknown) {
|
|
||||||
console.error('Error deleting task:', error);
|
|
||||||
res.status(500).json({ error: 'Failed to delete task' });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* POST /api/tasks
|
* POST /api/tasks
|
||||||
* Create a new task
|
* Create a new task
|
||||||
@@ -603,42 +562,4 @@ router.post('/migration/full-migrate', async (req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/tasks/pool/status
|
|
||||||
* Check if task pool is paused
|
|
||||||
*/
|
|
||||||
router.get('/pool/status', async (_req: Request, res: Response) => {
|
|
||||||
const status = getTaskPoolStatus();
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
...status,
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/tasks/pool/pause
|
|
||||||
* Pause the task pool - workers won't pick up new tasks
|
|
||||||
*/
|
|
||||||
router.post('/pool/pause', async (_req: Request, res: Response) => {
|
|
||||||
pauseTaskPool();
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
paused: true,
|
|
||||||
message: 'Task pool paused - workers will not pick up new tasks',
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/tasks/pool/resume
|
|
||||||
* Resume the task pool - workers will pick up tasks again
|
|
||||||
*/
|
|
||||||
router.post('/pool/resume', async (_req: Request, res: Response) => {
|
|
||||||
resumeTaskPool();
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
paused: false,
|
|
||||||
message: 'Task pool resumed - workers will pick up new tasks',
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
export default router;
|
export default router;
|
||||||
|
|||||||
@@ -14,36 +14,23 @@ router.get('/', async (req: AuthRequest, res) => {
|
|||||||
try {
|
try {
|
||||||
const { search, domain } = req.query;
|
const { search, domain } = req.query;
|
||||||
|
|
||||||
// Check which columns exist (schema-tolerant)
|
let query = `
|
||||||
const columnsResult = await pool.query(`
|
SELECT id, email, role, first_name, last_name, phone, domain, created_at, updated_at
|
||||||
SELECT column_name FROM information_schema.columns
|
FROM users
|
||||||
WHERE table_name = 'users' AND column_name IN ('first_name', 'last_name', 'phone', 'domain')
|
WHERE 1=1
|
||||||
`);
|
`;
|
||||||
const existingColumns = new Set(columnsResult.rows.map((r: any) => r.column_name));
|
|
||||||
|
|
||||||
// Build column list based on what exists
|
|
||||||
const selectCols = ['id', 'email', 'role', 'created_at', 'updated_at'];
|
|
||||||
if (existingColumns.has('first_name')) selectCols.push('first_name');
|
|
||||||
if (existingColumns.has('last_name')) selectCols.push('last_name');
|
|
||||||
if (existingColumns.has('phone')) selectCols.push('phone');
|
|
||||||
if (existingColumns.has('domain')) selectCols.push('domain');
|
|
||||||
|
|
||||||
let query = `SELECT ${selectCols.join(', ')} FROM users WHERE 1=1`;
|
|
||||||
const params: any[] = [];
|
const params: any[] = [];
|
||||||
let paramIndex = 1;
|
let paramIndex = 1;
|
||||||
|
|
||||||
// Search by email (and optionally first_name, last_name if they exist)
|
// Search by email, first_name, or last_name
|
||||||
if (search && typeof search === 'string') {
|
if (search && typeof search === 'string') {
|
||||||
const searchClauses = ['email ILIKE $' + paramIndex];
|
query += ` AND (email ILIKE $${paramIndex} OR first_name ILIKE $${paramIndex} OR last_name ILIKE $${paramIndex})`;
|
||||||
if (existingColumns.has('first_name')) searchClauses.push('first_name ILIKE $' + paramIndex);
|
|
||||||
if (existingColumns.has('last_name')) searchClauses.push('last_name ILIKE $' + paramIndex);
|
|
||||||
query += ` AND (${searchClauses.join(' OR ')})`;
|
|
||||||
params.push(`%${search}%`);
|
params.push(`%${search}%`);
|
||||||
paramIndex++;
|
paramIndex++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter by domain (if column exists)
|
// Filter by domain
|
||||||
if (domain && typeof domain === 'string' && existingColumns.has('domain')) {
|
if (domain && typeof domain === 'string') {
|
||||||
query += ` AND domain = $${paramIndex}`;
|
query += ` AND domain = $${paramIndex}`;
|
||||||
params.push(domain);
|
params.push(domain);
|
||||||
paramIndex++;
|
paramIndex++;
|
||||||
@@ -63,22 +50,8 @@ router.get('/', async (req: AuthRequest, res) => {
|
|||||||
router.get('/:id', async (req: AuthRequest, res) => {
|
router.get('/:id', async (req: AuthRequest, res) => {
|
||||||
try {
|
try {
|
||||||
const { id } = req.params;
|
const { id } = req.params;
|
||||||
|
|
||||||
// Check which columns exist (schema-tolerant)
|
|
||||||
const columnsResult = await pool.query(`
|
|
||||||
SELECT column_name FROM information_schema.columns
|
|
||||||
WHERE table_name = 'users' AND column_name IN ('first_name', 'last_name', 'phone', 'domain')
|
|
||||||
`);
|
|
||||||
const existingColumns = new Set(columnsResult.rows.map((r: any) => r.column_name));
|
|
||||||
|
|
||||||
const selectCols = ['id', 'email', 'role', 'created_at', 'updated_at'];
|
|
||||||
if (existingColumns.has('first_name')) selectCols.push('first_name');
|
|
||||||
if (existingColumns.has('last_name')) selectCols.push('last_name');
|
|
||||||
if (existingColumns.has('phone')) selectCols.push('phone');
|
|
||||||
if (existingColumns.has('domain')) selectCols.push('domain');
|
|
||||||
|
|
||||||
const result = await pool.query(`
|
const result = await pool.query(`
|
||||||
SELECT ${selectCols.join(', ')}
|
SELECT id, email, role, first_name, last_name, phone, domain, created_at, updated_at
|
||||||
FROM users
|
FROM users
|
||||||
WHERE id = $1
|
WHERE id = $1
|
||||||
`, [id]);
|
`, [id]);
|
||||||
|
|||||||
@@ -23,14 +23,9 @@
|
|||||||
import { Router, Request, Response } from 'express';
|
import { Router, Request, Response } from 'express';
|
||||||
import { pool } from '../db/pool';
|
import { pool } from '../db/pool';
|
||||||
import os from 'os';
|
import os from 'os';
|
||||||
import { authMiddleware } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// Worker registry routes require authentication
|
|
||||||
// Note: Internal workers (pods) can access via trusted IP (localhost, in-cluster)
|
|
||||||
router.use(authMiddleware);
|
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// WORKER REGISTRATION
|
// WORKER REGISTRATION
|
||||||
// ============================================================
|
// ============================================================
|
||||||
@@ -75,20 +70,21 @@ router.post('/register', async (req: Request, res: Response) => {
|
|||||||
);
|
);
|
||||||
|
|
||||||
if (existing.rows.length > 0) {
|
if (existing.rows.length > 0) {
|
||||||
// Re-activate existing worker - keep existing pod_name (fantasy name), don't overwrite with K8s name
|
// Re-activate existing worker
|
||||||
const { rows } = await pool.query(`
|
const { rows } = await pool.query(`
|
||||||
UPDATE worker_registry
|
UPDATE worker_registry
|
||||||
SET status = 'active',
|
SET status = 'active',
|
||||||
role = $1,
|
role = $1,
|
||||||
hostname = $2,
|
pod_name = $2,
|
||||||
ip_address = $3,
|
hostname = $3,
|
||||||
|
ip_address = $4,
|
||||||
last_heartbeat_at = NOW(),
|
last_heartbeat_at = NOW(),
|
||||||
started_at = NOW(),
|
started_at = NOW(),
|
||||||
metadata = $4,
|
metadata = $5,
|
||||||
updated_at = NOW()
|
updated_at = NOW()
|
||||||
WHERE worker_id = $5
|
WHERE worker_id = $6
|
||||||
RETURNING id, worker_id, friendly_name, pod_name, role
|
RETURNING id, worker_id, friendly_name, role
|
||||||
`, [role, finalHostname, clientIp, metadata, finalWorkerId]);
|
`, [role, pod_name, finalHostname, clientIp, metadata, finalWorkerId]);
|
||||||
|
|
||||||
const worker = rows[0];
|
const worker = rows[0];
|
||||||
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
|
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
|
||||||
@@ -109,13 +105,13 @@ router.post('/register', async (req: Request, res: Response) => {
|
|||||||
const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]);
|
const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]);
|
||||||
const friendlyName = nameResult.rows[0].name;
|
const friendlyName = nameResult.rows[0].name;
|
||||||
|
|
||||||
// Register the worker - use friendlyName as pod_name (not K8s name)
|
// Register the worker
|
||||||
const { rows } = await pool.query(`
|
const { rows } = await pool.query(`
|
||||||
INSERT INTO worker_registry (
|
INSERT INTO worker_registry (
|
||||||
worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata
|
worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata
|
||||||
) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7)
|
) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7)
|
||||||
RETURNING id, worker_id, friendly_name, pod_name, role
|
RETURNING id, worker_id, friendly_name, role
|
||||||
`, [finalWorkerId, friendlyName, role, friendlyName, finalHostname, clientIp, metadata]);
|
`, [finalWorkerId, friendlyName, role, pod_name, finalHostname, clientIp, metadata]);
|
||||||
|
|
||||||
const worker = rows[0];
|
const worker = rows[0];
|
||||||
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
|
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
|
||||||
@@ -142,36 +138,17 @@ router.post('/register', async (req: Request, res: Response) => {
|
|||||||
*
|
*
|
||||||
* Body:
|
* Body:
|
||||||
* - worker_id: string (required)
|
* - worker_id: string (required)
|
||||||
* - current_task_id: number (optional) - task currently being processed (primary task)
|
* - current_task_id: number (optional) - task currently being processed
|
||||||
* - current_task_ids: number[] (optional) - all tasks currently being processed (concurrent)
|
|
||||||
* - active_task_count: number (optional) - number of tasks currently running
|
|
||||||
* - max_concurrent_tasks: number (optional) - max concurrent tasks this worker can handle
|
|
||||||
* - status: string (optional) - 'active', 'idle'
|
* - status: string (optional) - 'active', 'idle'
|
||||||
* - resources: object (optional) - memory_mb, cpu_user_ms, cpu_system_ms, etc.
|
|
||||||
*/
|
*/
|
||||||
router.post('/heartbeat', async (req: Request, res: Response) => {
|
router.post('/heartbeat', async (req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
const {
|
const { worker_id, current_task_id, status = 'active', resources } = req.body;
|
||||||
worker_id,
|
|
||||||
current_task_id,
|
|
||||||
current_task_ids,
|
|
||||||
active_task_count,
|
|
||||||
max_concurrent_tasks,
|
|
||||||
status = 'active',
|
|
||||||
resources
|
|
||||||
} = req.body;
|
|
||||||
|
|
||||||
if (!worker_id) {
|
if (!worker_id) {
|
||||||
return res.status(400).json({ success: false, error: 'worker_id is required' });
|
return res.status(400).json({ success: false, error: 'worker_id is required' });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build metadata object with all the new fields
|
|
||||||
const metadata: Record<string, unknown> = {};
|
|
||||||
if (resources) Object.assign(metadata, resources);
|
|
||||||
if (current_task_ids) metadata.current_task_ids = current_task_ids;
|
|
||||||
if (active_task_count !== undefined) metadata.active_task_count = active_task_count;
|
|
||||||
if (max_concurrent_tasks !== undefined) metadata.max_concurrent_tasks = max_concurrent_tasks;
|
|
||||||
|
|
||||||
// Store resources in metadata jsonb column
|
// Store resources in metadata jsonb column
|
||||||
const { rows } = await pool.query(`
|
const { rows } = await pool.query(`
|
||||||
UPDATE worker_registry
|
UPDATE worker_registry
|
||||||
@@ -182,7 +159,7 @@ router.post('/heartbeat', async (req: Request, res: Response) => {
|
|||||||
updated_at = NOW()
|
updated_at = NOW()
|
||||||
WHERE worker_id = $3
|
WHERE worker_id = $3
|
||||||
RETURNING id, friendly_name, status
|
RETURNING id, friendly_name, status
|
||||||
`, [current_task_id || null, status, worker_id, Object.keys(metadata).length > 0 ? JSON.stringify(metadata) : null]);
|
`, [current_task_id || null, status, worker_id, resources ? JSON.stringify(resources) : null]);
|
||||||
|
|
||||||
if (rows.length === 0) {
|
if (rows.length === 0) {
|
||||||
return res.status(404).json({ success: false, error: 'Worker not found - please register first' });
|
return res.status(404).json({ success: false, error: 'Worker not found - please register first' });
|
||||||
@@ -296,29 +273,6 @@ router.post('/deregister', async (req: Request, res: Response) => {
|
|||||||
*/
|
*/
|
||||||
router.get('/workers', async (req: Request, res: Response) => {
|
router.get('/workers', async (req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
// Check if worker_registry table exists
|
|
||||||
const tableCheck = await pool.query(`
|
|
||||||
SELECT EXISTS (
|
|
||||||
SELECT FROM information_schema.tables
|
|
||||||
WHERE table_name = 'worker_registry'
|
|
||||||
) as exists
|
|
||||||
`);
|
|
||||||
|
|
||||||
if (!tableCheck.rows[0].exists) {
|
|
||||||
// Return empty result if table doesn't exist yet
|
|
||||||
return res.json({
|
|
||||||
success: true,
|
|
||||||
workers: [],
|
|
||||||
summary: {
|
|
||||||
active_count: 0,
|
|
||||||
idle_count: 0,
|
|
||||||
offline_count: 0,
|
|
||||||
total_count: 0,
|
|
||||||
active_roles: 0
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const { status, role, include_terminated = 'false' } = req.query;
|
const { status, role, include_terminated = 'false' } = req.query;
|
||||||
|
|
||||||
let whereClause = include_terminated === 'true' ? 'WHERE 1=1' : "WHERE status != 'terminated'";
|
let whereClause = include_terminated === 'true' ? 'WHERE 1=1' : "WHERE status != 'terminated'";
|
||||||
@@ -353,27 +307,12 @@ router.get('/workers', async (req: Request, res: Response) => {
|
|||||||
tasks_completed,
|
tasks_completed,
|
||||||
tasks_failed,
|
tasks_failed,
|
||||||
current_task_id,
|
current_task_id,
|
||||||
-- Concurrent task fields from metadata
|
|
||||||
(metadata->>'current_task_ids')::jsonb as current_task_ids,
|
|
||||||
(metadata->>'active_task_count')::int as active_task_count,
|
|
||||||
(metadata->>'max_concurrent_tasks')::int as max_concurrent_tasks,
|
|
||||||
-- Decommission fields
|
|
||||||
COALESCE(decommission_requested, false) as decommission_requested,
|
|
||||||
decommission_reason,
|
|
||||||
-- Preflight fields (dual-transport verification)
|
|
||||||
curl_ip,
|
|
||||||
http_ip,
|
|
||||||
preflight_status,
|
|
||||||
preflight_at,
|
|
||||||
fingerprint_data,
|
|
||||||
-- Full metadata for resources
|
|
||||||
metadata,
|
metadata,
|
||||||
EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat,
|
EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat,
|
||||||
CASE
|
CASE
|
||||||
WHEN status = 'offline' OR status = 'terminated' THEN status
|
WHEN status = 'offline' OR status = 'terminated' THEN status
|
||||||
WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
|
WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
|
||||||
WHEN current_task_id IS NOT NULL THEN 'busy'
|
WHEN current_task_id IS NOT NULL THEN 'busy'
|
||||||
WHEN (metadata->>'active_task_count')::int > 0 THEN 'busy'
|
|
||||||
ELSE 'ready'
|
ELSE 'ready'
|
||||||
END as health_status,
|
END as health_status,
|
||||||
created_at
|
created_at
|
||||||
@@ -710,163 +649,4 @@ router.get('/capacity', async (_req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// WORKER LIFECYCLE MANAGEMENT
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/worker-registry/workers/:workerId/decommission
|
|
||||||
* Request graceful decommission of a worker (will stop after current task)
|
|
||||||
*/
|
|
||||||
router.post('/workers/:workerId/decommission', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { workerId } = req.params;
|
|
||||||
const { reason, issued_by } = req.body;
|
|
||||||
|
|
||||||
// Update worker_registry to flag for decommission
|
|
||||||
const result = await pool.query(
|
|
||||||
`UPDATE worker_registry
|
|
||||||
SET decommission_requested = true,
|
|
||||||
decommission_reason = $2,
|
|
||||||
decommission_requested_at = NOW()
|
|
||||||
WHERE worker_id = $1
|
|
||||||
RETURNING friendly_name, status, current_task_id`,
|
|
||||||
[workerId, reason || 'Manual decommission from admin']
|
|
||||||
);
|
|
||||||
|
|
||||||
if (result.rows.length === 0) {
|
|
||||||
return res.status(404).json({ success: false, error: 'Worker not found' });
|
|
||||||
}
|
|
||||||
|
|
||||||
const worker = result.rows[0];
|
|
||||||
|
|
||||||
// Also log to worker_commands for audit trail
|
|
||||||
await pool.query(
|
|
||||||
`INSERT INTO worker_commands (worker_id, command, reason, issued_by)
|
|
||||||
VALUES ($1, 'decommission', $2, $3)
|
|
||||||
ON CONFLICT DO NOTHING`,
|
|
||||||
[workerId, reason || 'Manual decommission', issued_by || 'admin']
|
|
||||||
).catch(() => {
|
|
||||||
// Table might not exist yet - ignore
|
|
||||||
});
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
message: worker.current_task_id
|
|
||||||
? `Worker ${worker.friendly_name} will stop after completing task #${worker.current_task_id}`
|
|
||||||
: `Worker ${worker.friendly_name} will stop on next poll`,
|
|
||||||
worker: {
|
|
||||||
friendly_name: worker.friendly_name,
|
|
||||||
status: worker.status,
|
|
||||||
current_task_id: worker.current_task_id,
|
|
||||||
decommission_requested: true
|
|
||||||
}
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/worker-registry/workers/:workerId/cancel-decommission
|
|
||||||
* Cancel a pending decommission request
|
|
||||||
*/
|
|
||||||
router.post('/workers/:workerId/cancel-decommission', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { workerId } = req.params;
|
|
||||||
|
|
||||||
const result = await pool.query(
|
|
||||||
`UPDATE worker_registry
|
|
||||||
SET decommission_requested = false,
|
|
||||||
decommission_reason = NULL,
|
|
||||||
decommission_requested_at = NULL
|
|
||||||
WHERE worker_id = $1
|
|
||||||
RETURNING friendly_name`,
|
|
||||||
[workerId]
|
|
||||||
);
|
|
||||||
|
|
||||||
if (result.rows.length === 0) {
|
|
||||||
return res.status(404).json({ success: false, error: 'Worker not found' });
|
|
||||||
}
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
message: `Decommission cancelled for ${result.rows[0].friendly_name}`
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/worker-registry/spawn
|
|
||||||
* Spawn a new worker in the current pod (only works in multi-worker-per-pod mode)
|
|
||||||
* For now, this is a placeholder - actual spawning requires the pod supervisor
|
|
||||||
*/
|
|
||||||
router.post('/spawn', async (req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { pod_name, role } = req.body;
|
|
||||||
|
|
||||||
// For now, we can't actually spawn workers from the API
|
|
||||||
// This would require a supervisor process in each pod that listens for spawn commands
|
|
||||||
// Instead, return instructions for how to scale
|
|
||||||
res.json({
|
|
||||||
success: false,
|
|
||||||
error: 'Direct worker spawning not yet implemented',
|
|
||||||
instructions: 'To add workers, scale the K8s deployment: kubectl scale deployment/scraper-worker --replicas=N'
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/worker-registry/pods
|
|
||||||
* Get workers grouped by pod
|
|
||||||
*/
|
|
||||||
router.get('/pods', async (_req: Request, res: Response) => {
|
|
||||||
try {
|
|
||||||
const { rows } = await pool.query(`
|
|
||||||
SELECT
|
|
||||||
COALESCE(pod_name, 'Unknown') as pod_name,
|
|
||||||
COUNT(*) as worker_count,
|
|
||||||
COUNT(*) FILTER (WHERE current_task_id IS NOT NULL) as busy_count,
|
|
||||||
COUNT(*) FILTER (WHERE current_task_id IS NULL) as idle_count,
|
|
||||||
SUM(tasks_completed) as total_completed,
|
|
||||||
SUM(tasks_failed) as total_failed,
|
|
||||||
SUM((metadata->>'memory_rss_mb')::int) as total_memory_mb,
|
|
||||||
array_agg(json_build_object(
|
|
||||||
'worker_id', worker_id,
|
|
||||||
'friendly_name', friendly_name,
|
|
||||||
'status', status,
|
|
||||||
'current_task_id', current_task_id,
|
|
||||||
'tasks_completed', tasks_completed,
|
|
||||||
'tasks_failed', tasks_failed,
|
|
||||||
'decommission_requested', COALESCE(decommission_requested, false),
|
|
||||||
'last_heartbeat_at', last_heartbeat_at
|
|
||||||
)) as workers
|
|
||||||
FROM worker_registry
|
|
||||||
WHERE status NOT IN ('offline', 'terminated')
|
|
||||||
GROUP BY pod_name
|
|
||||||
ORDER BY pod_name
|
|
||||||
`);
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
pods: rows.map(row => ({
|
|
||||||
pod_name: row.pod_name,
|
|
||||||
worker_count: parseInt(row.worker_count),
|
|
||||||
busy_count: parseInt(row.busy_count),
|
|
||||||
idle_count: parseInt(row.idle_count),
|
|
||||||
total_completed: parseInt(row.total_completed) || 0,
|
|
||||||
total_failed: parseInt(row.total_failed) || 0,
|
|
||||||
total_memory_mb: parseInt(row.total_memory_mb) || 0,
|
|
||||||
workers: row.workers
|
|
||||||
}))
|
|
||||||
});
|
|
||||||
} catch (error: any) {
|
|
||||||
res.status(500).json({ success: false, error: error.message });
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
export default router;
|
export default router;
|
||||||
|
|||||||
@@ -17,238 +17,13 @@
|
|||||||
* GET /api/monitor/jobs - Get recent job history
|
* GET /api/monitor/jobs - Get recent job history
|
||||||
* GET /api/monitor/active-jobs - Get currently running jobs
|
* GET /api/monitor/active-jobs - Get currently running jobs
|
||||||
* GET /api/monitor/summary - Get monitoring summary
|
* GET /api/monitor/summary - Get monitoring summary
|
||||||
*
|
|
||||||
* K8s Scaling (added 2024-12-10):
|
|
||||||
* GET /api/workers/k8s/replicas - Get current replica count
|
|
||||||
* POST /api/workers/k8s/scale - Scale worker replicas up/down
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { Router, Request, Response } from 'express';
|
import { Router, Request, Response } from 'express';
|
||||||
import { pool } from '../db/pool';
|
import { pool } from '../db/pool';
|
||||||
import * as k8s from '@kubernetes/client-node';
|
|
||||||
import { authMiddleware } from '../auth/middleware';
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
// All worker routes require authentication (trusted origins or API token)
|
|
||||||
router.use(authMiddleware);
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// K8S SCALING CONFIGURATION (added 2024-12-10)
|
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Admin can scale workers from UI
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
const K8S_NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
|
|
||||||
const K8S_DEPLOYMENT_NAME = process.env.K8S_WORKER_DEPLOYMENT || 'scraper-worker';
|
|
||||||
|
|
||||||
// Initialize K8s client - uses in-cluster config when running in K8s,
|
|
||||||
// or kubeconfig when running locally
|
|
||||||
let k8sAppsApi: k8s.AppsV1Api | null = null;
|
|
||||||
|
|
||||||
function getK8sClient(): k8s.AppsV1Api | null {
|
|
||||||
if (k8sAppsApi) return k8sAppsApi;
|
|
||||||
|
|
||||||
try {
|
|
||||||
const kc = new k8s.KubeConfig();
|
|
||||||
|
|
||||||
// Try in-cluster config first (when running as a pod)
|
|
||||||
// Falls back to default kubeconfig (~/.kube/config) for local dev
|
|
||||||
try {
|
|
||||||
kc.loadFromCluster();
|
|
||||||
} catch {
|
|
||||||
kc.loadFromDefault();
|
|
||||||
}
|
|
||||||
|
|
||||||
k8sAppsApi = kc.makeApiClient(k8s.AppsV1Api);
|
|
||||||
return k8sAppsApi;
|
|
||||||
} catch (err: any) {
|
|
||||||
console.warn('[Workers] K8s client not available:', err.message);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// K8S SCALING ROUTES (added 2024-12-10)
|
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Admin can scale workers from UI
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* GET /api/workers/k8s/replicas - Get current worker replica count
|
|
||||||
* Returns current and desired replica counts from the Deployment
|
|
||||||
*/
|
|
||||||
router.get('/k8s/replicas', async (_req: Request, res: Response) => {
|
|
||||||
const client = getK8sClient();
|
|
||||||
|
|
||||||
if (!client) {
|
|
||||||
return res.status(503).json({
|
|
||||||
success: false,
|
|
||||||
error: 'K8s client not available (not running in cluster or no kubeconfig)',
|
|
||||||
replicas: null,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const response = await client.readNamespacedDeployment({
|
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
});
|
|
||||||
|
|
||||||
const deployment = response;
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
replicas: {
|
|
||||||
current: deployment.status?.readyReplicas || 0,
|
|
||||||
desired: deployment.spec?.replicas || 0,
|
|
||||||
available: deployment.status?.availableReplicas || 0,
|
|
||||||
updated: deployment.status?.updatedReplicas || 0,
|
|
||||||
},
|
|
||||||
deployment: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
});
|
|
||||||
} catch (err: any) {
|
|
||||||
console.error('[Workers] K8s replicas error:', err.body?.message || err.message);
|
|
||||||
res.status(500).json({
|
|
||||||
success: false,
|
|
||||||
error: err.body?.message || err.message,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/workers/k8s/scale - Scale worker replicas
|
|
||||||
* Body: { replicas: number } - desired replica count (0-20)
|
|
||||||
*/
|
|
||||||
router.post('/k8s/scale', async (req: Request, res: Response) => {
|
|
||||||
const client = getK8sClient();
|
|
||||||
|
|
||||||
if (!client) {
|
|
||||||
return res.status(503).json({
|
|
||||||
success: false,
|
|
||||||
error: 'K8s client not available (not running in cluster or no kubeconfig)',
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const { replicas } = req.body;
|
|
||||||
|
|
||||||
// Validate replica count
|
|
||||||
if (typeof replicas !== 'number' || replicas < 0 || replicas > 20) {
|
|
||||||
return res.status(400).json({
|
|
||||||
success: false,
|
|
||||||
error: 'replicas must be a number between 0 and 20',
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Get current state first
|
|
||||||
const currentResponse = await client.readNamespacedDeploymentScale({
|
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
});
|
|
||||||
const currentReplicas = currentResponse.spec?.replicas || 0;
|
|
||||||
|
|
||||||
// Update scale using replaceNamespacedDeploymentScale
|
|
||||||
await client.replaceNamespacedDeploymentScale({
|
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
body: {
|
|
||||||
apiVersion: 'autoscaling/v1',
|
|
||||||
kind: 'Scale',
|
|
||||||
metadata: {
|
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
},
|
|
||||||
spec: {
|
|
||||||
replicas: replicas,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`[Workers] Scaled ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${replicas} replicas`);
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
message: `Scaled from ${currentReplicas} to ${replicas} replicas`,
|
|
||||||
previous: currentReplicas,
|
|
||||||
desired: replicas,
|
|
||||||
deployment: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
});
|
|
||||||
} catch (err: any) {
|
|
||||||
console.error('[Workers] K8s scale error:', err.body?.message || err.message);
|
|
||||||
res.status(500).json({
|
|
||||||
success: false,
|
|
||||||
error: err.body?.message || err.message,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
|
||||||
* POST /api/workers/k8s/scale-up - Scale up worker replicas by 1
|
|
||||||
* Convenience endpoint for adding a single worker
|
|
||||||
*/
|
|
||||||
router.post('/k8s/scale-up', async (_req: Request, res: Response) => {
|
|
||||||
const client = getK8sClient();
|
|
||||||
|
|
||||||
if (!client) {
|
|
||||||
return res.status(503).json({
|
|
||||||
success: false,
|
|
||||||
error: 'K8s client not available (not running in cluster or no kubeconfig)',
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Get current replica count
|
|
||||||
const currentResponse = await client.readNamespacedDeploymentScale({
|
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
});
|
|
||||||
const currentReplicas = currentResponse.spec?.replicas || 0;
|
|
||||||
const newReplicas = currentReplicas + 1;
|
|
||||||
|
|
||||||
// Cap at 20 replicas
|
|
||||||
if (newReplicas > 20) {
|
|
||||||
return res.status(400).json({
|
|
||||||
success: false,
|
|
||||||
error: 'Maximum replica count (20) reached',
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Scale up by 1
|
|
||||||
await client.replaceNamespacedDeploymentScale({
|
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
body: {
|
|
||||||
apiVersion: 'autoscaling/v1',
|
|
||||||
kind: 'Scale',
|
|
||||||
metadata: {
|
|
||||||
name: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
},
|
|
||||||
spec: {
|
|
||||||
replicas: newReplicas,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`[Workers] Scaled up ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${newReplicas} replicas`);
|
|
||||||
|
|
||||||
res.json({
|
|
||||||
success: true,
|
|
||||||
message: `Added worker (${currentReplicas} → ${newReplicas} replicas)`,
|
|
||||||
previous: currentReplicas,
|
|
||||||
desired: newReplicas,
|
|
||||||
deployment: K8S_DEPLOYMENT_NAME,
|
|
||||||
namespace: K8S_NAMESPACE,
|
|
||||||
});
|
|
||||||
} catch (err: any) {
|
|
||||||
console.error('[Workers] K8s scale-up error:', err.body?.message || err.message);
|
|
||||||
res.status(500).json({
|
|
||||||
success: false,
|
|
||||||
error: err.body?.message || err.message,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// STATIC ROUTES (must come before parameterized routes)
|
// STATIC ROUTES (must come before parameterized routes)
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|||||||
@@ -16,11 +16,10 @@ import {
|
|||||||
executeGraphQL,
|
executeGraphQL,
|
||||||
startSession,
|
startSession,
|
||||||
endSession,
|
endSession,
|
||||||
setCrawlRotator,
|
getFingerprint,
|
||||||
GRAPHQL_HASHES,
|
GRAPHQL_HASHES,
|
||||||
DUTCHIE_CONFIG,
|
DUTCHIE_CONFIG,
|
||||||
} from '../platforms/dutchie';
|
} from '../platforms/dutchie';
|
||||||
import { CrawlRotator } from '../services/crawl-rotator';
|
|
||||||
|
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
|
|
||||||
@@ -109,27 +108,19 @@ async function main() {
|
|||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// STEP 2: Start stealth session
|
// STEP 2: Start stealth session
|
||||||
// Per workflow-12102025.md: Initialize CrawlRotator and start session with menuUrl
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
console.log('┌─────────────────────────────────────────────────────────────┐');
|
console.log('┌─────────────────────────────────────────────────────────────┐');
|
||||||
console.log('│ STEP 2: Start Stealth Session │');
|
console.log('│ STEP 2: Start Stealth Session │');
|
||||||
console.log('└─────────────────────────────────────────────────────────────┘');
|
console.log('└─────────────────────────────────────────────────────────────┘');
|
||||||
|
|
||||||
// Per workflow-12102025.md: Initialize CrawlRotator (required for sessions)
|
// Use Arizona timezone for this store
|
||||||
const rotator = new CrawlRotator();
|
const session = startSession(disp.state || 'AZ', 'America/Phoenix');
|
||||||
setCrawlRotator(rotator);
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: startSession takes menuUrl for dynamic Referer
|
const fp = getFingerprint();
|
||||||
const session = startSession(disp.menu_url);
|
|
||||||
|
|
||||||
const fp = session.fingerprint;
|
|
||||||
console.log(` Session ID: ${session.sessionId}`);
|
console.log(` Session ID: ${session.sessionId}`);
|
||||||
console.log(` Browser: ${fp.browserName} (${fp.deviceCategory})`);
|
|
||||||
console.log(` User-Agent: ${fp.userAgent.slice(0, 60)}...`);
|
console.log(` User-Agent: ${fp.userAgent.slice(0, 60)}...`);
|
||||||
console.log(` Accept-Language: ${fp.acceptLanguage}`);
|
console.log(` Accept-Language: ${fp.acceptLanguage}`);
|
||||||
console.log(` Referer: ${session.referer}`);
|
console.log(` Sec-CH-UA: ${fp.secChUa || '(not set)'}`);
|
||||||
console.log(` DNT: ${fp.httpFingerprint.hasDNT ? 'enabled' : 'disabled'}`);
|
|
||||||
console.log(` TLS: ${fp.httpFingerprint.curlImpersonateBinary}`);
|
|
||||||
console.log('');
|
console.log('');
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
/**
|
/**
|
||||||
* Test script for stealth session management
|
* Test script for stealth session management
|
||||||
*
|
*
|
||||||
* Per workflow-12102025.md:
|
* Tests:
|
||||||
* - Tests HTTP fingerprinting (browser-specific headers + ordering)
|
* 1. Per-session fingerprint rotation
|
||||||
* - Tests UA generation (device distribution, browser filtering)
|
* 2. Geographic consistency (timezone → Accept-Language)
|
||||||
* - Tests dynamic Referer per dispensary
|
* 3. Proxy location loading from database
|
||||||
*
|
*
|
||||||
* Usage:
|
* Usage:
|
||||||
* npx tsx src/scripts/test-stealth-session.ts
|
* npx tsx src/scripts/test-stealth-session.ts
|
||||||
@@ -14,142 +14,104 @@ import {
|
|||||||
startSession,
|
startSession,
|
||||||
endSession,
|
endSession,
|
||||||
getCurrentSession,
|
getCurrentSession,
|
||||||
|
getFingerprint,
|
||||||
|
getRandomFingerprint,
|
||||||
|
getLocaleForTimezone,
|
||||||
buildHeaders,
|
buildHeaders,
|
||||||
setCrawlRotator,
|
|
||||||
} from '../platforms/dutchie';
|
} from '../platforms/dutchie';
|
||||||
|
|
||||||
import { CrawlRotator } from '../services/crawl-rotator';
|
|
||||||
import {
|
|
||||||
generateHTTPFingerprint,
|
|
||||||
buildRefererFromMenuUrl,
|
|
||||||
BrowserType,
|
|
||||||
} from '../services/http-fingerprint';
|
|
||||||
|
|
||||||
console.log('='.repeat(60));
|
console.log('='.repeat(60));
|
||||||
console.log('STEALTH SESSION TEST (per workflow-12102025.md)');
|
console.log('STEALTH SESSION TEST');
|
||||||
console.log('='.repeat(60));
|
console.log('='.repeat(60));
|
||||||
|
|
||||||
// Initialize CrawlRotator (required for sessions)
|
// Test 1: Timezone to Locale mapping
|
||||||
console.log('\n[Setup] Initializing CrawlRotator...');
|
console.log('\n[Test 1] Timezone to Locale Mapping:');
|
||||||
const rotator = new CrawlRotator();
|
const testTimezones = [
|
||||||
setCrawlRotator(rotator);
|
'America/Phoenix',
|
||||||
console.log(' CrawlRotator initialized');
|
'America/Los_Angeles',
|
||||||
|
'America/New_York',
|
||||||
// Test 1: HTTP Fingerprint Generation
|
'America/Chicago',
|
||||||
console.log('\n[Test 1] HTTP Fingerprint Generation:');
|
|
||||||
const browsers: BrowserType[] = ['Chrome', 'Firefox', 'Safari', 'Edge'];
|
|
||||||
|
|
||||||
for (const browser of browsers) {
|
|
||||||
const httpFp = generateHTTPFingerprint(browser);
|
|
||||||
console.log(` ${browser}:`);
|
|
||||||
console.log(` TLS binary: ${httpFp.curlImpersonateBinary}`);
|
|
||||||
console.log(` DNT: ${httpFp.hasDNT ? 'enabled' : 'disabled'}`);
|
|
||||||
console.log(` Header order: ${httpFp.headerOrder.slice(0, 5).join(', ')}...`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test 2: Dynamic Referer from menu URLs
|
|
||||||
console.log('\n[Test 2] Dynamic Referer from Menu URLs:');
|
|
||||||
const testUrls = [
|
|
||||||
'https://dutchie.com/embedded-menu/harvest-of-tempe',
|
|
||||||
'https://dutchie.com/dispensary/zen-leaf-mesa',
|
|
||||||
'/embedded-menu/deeply-rooted',
|
|
||||||
'/dispensary/curaleaf-phoenix',
|
|
||||||
null,
|
|
||||||
undefined,
|
undefined,
|
||||||
|
'Invalid/Timezone',
|
||||||
];
|
];
|
||||||
|
|
||||||
for (const url of testUrls) {
|
for (const tz of testTimezones) {
|
||||||
const referer = buildRefererFromMenuUrl(url);
|
const locale = getLocaleForTimezone(tz);
|
||||||
console.log(` ${url || '(null/undefined)'}`);
|
console.log(` ${tz || '(undefined)'} → ${locale}`);
|
||||||
console.log(` → ${referer}`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test 3: Session with Dynamic Referer
|
// Test 2: Random fingerprint selection
|
||||||
console.log('\n[Test 3] Session with Dynamic Referer:');
|
console.log('\n[Test 2] Random Fingerprint Selection (5 samples):');
|
||||||
const testMenuUrl = 'https://dutchie.com/dispensary/harvest-of-tempe';
|
for (let i = 0; i < 5; i++) {
|
||||||
console.log(` Starting session with menuUrl: ${testMenuUrl}`);
|
const fp = getRandomFingerprint();
|
||||||
|
console.log(` ${i + 1}. ${fp.userAgent.slice(0, 60)}...`);
|
||||||
const session1 = startSession(testMenuUrl);
|
|
||||||
console.log(` Session ID: ${session1.sessionId}`);
|
|
||||||
console.log(` Browser: ${session1.fingerprint.browserName}`);
|
|
||||||
console.log(` Device: ${session1.fingerprint.deviceCategory}`);
|
|
||||||
console.log(` Referer: ${session1.referer}`);
|
|
||||||
console.log(` DNT: ${session1.fingerprint.httpFingerprint.hasDNT ? 'enabled' : 'disabled'}`);
|
|
||||||
console.log(` TLS: ${session1.fingerprint.httpFingerprint.curlImpersonateBinary}`);
|
|
||||||
|
|
||||||
// Test 4: Build Headers (browser-specific order)
|
|
||||||
console.log('\n[Test 4] Build Headers (browser-specific order):');
|
|
||||||
const { headers, orderedHeaders } = buildHeaders(true, 1000);
|
|
||||||
console.log(` Headers built for ${session1.fingerprint.browserName}:`);
|
|
||||||
console.log(` Order: ${orderedHeaders.join(' → ')}`);
|
|
||||||
console.log(` Sample headers:`);
|
|
||||||
console.log(` User-Agent: ${headers['User-Agent']?.slice(0, 50)}...`);
|
|
||||||
console.log(` Accept: ${headers['Accept']}`);
|
|
||||||
console.log(` Accept-Language: ${headers['Accept-Language']}`);
|
|
||||||
console.log(` Referer: ${headers['Referer']}`);
|
|
||||||
if (headers['sec-ch-ua']) {
|
|
||||||
console.log(` sec-ch-ua: ${headers['sec-ch-ua']}`);
|
|
||||||
}
|
|
||||||
if (headers['DNT']) {
|
|
||||||
console.log(` DNT: ${headers['DNT']}`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test 3: Session Management
|
||||||
|
console.log('\n[Test 3] Session Management:');
|
||||||
|
|
||||||
|
// Before session - should use default fingerprint
|
||||||
|
console.log(' Before session:');
|
||||||
|
const beforeFp = getFingerprint();
|
||||||
|
console.log(` getFingerprint(): ${beforeFp.userAgent.slice(0, 50)}...`);
|
||||||
|
console.log(` getCurrentSession(): ${getCurrentSession()}`);
|
||||||
|
|
||||||
|
// Start session with Arizona timezone
|
||||||
|
console.log('\n Starting session (AZ, America/Phoenix):');
|
||||||
|
const session1 = startSession('AZ', 'America/Phoenix');
|
||||||
|
console.log(` Session ID: ${session1.sessionId}`);
|
||||||
|
console.log(` Fingerprint UA: ${session1.fingerprint.userAgent.slice(0, 50)}...`);
|
||||||
|
console.log(` Accept-Language: ${session1.fingerprint.acceptLanguage}`);
|
||||||
|
console.log(` Timezone: ${session1.timezone}`);
|
||||||
|
|
||||||
|
// During session - should use session fingerprint
|
||||||
|
console.log('\n During session:');
|
||||||
|
const duringFp = getFingerprint();
|
||||||
|
console.log(` getFingerprint(): ${duringFp.userAgent.slice(0, 50)}...`);
|
||||||
|
console.log(` Same as session? ${duringFp.userAgent === session1.fingerprint.userAgent}`);
|
||||||
|
|
||||||
|
// Test buildHeaders with session
|
||||||
|
console.log('\n buildHeaders() during session:');
|
||||||
|
const headers = buildHeaders('/embedded-menu/test-store');
|
||||||
|
console.log(` User-Agent: ${headers['user-agent'].slice(0, 50)}...`);
|
||||||
|
console.log(` Accept-Language: ${headers['accept-language']}`);
|
||||||
|
console.log(` Origin: ${headers['origin']}`);
|
||||||
|
console.log(` Referer: ${headers['referer']}`);
|
||||||
|
|
||||||
|
// End session
|
||||||
|
console.log('\n Ending session:');
|
||||||
endSession();
|
endSession();
|
||||||
|
console.log(` getCurrentSession(): ${getCurrentSession()}`);
|
||||||
|
|
||||||
// Test 5: Multiple Sessions (UA variety)
|
// Test 4: Multiple sessions should have different fingerprints
|
||||||
console.log('\n[Test 5] Multiple Sessions (UA & fingerprint variety):');
|
console.log('\n[Test 4] Multiple Sessions (fingerprint variety):');
|
||||||
const sessions: {
|
const fingerprints: string[] = [];
|
||||||
browser: string;
|
|
||||||
device: string;
|
|
||||||
hasDNT: boolean;
|
|
||||||
}[] = [];
|
|
||||||
|
|
||||||
for (let i = 0; i < 10; i++) {
|
for (let i = 0; i < 10; i++) {
|
||||||
const session = startSession(`/dispensary/store-${i}`);
|
const session = startSession('CA', 'America/Los_Angeles');
|
||||||
sessions.push({
|
fingerprints.push(session.fingerprint.userAgent);
|
||||||
browser: session.fingerprint.browserName,
|
|
||||||
device: session.fingerprint.deviceCategory,
|
|
||||||
hasDNT: session.fingerprint.httpFingerprint.hasDNT,
|
|
||||||
});
|
|
||||||
endSession();
|
endSession();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count distribution
|
const uniqueCount = new Set(fingerprints).size;
|
||||||
const browserCounts: Record<string, number> = {};
|
console.log(` 10 sessions created, ${uniqueCount} unique fingerprints`);
|
||||||
const deviceCounts: Record<string, number> = {};
|
console.log(` Variety: ${uniqueCount >= 3 ? '✅ Good' : '⚠️ Low - may need more fingerprint options'}`);
|
||||||
let dntCount = 0;
|
|
||||||
|
|
||||||
for (const s of sessions) {
|
// Test 5: Geographic consistency check
|
||||||
browserCounts[s.browser] = (browserCounts[s.browser] || 0) + 1;
|
console.log('\n[Test 5] Geographic Consistency:');
|
||||||
deviceCounts[s.device] = (deviceCounts[s.device] || 0) + 1;
|
const geoTests = [
|
||||||
if (s.hasDNT) dntCount++;
|
{ state: 'AZ', tz: 'America/Phoenix' },
|
||||||
}
|
{ state: 'CA', tz: 'America/Los_Angeles' },
|
||||||
|
{ state: 'NY', tz: 'America/New_York' },
|
||||||
|
{ state: 'IL', tz: 'America/Chicago' },
|
||||||
|
];
|
||||||
|
|
||||||
console.log(` 10 sessions created:`);
|
for (const { state, tz } of geoTests) {
|
||||||
console.log(` Browsers: ${JSON.stringify(browserCounts)}`);
|
const session = startSession(state, tz);
|
||||||
console.log(` Devices: ${JSON.stringify(deviceCounts)}`);
|
const consistent = session.fingerprint.acceptLanguage.includes('en-US');
|
||||||
console.log(` DNT enabled: ${dntCount}/10 (expected ~30%)`);
|
console.log(` ${state} (${tz}): Accept-Language=${session.fingerprint.acceptLanguage} ${consistent ? '✅' : '❌'}`);
|
||||||
|
|
||||||
// Test 6: Device distribution check (per workflow-12102025.md: 62/36/2)
|
|
||||||
console.log('\n[Test 6] Device Distribution (larger sample):');
|
|
||||||
const deviceSamples: string[] = [];
|
|
||||||
|
|
||||||
for (let i = 0; i < 100; i++) {
|
|
||||||
const session = startSession();
|
|
||||||
deviceSamples.push(session.fingerprint.deviceCategory);
|
|
||||||
endSession();
|
endSession();
|
||||||
}
|
}
|
||||||
|
|
||||||
const mobileCount = deviceSamples.filter(d => d === 'mobile').length;
|
|
||||||
const desktopCount = deviceSamples.filter(d => d === 'desktop').length;
|
|
||||||
const tabletCount = deviceSamples.filter(d => d === 'tablet').length;
|
|
||||||
|
|
||||||
console.log(` 100 sessions (expected: 62% mobile, 36% desktop, 2% tablet):`);
|
|
||||||
console.log(` Mobile: ${mobileCount}%`);
|
|
||||||
console.log(` Desktop: ${desktopCount}%`);
|
|
||||||
console.log(` Tablet: ${tabletCount}%`);
|
|
||||||
console.log(` Distribution: ${Math.abs(mobileCount - 62) < 15 && Math.abs(desktopCount - 36) < 15 ? '✅ Reasonable' : '⚠️ Off target'}`);
|
|
||||||
|
|
||||||
console.log('\n' + '='.repeat(60));
|
console.log('\n' + '='.repeat(60));
|
||||||
console.log('TEST COMPLETE');
|
console.log('TEST COMPLETE');
|
||||||
console.log('='.repeat(60));
|
console.log('='.repeat(60));
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -26,8 +26,6 @@ import {
|
|||||||
PenetrationDataPoint,
|
PenetrationDataPoint,
|
||||||
BrandMarketPosition,
|
BrandMarketPosition,
|
||||||
BrandRecVsMedFootprint,
|
BrandRecVsMedFootprint,
|
||||||
BrandPromotionalSummary,
|
|
||||||
BrandPromotionalEvent,
|
|
||||||
} from './types';
|
} from './types';
|
||||||
|
|
||||||
export class BrandPenetrationService {
|
export class BrandPenetrationService {
|
||||||
@@ -46,17 +44,16 @@ export class BrandPenetrationService {
|
|||||||
// Get current brand presence
|
// Get current brand presence
|
||||||
const currentResult = await this.pool.query(`
|
const currentResult = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
sp.brand_name_raw AS brand_name,
|
sp.brand_name,
|
||||||
COUNT(DISTINCT sp.dispensary_id) AS total_dispensaries,
|
COUNT(DISTINCT sp.dispensary_id) AS total_dispensaries,
|
||||||
COUNT(*) AS total_skus,
|
COUNT(*) AS total_skus,
|
||||||
ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus_per_dispensary,
|
ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus_per_dispensary,
|
||||||
ARRAY_AGG(DISTINCT s.code) FILTER (WHERE s.code IS NOT NULL) AS states_present
|
ARRAY_AGG(DISTINCT s.code) FILTER (WHERE s.code IS NOT NULL) AS states_present
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
LEFT JOIN states s ON s.id = sp.state_id
|
||||||
LEFT JOIN states s ON s.id = d.state_id
|
WHERE sp.brand_name = $1
|
||||||
WHERE sp.brand_name_raw = $1
|
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
GROUP BY sp.brand_name_raw
|
GROUP BY sp.brand_name
|
||||||
`, [brandName]);
|
`, [brandName]);
|
||||||
|
|
||||||
if (currentResult.rows.length === 0) {
|
if (currentResult.rows.length === 0) {
|
||||||
@@ -75,7 +72,7 @@ export class BrandPenetrationService {
|
|||||||
DATE(sps.captured_at) AS date,
|
DATE(sps.captured_at) AS date,
|
||||||
COUNT(DISTINCT sps.dispensary_id) AS dispensary_count
|
COUNT(DISTINCT sps.dispensary_id) AS dispensary_count
|
||||||
FROM store_product_snapshots sps
|
FROM store_product_snapshots sps
|
||||||
WHERE sps.brand_name_raw = $1
|
WHERE sps.brand_name = $1
|
||||||
AND sps.captured_at >= $2
|
AND sps.captured_at >= $2
|
||||||
AND sps.captured_at <= $3
|
AND sps.captured_at <= $3
|
||||||
AND sps.is_in_stock = TRUE
|
AND sps.is_in_stock = TRUE
|
||||||
@@ -126,9 +123,8 @@ export class BrandPenetrationService {
|
|||||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||||
COUNT(*) AS sku_count
|
COUNT(*) AS sku_count
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
WHERE sp.brand_name = $1
|
||||||
WHERE sp.brand_name_raw = $1
|
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
|
GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
|
||||||
),
|
),
|
||||||
@@ -137,8 +133,7 @@ export class BrandPenetrationService {
|
|||||||
s.code AS state_code,
|
s.code AS state_code,
|
||||||
COUNT(DISTINCT sp.dispensary_id) AS total_dispensaries
|
COUNT(DISTINCT sp.dispensary_id) AS total_dispensaries
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
|
||||||
WHERE sp.is_in_stock = TRUE
|
WHERE sp.is_in_stock = TRUE
|
||||||
GROUP BY s.code
|
GROUP BY s.code
|
||||||
)
|
)
|
||||||
@@ -174,7 +169,7 @@ export class BrandPenetrationService {
|
|||||||
let filters = '';
|
let filters = '';
|
||||||
|
|
||||||
if (options.category) {
|
if (options.category) {
|
||||||
filters += ` AND sp.category_raw = $${paramIdx}`;
|
filters += ` AND sp.category = $${paramIdx}`;
|
||||||
params.push(options.category);
|
params.push(options.category);
|
||||||
paramIdx++;
|
paramIdx++;
|
||||||
}
|
}
|
||||||
@@ -188,33 +183,31 @@ export class BrandPenetrationService {
|
|||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
WITH brand_metrics AS (
|
WITH brand_metrics AS (
|
||||||
SELECT
|
SELECT
|
||||||
sp.brand_name_raw AS brand_name,
|
sp.brand_name,
|
||||||
sp.category_raw AS category,
|
sp.category,
|
||||||
s.code AS state_code,
|
s.code AS state_code,
|
||||||
COUNT(*) AS sku_count,
|
COUNT(*) AS sku_count,
|
||||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||||
AVG(sp.price_rec) AS avg_price
|
AVG(sp.price_rec) AS avg_price
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
WHERE sp.brand_name = $1
|
||||||
WHERE sp.brand_name_raw = $1
|
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
AND sp.category_raw IS NOT NULL
|
AND sp.category IS NOT NULL
|
||||||
${filters}
|
${filters}
|
||||||
GROUP BY sp.brand_name_raw, sp.category_raw, s.code
|
GROUP BY sp.brand_name, sp.category, s.code
|
||||||
),
|
),
|
||||||
category_totals AS (
|
category_totals AS (
|
||||||
SELECT
|
SELECT
|
||||||
sp.category_raw AS category,
|
sp.category,
|
||||||
s.code AS state_code,
|
s.code AS state_code,
|
||||||
COUNT(*) AS total_skus,
|
COUNT(*) AS total_skus,
|
||||||
AVG(sp.price_rec) AS category_avg_price
|
AVG(sp.price_rec) AS category_avg_price
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
|
||||||
WHERE sp.is_in_stock = TRUE
|
WHERE sp.is_in_stock = TRUE
|
||||||
AND sp.category_raw IS NOT NULL
|
AND sp.category IS NOT NULL
|
||||||
GROUP BY sp.category_raw, s.code
|
GROUP BY sp.category, s.code
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
bm.*,
|
bm.*,
|
||||||
@@ -250,9 +243,8 @@ export class BrandPenetrationService {
|
|||||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||||
ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus
|
ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
WHERE sp.brand_name = $1
|
||||||
WHERE sp.brand_name_raw = $1
|
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
AND s.recreational_legal = TRUE
|
AND s.recreational_legal = TRUE
|
||||||
),
|
),
|
||||||
@@ -263,9 +255,8 @@ export class BrandPenetrationService {
|
|||||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||||
ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus
|
ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
WHERE sp.brand_name = $1
|
||||||
WHERE sp.brand_name_raw = $1
|
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
AND s.medical_legal = TRUE
|
AND s.medical_legal = TRUE
|
||||||
AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
|
AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
|
||||||
@@ -320,24 +311,23 @@ export class BrandPenetrationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (category) {
|
if (category) {
|
||||||
filters += ` AND sp.category_raw = $${paramIdx}`;
|
filters += ` AND sp.category = $${paramIdx}`;
|
||||||
params.push(category);
|
params.push(category);
|
||||||
paramIdx++;
|
paramIdx++;
|
||||||
}
|
}
|
||||||
|
|
||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
sp.brand_name_raw AS brand_name,
|
sp.brand_name,
|
||||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||||
COUNT(*) AS sku_count,
|
COUNT(*) AS sku_count,
|
||||||
COUNT(DISTINCT s.code) AS state_count
|
COUNT(DISTINCT s.code) AS state_count
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
LEFT JOIN states s ON s.id = sp.state_id
|
||||||
LEFT JOIN states s ON s.id = d.state_id
|
WHERE sp.brand_name IS NOT NULL
|
||||||
WHERE sp.brand_name_raw IS NOT NULL
|
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
${filters}
|
${filters}
|
||||||
GROUP BY sp.brand_name_raw
|
GROUP BY sp.brand_name
|
||||||
ORDER BY dispensary_count DESC, sku_count DESC
|
ORDER BY dispensary_count DESC, sku_count DESC
|
||||||
LIMIT $1
|
LIMIT $1
|
||||||
`, params);
|
`, params);
|
||||||
@@ -368,23 +358,23 @@ export class BrandPenetrationService {
|
|||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
WITH start_counts AS (
|
WITH start_counts AS (
|
||||||
SELECT
|
SELECT
|
||||||
brand_name_raw AS brand_name,
|
brand_name,
|
||||||
COUNT(DISTINCT dispensary_id) AS dispensary_count
|
COUNT(DISTINCT dispensary_id) AS dispensary_count
|
||||||
FROM store_product_snapshots
|
FROM store_product_snapshots
|
||||||
WHERE captured_at >= $1 AND captured_at < $1 + INTERVAL '1 day'
|
WHERE captured_at >= $1 AND captured_at < $1 + INTERVAL '1 day'
|
||||||
AND brand_name_raw IS NOT NULL
|
AND brand_name IS NOT NULL
|
||||||
AND is_in_stock = TRUE
|
AND is_in_stock = TRUE
|
||||||
GROUP BY brand_name_raw
|
GROUP BY brand_name
|
||||||
),
|
),
|
||||||
end_counts AS (
|
end_counts AS (
|
||||||
SELECT
|
SELECT
|
||||||
brand_name_raw AS brand_name,
|
brand_name,
|
||||||
COUNT(DISTINCT dispensary_id) AS dispensary_count
|
COUNT(DISTINCT dispensary_id) AS dispensary_count
|
||||||
FROM store_product_snapshots
|
FROM store_product_snapshots
|
||||||
WHERE captured_at >= $2 - INTERVAL '1 day' AND captured_at <= $2
|
WHERE captured_at >= $2 - INTERVAL '1 day' AND captured_at <= $2
|
||||||
AND brand_name_raw IS NOT NULL
|
AND brand_name IS NOT NULL
|
||||||
AND is_in_stock = TRUE
|
AND is_in_stock = TRUE
|
||||||
GROUP BY brand_name_raw
|
GROUP BY brand_name
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
COALESCE(sc.brand_name, ec.brand_name) AS brand_name,
|
COALESCE(sc.brand_name, ec.brand_name) AS brand_name,
|
||||||
@@ -411,225 +401,6 @@ export class BrandPenetrationService {
|
|||||||
change_percent: row.change_percent ? parseFloat(row.change_percent) : 0,
|
change_percent: row.change_percent ? parseFloat(row.change_percent) : 0,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Get brand promotional history
|
|
||||||
*
|
|
||||||
* Tracks when products went on special, how long, what discount,
|
|
||||||
* and estimated quantity sold during the promotion.
|
|
||||||
*/
|
|
||||||
async getBrandPromotionalHistory(
|
|
||||||
brandName: string,
|
|
||||||
options: { window?: TimeWindow; customRange?: DateRange; stateCode?: string; category?: string } = {}
|
|
||||||
): Promise<BrandPromotionalSummary> {
|
|
||||||
const { window = '90d', customRange, stateCode, category } = options;
|
|
||||||
const { start, end } = getDateRangeFromWindow(window, customRange);
|
|
||||||
|
|
||||||
// Build filters
|
|
||||||
const params: any[] = [brandName, start, end];
|
|
||||||
let paramIdx = 4;
|
|
||||||
let filters = '';
|
|
||||||
|
|
||||||
if (stateCode) {
|
|
||||||
filters += ` AND s.code = $${paramIdx}`;
|
|
||||||
params.push(stateCode);
|
|
||||||
paramIdx++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (category) {
|
|
||||||
filters += ` AND sp.category_raw = $${paramIdx}`;
|
|
||||||
params.push(category);
|
|
||||||
paramIdx++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find promotional events by detecting when is_on_special transitions to TRUE
|
|
||||||
// and tracking until it transitions back to FALSE
|
|
||||||
const eventsResult = await this.pool.query(`
|
|
||||||
WITH snapshot_with_lag AS (
|
|
||||||
SELECT
|
|
||||||
sps.id,
|
|
||||||
sps.store_product_id,
|
|
||||||
sps.dispensary_id,
|
|
||||||
sps.brand_name_raw,
|
|
||||||
sps.name_raw,
|
|
||||||
sps.category_raw,
|
|
||||||
sps.is_on_special,
|
|
||||||
sps.price_rec,
|
|
||||||
sps.price_rec_special,
|
|
||||||
sps.stock_quantity,
|
|
||||||
sps.captured_at,
|
|
||||||
LAG(sps.is_on_special) OVER (
|
|
||||||
PARTITION BY sps.store_product_id
|
|
||||||
ORDER BY sps.captured_at
|
|
||||||
) AS prev_is_on_special,
|
|
||||||
LAG(sps.stock_quantity) OVER (
|
|
||||||
PARTITION BY sps.store_product_id
|
|
||||||
ORDER BY sps.captured_at
|
|
||||||
) AS prev_stock_quantity
|
|
||||||
FROM store_product_snapshots sps
|
|
||||||
JOIN store_products sp ON sp.id = sps.store_product_id
|
|
||||||
JOIN dispensaries dd ON dd.id = sp.dispensary_id
|
|
||||||
LEFT JOIN states s ON s.id = dd.state_id
|
|
||||||
WHERE sps.brand_name_raw = $1
|
|
||||||
AND sps.captured_at >= $2
|
|
||||||
AND sps.captured_at <= $3
|
|
||||||
${filters}
|
|
||||||
),
|
|
||||||
special_starts AS (
|
|
||||||
-- Find when specials START (transition from not-on-special to on-special)
|
|
||||||
SELECT
|
|
||||||
store_product_id,
|
|
||||||
dispensary_id,
|
|
||||||
name_raw,
|
|
||||||
category_raw,
|
|
||||||
captured_at AS special_start,
|
|
||||||
price_rec AS regular_price,
|
|
||||||
price_rec_special AS special_price,
|
|
||||||
stock_quantity AS quantity_at_start
|
|
||||||
FROM snapshot_with_lag
|
|
||||||
WHERE is_on_special = TRUE
|
|
||||||
AND (prev_is_on_special = FALSE OR prev_is_on_special IS NULL)
|
|
||||||
AND price_rec_special IS NOT NULL
|
|
||||||
AND price_rec IS NOT NULL
|
|
||||||
),
|
|
||||||
special_ends AS (
|
|
||||||
-- Find when specials END (transition from on-special to not-on-special)
|
|
||||||
SELECT
|
|
||||||
store_product_id,
|
|
||||||
captured_at AS special_end,
|
|
||||||
prev_stock_quantity AS quantity_at_end
|
|
||||||
FROM snapshot_with_lag
|
|
||||||
WHERE is_on_special = FALSE
|
|
||||||
AND prev_is_on_special = TRUE
|
|
||||||
),
|
|
||||||
matched_events AS (
|
|
||||||
SELECT
|
|
||||||
ss.store_product_id,
|
|
||||||
ss.dispensary_id,
|
|
||||||
ss.name_raw AS product_name,
|
|
||||||
ss.category_raw AS category,
|
|
||||||
ss.special_start,
|
|
||||||
se.special_end,
|
|
||||||
ss.regular_price,
|
|
||||||
ss.special_price,
|
|
||||||
ss.quantity_at_start,
|
|
||||||
COALESCE(se.quantity_at_end, ss.quantity_at_start) AS quantity_at_end
|
|
||||||
FROM special_starts ss
|
|
||||||
LEFT JOIN special_ends se ON se.store_product_id = ss.store_product_id
|
|
||||||
AND se.special_end > ss.special_start
|
|
||||||
AND se.special_end = (
|
|
||||||
SELECT MIN(se2.special_end)
|
|
||||||
FROM special_ends se2
|
|
||||||
WHERE se2.store_product_id = ss.store_product_id
|
|
||||||
AND se2.special_end > ss.special_start
|
|
||||||
)
|
|
||||||
)
|
|
||||||
SELECT
|
|
||||||
me.store_product_id,
|
|
||||||
me.dispensary_id,
|
|
||||||
d.name AS dispensary_name,
|
|
||||||
s.code AS state_code,
|
|
||||||
me.product_name,
|
|
||||||
me.category,
|
|
||||||
me.special_start,
|
|
||||||
me.special_end,
|
|
||||||
EXTRACT(DAY FROM COALESCE(me.special_end, NOW()) - me.special_start)::INT AS duration_days,
|
|
||||||
me.regular_price,
|
|
||||||
me.special_price,
|
|
||||||
ROUND(((me.regular_price - me.special_price) / NULLIF(me.regular_price, 0)) * 100, 1) AS discount_percent,
|
|
||||||
me.quantity_at_start,
|
|
||||||
me.quantity_at_end,
|
|
||||||
GREATEST(0, COALESCE(me.quantity_at_start, 0) - COALESCE(me.quantity_at_end, 0)) AS quantity_sold_estimate
|
|
||||||
FROM matched_events me
|
|
||||||
JOIN dispensaries d ON d.id = me.dispensary_id
|
|
||||||
LEFT JOIN states s ON s.id = d.state_id
|
|
||||||
ORDER BY me.special_start DESC
|
|
||||||
`, params);
|
|
||||||
|
|
||||||
const events: BrandPromotionalEvent[] = eventsResult.rows.map((row: any) => ({
|
|
||||||
product_name: row.product_name,
|
|
||||||
store_product_id: parseInt(row.store_product_id),
|
|
||||||
dispensary_id: parseInt(row.dispensary_id),
|
|
||||||
dispensary_name: row.dispensary_name,
|
|
||||||
state_code: row.state_code || 'Unknown',
|
|
||||||
category: row.category,
|
|
||||||
special_start: row.special_start.toISOString().split('T')[0],
|
|
||||||
special_end: row.special_end ? row.special_end.toISOString().split('T')[0] : null,
|
|
||||||
duration_days: row.duration_days ? parseInt(row.duration_days) : null,
|
|
||||||
regular_price: parseFloat(row.regular_price) || 0,
|
|
||||||
special_price: parseFloat(row.special_price) || 0,
|
|
||||||
discount_percent: parseFloat(row.discount_percent) || 0,
|
|
||||||
quantity_at_start: row.quantity_at_start ? parseInt(row.quantity_at_start) : null,
|
|
||||||
quantity_at_end: row.quantity_at_end ? parseInt(row.quantity_at_end) : null,
|
|
||||||
quantity_sold_estimate: row.quantity_sold_estimate ? parseInt(row.quantity_sold_estimate) : null,
|
|
||||||
}));
|
|
||||||
|
|
||||||
// Calculate summary stats
|
|
||||||
const totalEvents = events.length;
|
|
||||||
const uniqueProducts = new Set(events.map(e => e.store_product_id)).size;
|
|
||||||
const uniqueDispensaries = new Set(events.map(e => e.dispensary_id)).size;
|
|
||||||
const uniqueStates = [...new Set(events.map(e => e.state_code))];
|
|
||||||
|
|
||||||
const avgDiscount = totalEvents > 0
|
|
||||||
? events.reduce((sum, e) => sum + e.discount_percent, 0) / totalEvents
|
|
||||||
: 0;
|
|
||||||
|
|
||||||
const durations = events.filter(e => e.duration_days !== null).map(e => e.duration_days!);
|
|
||||||
const avgDuration = durations.length > 0
|
|
||||||
? durations.reduce((sum, d) => sum + d, 0) / durations.length
|
|
||||||
: null;
|
|
||||||
|
|
||||||
const totalQuantitySold = events
|
|
||||||
.filter(e => e.quantity_sold_estimate !== null)
|
|
||||||
.reduce((sum, e) => sum + (e.quantity_sold_estimate || 0), 0);
|
|
||||||
|
|
||||||
// Calculate frequency
|
|
||||||
const windowDays = Math.ceil((end.getTime() - start.getTime()) / (1000 * 60 * 60 * 24));
|
|
||||||
const weeklyAvg = windowDays > 0 ? (totalEvents / windowDays) * 7 : 0;
|
|
||||||
const monthlyAvg = windowDays > 0 ? (totalEvents / windowDays) * 30 : 0;
|
|
||||||
|
|
||||||
// Group by category
|
|
||||||
const categoryMap = new Map<string, { count: number; discounts: number[]; quantity: number }>();
|
|
||||||
for (const event of events) {
|
|
||||||
const cat = event.category || 'Uncategorized';
|
|
||||||
if (!categoryMap.has(cat)) {
|
|
||||||
categoryMap.set(cat, { count: 0, discounts: [], quantity: 0 });
|
|
||||||
}
|
|
||||||
const entry = categoryMap.get(cat)!;
|
|
||||||
entry.count++;
|
|
||||||
entry.discounts.push(event.discount_percent);
|
|
||||||
if (event.quantity_sold_estimate !== null) {
|
|
||||||
entry.quantity += event.quantity_sold_estimate;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const byCategory = Array.from(categoryMap.entries()).map(([category, data]) => ({
|
|
||||||
category,
|
|
||||||
event_count: data.count,
|
|
||||||
avg_discount_percent: data.discounts.length > 0
|
|
||||||
? Math.round((data.discounts.reduce((a, b) => a + b, 0) / data.discounts.length) * 10) / 10
|
|
||||||
: 0,
|
|
||||||
quantity_sold_estimate: data.quantity > 0 ? data.quantity : null,
|
|
||||||
})).sort((a, b) => b.event_count - a.event_count);
|
|
||||||
|
|
||||||
return {
|
|
||||||
brand_name: brandName,
|
|
||||||
window,
|
|
||||||
total_promotional_events: totalEvents,
|
|
||||||
total_products_on_special: uniqueProducts,
|
|
||||||
total_dispensaries_with_specials: uniqueDispensaries,
|
|
||||||
states_with_specials: uniqueStates,
|
|
||||||
avg_discount_percent: Math.round(avgDiscount * 10) / 10,
|
|
||||||
avg_duration_days: avgDuration !== null ? Math.round(avgDuration * 10) / 10 : null,
|
|
||||||
total_quantity_sold_estimate: totalQuantitySold > 0 ? totalQuantitySold : null,
|
|
||||||
promotional_frequency: {
|
|
||||||
weekly_avg: Math.round(weeklyAvg * 10) / 10,
|
|
||||||
monthly_avg: Math.round(monthlyAvg * 10) / 10,
|
|
||||||
},
|
|
||||||
by_category: byCategory,
|
|
||||||
events,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export default BrandPenetrationService;
|
export default BrandPenetrationService;
|
||||||
|
|||||||
@@ -43,14 +43,14 @@ export class CategoryAnalyticsService {
|
|||||||
// Get current category metrics
|
// Get current category metrics
|
||||||
const currentResult = await this.pool.query(`
|
const currentResult = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
sp.category_raw,
|
sp.category,
|
||||||
COUNT(*) AS sku_count,
|
COUNT(*) AS sku_count,
|
||||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||||
AVG(sp.price_rec) AS avg_price
|
AVG(sp.price_rec) AS avg_price
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
WHERE sp.category_raw = $1
|
WHERE sp.category = $1
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
GROUP BY sp.category_raw
|
GROUP BY sp.category
|
||||||
`, [category]);
|
`, [category]);
|
||||||
|
|
||||||
if (currentResult.rows.length === 0) {
|
if (currentResult.rows.length === 0) {
|
||||||
@@ -70,7 +70,7 @@ export class CategoryAnalyticsService {
|
|||||||
COUNT(DISTINCT sps.dispensary_id) AS dispensary_count,
|
COUNT(DISTINCT sps.dispensary_id) AS dispensary_count,
|
||||||
AVG(sps.price_rec) AS avg_price
|
AVG(sps.price_rec) AS avg_price
|
||||||
FROM store_product_snapshots sps
|
FROM store_product_snapshots sps
|
||||||
WHERE sps.category_raw = $1
|
WHERE sps.category = $1
|
||||||
AND sps.captured_at >= $2
|
AND sps.captured_at >= $2
|
||||||
AND sps.captured_at <= $3
|
AND sps.captured_at <= $3
|
||||||
AND sps.is_in_stock = TRUE
|
AND sps.is_in_stock = TRUE
|
||||||
@@ -111,9 +111,8 @@ export class CategoryAnalyticsService {
|
|||||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||||
AVG(sp.price_rec) AS avg_price
|
AVG(sp.price_rec) AS avg_price
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
WHERE sp.category = $1
|
||||||
WHERE sp.category_raw = $1
|
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
GROUP BY s.code, s.name, s.recreational_legal
|
GROUP BY s.code, s.name, s.recreational_legal
|
||||||
ORDER BY sku_count DESC
|
ORDER BY sku_count DESC
|
||||||
@@ -155,25 +154,24 @@ export class CategoryAnalyticsService {
|
|||||||
|
|
||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
sp.category_raw,
|
sp.category,
|
||||||
COUNT(*) AS sku_count,
|
COUNT(*) AS sku_count,
|
||||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||||
COUNT(DISTINCT sp.brand_name_raw) AS brand_count,
|
COUNT(DISTINCT sp.brand_name) AS brand_count,
|
||||||
AVG(sp.price_rec) AS avg_price,
|
AVG(sp.price_rec) AS avg_price,
|
||||||
COUNT(DISTINCT s.code) AS state_count
|
COUNT(DISTINCT s.code) AS state_count
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
|
LEFT JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
WHERE sp.category IS NOT NULL
|
||||||
WHERE sp.category_raw IS NOT NULL
|
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
${stateFilter}
|
${stateFilter}
|
||||||
GROUP BY sp.category_raw
|
GROUP BY sp.category
|
||||||
ORDER BY sku_count DESC
|
ORDER BY sku_count DESC
|
||||||
LIMIT $1
|
LIMIT $1
|
||||||
`, params);
|
`, params);
|
||||||
|
|
||||||
return result.rows.map((row: any) => ({
|
return result.rows.map((row: any) => ({
|
||||||
category: row.category_raw,
|
category: row.category,
|
||||||
sku_count: parseInt(row.sku_count),
|
sku_count: parseInt(row.sku_count),
|
||||||
dispensary_count: parseInt(row.dispensary_count),
|
dispensary_count: parseInt(row.dispensary_count),
|
||||||
brand_count: parseInt(row.brand_count),
|
brand_count: parseInt(row.brand_count),
|
||||||
@@ -190,14 +188,14 @@ export class CategoryAnalyticsService {
|
|||||||
let categoryFilter = '';
|
let categoryFilter = '';
|
||||||
|
|
||||||
if (category) {
|
if (category) {
|
||||||
categoryFilter = 'WHERE sp.category_raw = $1';
|
categoryFilter = 'WHERE sp.category = $1';
|
||||||
params.push(category);
|
params.push(category);
|
||||||
}
|
}
|
||||||
|
|
||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
WITH category_stats AS (
|
WITH category_stats AS (
|
||||||
SELECT
|
SELECT
|
||||||
sp.category_raw,
|
sp.category,
|
||||||
CASE WHEN s.recreational_legal = TRUE THEN 'recreational' ELSE 'medical_only' END AS legal_type,
|
CASE WHEN s.recreational_legal = TRUE THEN 'recreational' ELSE 'medical_only' END AS legal_type,
|
||||||
COUNT(DISTINCT s.code) AS state_count,
|
COUNT(DISTINCT s.code) AS state_count,
|
||||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||||
@@ -205,14 +203,13 @@ export class CategoryAnalyticsService {
|
|||||||
AVG(sp.price_rec) AS avg_price,
|
AVG(sp.price_rec) AS avg_price,
|
||||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
|
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
|
||||||
${categoryFilter}
|
${categoryFilter}
|
||||||
${category ? 'AND' : 'WHERE'} sp.category_raw IS NOT NULL
|
${category ? 'AND' : 'WHERE'} sp.category IS NOT NULL
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
AND sp.price_rec IS NOT NULL
|
AND sp.price_rec IS NOT NULL
|
||||||
AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
|
AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
|
||||||
GROUP BY sp.category_raw, CASE WHEN s.recreational_legal = TRUE THEN 'recreational' ELSE 'medical_only' END
|
GROUP BY sp.category, CASE WHEN s.recreational_legal = TRUE THEN 'recreational' ELSE 'medical_only' END
|
||||||
),
|
),
|
||||||
rec_stats AS (
|
rec_stats AS (
|
||||||
SELECT * FROM category_stats WHERE legal_type = 'recreational'
|
SELECT * FROM category_stats WHERE legal_type = 'recreational'
|
||||||
@@ -221,7 +218,7 @@ export class CategoryAnalyticsService {
|
|||||||
SELECT * FROM category_stats WHERE legal_type = 'medical_only'
|
SELECT * FROM category_stats WHERE legal_type = 'medical_only'
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
COALESCE(r.category_raw, m.category_raw) AS category,
|
COALESCE(r.category, m.category) AS category,
|
||||||
r.state_count AS rec_state_count,
|
r.state_count AS rec_state_count,
|
||||||
r.dispensary_count AS rec_dispensary_count,
|
r.dispensary_count AS rec_dispensary_count,
|
||||||
r.sku_count AS rec_sku_count,
|
r.sku_count AS rec_sku_count,
|
||||||
@@ -238,7 +235,7 @@ export class CategoryAnalyticsService {
|
|||||||
ELSE NULL
|
ELSE NULL
|
||||||
END AS price_diff_percent
|
END AS price_diff_percent
|
||||||
FROM rec_stats r
|
FROM rec_stats r
|
||||||
FULL OUTER JOIN med_stats m ON r.category_raw = m.category_raw
|
FULL OUTER JOIN med_stats m ON r.category = m.category
|
||||||
ORDER BY COALESCE(r.sku_count, 0) + COALESCE(m.sku_count, 0) DESC
|
ORDER BY COALESCE(r.sku_count, 0) + COALESCE(m.sku_count, 0) DESC
|
||||||
`, params);
|
`, params);
|
||||||
|
|
||||||
@@ -285,7 +282,7 @@ export class CategoryAnalyticsService {
|
|||||||
COUNT(*) AS sku_count,
|
COUNT(*) AS sku_count,
|
||||||
COUNT(DISTINCT sps.dispensary_id) AS dispensary_count
|
COUNT(DISTINCT sps.dispensary_id) AS dispensary_count
|
||||||
FROM store_product_snapshots sps
|
FROM store_product_snapshots sps
|
||||||
WHERE sps.category_raw = $1
|
WHERE sps.category = $1
|
||||||
AND sps.captured_at >= $2
|
AND sps.captured_at >= $2
|
||||||
AND sps.captured_at <= $3
|
AND sps.captured_at <= $3
|
||||||
AND sps.is_in_stock = TRUE
|
AND sps.is_in_stock = TRUE
|
||||||
@@ -338,33 +335,31 @@ export class CategoryAnalyticsService {
|
|||||||
WITH category_total AS (
|
WITH category_total AS (
|
||||||
SELECT COUNT(*) AS total
|
SELECT COUNT(*) AS total
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
|
LEFT JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
WHERE sp.category = $1
|
||||||
WHERE sp.category_raw = $1
|
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
AND sp.brand_name_raw IS NOT NULL
|
AND sp.brand_name IS NOT NULL
|
||||||
${stateFilter}
|
${stateFilter}
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
sp.brand_name_raw,
|
sp.brand_name,
|
||||||
COUNT(*) AS sku_count,
|
COUNT(*) AS sku_count,
|
||||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||||
AVG(sp.price_rec) AS avg_price,
|
AVG(sp.price_rec) AS avg_price,
|
||||||
ROUND(COUNT(*)::NUMERIC * 100 / NULLIF((SELECT total FROM category_total), 0), 2) AS category_share_percent
|
ROUND(COUNT(*)::NUMERIC * 100 / NULLIF((SELECT total FROM category_total), 0), 2) AS category_share_percent
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
|
LEFT JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
WHERE sp.category = $1
|
||||||
WHERE sp.category_raw = $1
|
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
AND sp.brand_name_raw IS NOT NULL
|
AND sp.brand_name IS NOT NULL
|
||||||
${stateFilter}
|
${stateFilter}
|
||||||
GROUP BY sp.brand_name_raw
|
GROUP BY sp.brand_name
|
||||||
ORDER BY sku_count DESC
|
ORDER BY sku_count DESC
|
||||||
LIMIT $2
|
LIMIT $2
|
||||||
`, params);
|
`, params);
|
||||||
|
|
||||||
return result.rows.map((row: any) => ({
|
return result.rows.map((row: any) => ({
|
||||||
brand_name: row.brand_name_raw,
|
brand_name: row.brand_name,
|
||||||
sku_count: parseInt(row.sku_count),
|
sku_count: parseInt(row.sku_count),
|
||||||
dispensary_count: parseInt(row.dispensary_count),
|
dispensary_count: parseInt(row.dispensary_count),
|
||||||
avg_price: row.avg_price ? parseFloat(row.avg_price) : null,
|
avg_price: row.avg_price ? parseFloat(row.avg_price) : null,
|
||||||
@@ -426,7 +421,7 @@ export class CategoryAnalyticsService {
|
|||||||
`, [start, end, limit]);
|
`, [start, end, limit]);
|
||||||
|
|
||||||
return result.rows.map((row: any) => ({
|
return result.rows.map((row: any) => ({
|
||||||
category: row.category_raw,
|
category: row.category,
|
||||||
start_sku_count: parseInt(row.start_sku_count),
|
start_sku_count: parseInt(row.start_sku_count),
|
||||||
end_sku_count: parseInt(row.end_sku_count),
|
end_sku_count: parseInt(row.end_sku_count),
|
||||||
growth: parseInt(row.growth),
|
growth: parseInt(row.growth),
|
||||||
|
|||||||
@@ -43,9 +43,9 @@ export class PriceAnalyticsService {
|
|||||||
const productResult = await this.pool.query(`
|
const productResult = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
sp.id,
|
sp.id,
|
||||||
sp.name_raw,
|
sp.name,
|
||||||
sp.brand_name_raw,
|
sp.brand_name,
|
||||||
sp.category_raw,
|
sp.category,
|
||||||
sp.dispensary_id,
|
sp.dispensary_id,
|
||||||
sp.price_rec,
|
sp.price_rec,
|
||||||
sp.price_med,
|
sp.price_med,
|
||||||
@@ -53,7 +53,7 @@ export class PriceAnalyticsService {
|
|||||||
s.code AS state_code
|
s.code AS state_code
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||||
JOIN states s ON s.id = d.state_id
|
LEFT JOIN states s ON s.id = sp.state_id
|
||||||
WHERE sp.id = $1
|
WHERE sp.id = $1
|
||||||
`, [storeProductId]);
|
`, [storeProductId]);
|
||||||
|
|
||||||
@@ -133,7 +133,7 @@ export class PriceAnalyticsService {
|
|||||||
|
|
||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
sp.category_raw,
|
sp.category,
|
||||||
s.code AS state_code,
|
s.code AS state_code,
|
||||||
s.name AS state_name,
|
s.name AS state_name,
|
||||||
CASE
|
CASE
|
||||||
@@ -148,18 +148,18 @@ export class PriceAnalyticsService {
|
|||||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count
|
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||||
JOIN states s ON s.id = d.state_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
WHERE sp.category_raw = $1
|
WHERE sp.category = $1
|
||||||
AND sp.price_rec IS NOT NULL
|
AND sp.price_rec IS NOT NULL
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
|
AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
|
||||||
${stateFilter}
|
${stateFilter}
|
||||||
GROUP BY sp.category_raw, s.code, s.name, s.recreational_legal
|
GROUP BY sp.category, s.code, s.name, s.recreational_legal
|
||||||
ORDER BY state_code
|
ORDER BY state_code
|
||||||
`, params);
|
`, params);
|
||||||
|
|
||||||
return result.rows.map((row: any) => ({
|
return result.rows.map((row: any) => ({
|
||||||
category: row.category_raw,
|
category: row.category,
|
||||||
state_code: row.state_code,
|
state_code: row.state_code,
|
||||||
state_name: row.state_name,
|
state_name: row.state_name,
|
||||||
legal_type: row.legal_type,
|
legal_type: row.legal_type,
|
||||||
@@ -189,7 +189,7 @@ export class PriceAnalyticsService {
|
|||||||
|
|
||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
sp.brand_name_raw AS category,
|
sp.brand_name AS category,
|
||||||
s.code AS state_code,
|
s.code AS state_code,
|
||||||
s.name AS state_name,
|
s.name AS state_name,
|
||||||
CASE
|
CASE
|
||||||
@@ -204,18 +204,18 @@ export class PriceAnalyticsService {
|
|||||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count
|
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||||
JOIN states s ON s.id = d.state_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
WHERE sp.brand_name_raw = $1
|
WHERE sp.brand_name = $1
|
||||||
AND sp.price_rec IS NOT NULL
|
AND sp.price_rec IS NOT NULL
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
|
AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
|
||||||
${stateFilter}
|
${stateFilter}
|
||||||
GROUP BY sp.brand_name_raw, s.code, s.name, s.recreational_legal
|
GROUP BY sp.brand_name, s.code, s.name, s.recreational_legal
|
||||||
ORDER BY state_code
|
ORDER BY state_code
|
||||||
`, params);
|
`, params);
|
||||||
|
|
||||||
return result.rows.map((row: any) => ({
|
return result.rows.map((row: any) => ({
|
||||||
category: row.category_raw,
|
category: row.category,
|
||||||
state_code: row.state_code,
|
state_code: row.state_code,
|
||||||
state_name: row.state_name,
|
state_name: row.state_name,
|
||||||
legal_type: row.legal_type,
|
legal_type: row.legal_type,
|
||||||
@@ -254,7 +254,7 @@ export class PriceAnalyticsService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (category) {
|
if (category) {
|
||||||
filters += ` AND sp.category_raw = $${paramIdx}`;
|
filters += ` AND sp.category = $${paramIdx}`;
|
||||||
params.push(category);
|
params.push(category);
|
||||||
paramIdx++;
|
paramIdx++;
|
||||||
}
|
}
|
||||||
@@ -288,16 +288,15 @@ export class PriceAnalyticsService {
|
|||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
v.store_product_id,
|
v.store_product_id,
|
||||||
sp.name_raw AS product_name,
|
sp.name AS product_name,
|
||||||
sp.brand_name_raw,
|
sp.brand_name,
|
||||||
v.change_count,
|
v.change_count,
|
||||||
v.avg_change_pct,
|
v.avg_change_pct,
|
||||||
v.max_change_pct,
|
v.max_change_pct,
|
||||||
v.last_change_at
|
v.last_change_at
|
||||||
FROM volatility v
|
FROM volatility v
|
||||||
JOIN store_products sp ON sp.id = v.store_product_id
|
JOIN store_products sp ON sp.id = v.store_product_id
|
||||||
LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
|
LEFT JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
|
||||||
WHERE 1=1 ${filters}
|
WHERE 1=1 ${filters}
|
||||||
ORDER BY v.change_count DESC, v.avg_change_pct DESC
|
ORDER BY v.change_count DESC, v.avg_change_pct DESC
|
||||||
LIMIT $3
|
LIMIT $3
|
||||||
@@ -306,7 +305,7 @@ export class PriceAnalyticsService {
|
|||||||
return result.rows.map((row: any) => ({
|
return result.rows.map((row: any) => ({
|
||||||
store_product_id: row.store_product_id,
|
store_product_id: row.store_product_id,
|
||||||
product_name: row.product_name,
|
product_name: row.product_name,
|
||||||
brand_name: row.brand_name_raw,
|
brand_name: row.brand_name,
|
||||||
change_count: parseInt(row.change_count),
|
change_count: parseInt(row.change_count),
|
||||||
avg_change_percent: row.avg_change_pct ? parseFloat(row.avg_change_pct) : 0,
|
avg_change_percent: row.avg_change_pct ? parseFloat(row.avg_change_pct) : 0,
|
||||||
max_change_percent: row.max_change_pct ? parseFloat(row.max_change_pct) : 0,
|
max_change_percent: row.max_change_pct ? parseFloat(row.max_change_pct) : 0,
|
||||||
@@ -328,13 +327,13 @@ export class PriceAnalyticsService {
|
|||||||
let categoryFilter = '';
|
let categoryFilter = '';
|
||||||
|
|
||||||
if (category) {
|
if (category) {
|
||||||
categoryFilter = 'WHERE sp.category_raw = $1';
|
categoryFilter = 'WHERE sp.category = $1';
|
||||||
params.push(category);
|
params.push(category);
|
||||||
}
|
}
|
||||||
|
|
||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
sp.category_raw,
|
sp.category,
|
||||||
AVG(sp.price_rec) FILTER (WHERE s.recreational_legal = TRUE) AS rec_avg,
|
AVG(sp.price_rec) FILTER (WHERE s.recreational_legal = TRUE) AS rec_avg,
|
||||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)
|
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)
|
||||||
FILTER (WHERE s.recreational_legal = TRUE) AS rec_median,
|
FILTER (WHERE s.recreational_legal = TRUE) AS rec_median,
|
||||||
@@ -344,18 +343,17 @@ export class PriceAnalyticsService {
|
|||||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)
|
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)
|
||||||
FILTER (WHERE s.medical_legal = TRUE AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)) AS med_median
|
FILTER (WHERE s.medical_legal = TRUE AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)) AS med_median
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
|
||||||
${categoryFilter}
|
${categoryFilter}
|
||||||
${category ? 'AND' : 'WHERE'} sp.price_rec IS NOT NULL
|
${category ? 'AND' : 'WHERE'} sp.price_rec IS NOT NULL
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
AND sp.category_raw IS NOT NULL
|
AND sp.category IS NOT NULL
|
||||||
GROUP BY sp.category_raw
|
GROUP BY sp.category
|
||||||
ORDER BY sp.category_raw
|
ORDER BY sp.category
|
||||||
`, params);
|
`, params);
|
||||||
|
|
||||||
return result.rows.map((row: any) => ({
|
return result.rows.map((row: any) => ({
|
||||||
category: row.category_raw,
|
category: row.category,
|
||||||
rec_avg: row.rec_avg ? parseFloat(row.rec_avg) : null,
|
rec_avg: row.rec_avg ? parseFloat(row.rec_avg) : null,
|
||||||
rec_median: row.rec_median ? parseFloat(row.rec_median) : null,
|
rec_median: row.rec_median ? parseFloat(row.rec_median) : null,
|
||||||
med_avg: row.med_avg ? parseFloat(row.med_avg) : null,
|
med_avg: row.med_avg ? parseFloat(row.med_avg) : null,
|
||||||
|
|||||||
@@ -108,14 +108,14 @@ export class StateAnalyticsService {
|
|||||||
SELECT
|
SELECT
|
||||||
COUNT(DISTINCT d.id) AS dispensary_count,
|
COUNT(DISTINCT d.id) AS dispensary_count,
|
||||||
COUNT(DISTINCT sp.id) AS product_count,
|
COUNT(DISTINCT sp.id) AS product_count,
|
||||||
COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL) AS brand_count,
|
COUNT(DISTINCT sp.brand_name) FILTER (WHERE sp.brand_name IS NOT NULL) AS brand_count,
|
||||||
COUNT(DISTINCT sp.category_raw) FILTER (WHERE sp.category_raw IS NOT NULL) AS category_count,
|
COUNT(DISTINCT sp.category) FILTER (WHERE sp.category IS NOT NULL) AS category_count,
|
||||||
COUNT(sps.id) AS snapshot_count,
|
COUNT(sps.id) AS snapshot_count,
|
||||||
MAX(sps.captured_at) AS last_crawl_at
|
MAX(sps.captured_at) AS last_crawl_at
|
||||||
FROM states s
|
FROM states s
|
||||||
LEFT JOIN dispensaries d ON d.state_id = s.id
|
LEFT JOIN dispensaries d ON d.state_id = s.id
|
||||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
|
LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
|
||||||
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
|
LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
|
||||||
WHERE s.code = $1
|
WHERE s.code = $1
|
||||||
`, [stateCode]);
|
`, [stateCode]);
|
||||||
|
|
||||||
@@ -129,8 +129,7 @@ export class StateAnalyticsService {
|
|||||||
MIN(price_rec) AS min_price,
|
MIN(price_rec) AS min_price,
|
||||||
MAX(price_rec) AS max_price
|
MAX(price_rec) AS max_price
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
|
||||||
WHERE s.code = $1
|
WHERE s.code = $1
|
||||||
AND sp.price_rec IS NOT NULL
|
AND sp.price_rec IS NOT NULL
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
@@ -141,15 +140,14 @@ export class StateAnalyticsService {
|
|||||||
// Get top categories
|
// Get top categories
|
||||||
const topCategoriesResult = await this.pool.query(`
|
const topCategoriesResult = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
sp.category_raw,
|
sp.category,
|
||||||
COUNT(*) AS count
|
COUNT(*) AS count
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
|
||||||
WHERE s.code = $1
|
WHERE s.code = $1
|
||||||
AND sp.category_raw IS NOT NULL
|
AND sp.category IS NOT NULL
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
GROUP BY sp.category_raw
|
GROUP BY sp.category
|
||||||
ORDER BY count DESC
|
ORDER BY count DESC
|
||||||
LIMIT 10
|
LIMIT 10
|
||||||
`, [stateCode]);
|
`, [stateCode]);
|
||||||
@@ -157,15 +155,14 @@ export class StateAnalyticsService {
|
|||||||
// Get top brands
|
// Get top brands
|
||||||
const topBrandsResult = await this.pool.query(`
|
const topBrandsResult = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
sp.brand_name_raw AS brand,
|
sp.brand_name AS brand,
|
||||||
COUNT(*) AS count
|
COUNT(*) AS count
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
|
||||||
WHERE s.code = $1
|
WHERE s.code = $1
|
||||||
AND sp.brand_name_raw IS NOT NULL
|
AND sp.brand_name IS NOT NULL
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
GROUP BY sp.brand_name_raw
|
GROUP BY sp.brand_name
|
||||||
ORDER BY count DESC
|
ORDER BY count DESC
|
||||||
LIMIT 10
|
LIMIT 10
|
||||||
`, [stateCode]);
|
`, [stateCode]);
|
||||||
@@ -194,7 +191,7 @@ export class StateAnalyticsService {
|
|||||||
max_price: pricing.max_price ? parseFloat(pricing.max_price) : null,
|
max_price: pricing.max_price ? parseFloat(pricing.max_price) : null,
|
||||||
},
|
},
|
||||||
top_categories: topCategoriesResult.rows.map((row: any) => ({
|
top_categories: topCategoriesResult.rows.map((row: any) => ({
|
||||||
category: row.category_raw,
|
category: row.category,
|
||||||
count: parseInt(row.count),
|
count: parseInt(row.count),
|
||||||
})),
|
})),
|
||||||
top_brands: topBrandsResult.rows.map((row: any) => ({
|
top_brands: topBrandsResult.rows.map((row: any) => ({
|
||||||
@@ -218,8 +215,8 @@ export class StateAnalyticsService {
|
|||||||
COUNT(sps.id) AS snapshot_count
|
COUNT(sps.id) AS snapshot_count
|
||||||
FROM states s
|
FROM states s
|
||||||
LEFT JOIN dispensaries d ON d.state_id = s.id
|
LEFT JOIN dispensaries d ON d.state_id = s.id
|
||||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
|
LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
|
||||||
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
|
LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
|
||||||
WHERE s.recreational_legal = TRUE
|
WHERE s.recreational_legal = TRUE
|
||||||
GROUP BY s.code, s.name
|
GROUP BY s.code, s.name
|
||||||
ORDER BY dispensary_count DESC
|
ORDER BY dispensary_count DESC
|
||||||
@@ -235,8 +232,8 @@ export class StateAnalyticsService {
|
|||||||
COUNT(sps.id) AS snapshot_count
|
COUNT(sps.id) AS snapshot_count
|
||||||
FROM states s
|
FROM states s
|
||||||
LEFT JOIN dispensaries d ON d.state_id = s.id
|
LEFT JOIN dispensaries d ON d.state_id = s.id
|
||||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
|
LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
|
||||||
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
|
LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
|
||||||
WHERE s.medical_legal = TRUE
|
WHERE s.medical_legal = TRUE
|
||||||
AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
|
AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
|
||||||
GROUP BY s.code, s.name
|
GROUP BY s.code, s.name
|
||||||
@@ -298,48 +295,46 @@ export class StateAnalyticsService {
|
|||||||
let groupBy = 'NULL';
|
let groupBy = 'NULL';
|
||||||
|
|
||||||
if (category) {
|
if (category) {
|
||||||
categoryFilter = 'AND sp.category_raw = $1';
|
categoryFilter = 'AND sp.category = $1';
|
||||||
params.push(category);
|
params.push(category);
|
||||||
groupBy = 'sp.category_raw';
|
groupBy = 'sp.category';
|
||||||
} else {
|
} else {
|
||||||
groupBy = 'sp.category_raw';
|
groupBy = 'sp.category';
|
||||||
}
|
}
|
||||||
|
|
||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
WITH rec_prices AS (
|
WITH rec_prices AS (
|
||||||
SELECT
|
SELECT
|
||||||
${category ? 'sp.category_raw' : 'sp.category_raw'},
|
${category ? 'sp.category' : 'sp.category'},
|
||||||
COUNT(DISTINCT s.code) AS state_count,
|
COUNT(DISTINCT s.code) AS state_count,
|
||||||
COUNT(*) AS product_count,
|
COUNT(*) AS product_count,
|
||||||
AVG(sp.price_rec) AS avg_price,
|
AVG(sp.price_rec) AS avg_price,
|
||||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
|
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
|
||||||
WHERE s.recreational_legal = TRUE
|
WHERE s.recreational_legal = TRUE
|
||||||
AND sp.price_rec IS NOT NULL
|
AND sp.price_rec IS NOT NULL
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
AND sp.category_raw IS NOT NULL
|
AND sp.category IS NOT NULL
|
||||||
${categoryFilter}
|
${categoryFilter}
|
||||||
GROUP BY sp.category_raw
|
GROUP BY sp.category
|
||||||
),
|
),
|
||||||
med_prices AS (
|
med_prices AS (
|
||||||
SELECT
|
SELECT
|
||||||
${category ? 'sp.category_raw' : 'sp.category_raw'},
|
${category ? 'sp.category' : 'sp.category'},
|
||||||
COUNT(DISTINCT s.code) AS state_count,
|
COUNT(DISTINCT s.code) AS state_count,
|
||||||
COUNT(*) AS product_count,
|
COUNT(*) AS product_count,
|
||||||
AVG(sp.price_rec) AS avg_price,
|
AVG(sp.price_rec) AS avg_price,
|
||||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
|
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
JOIN states s ON s.id = sp.state_id
|
||||||
JOIN states s ON s.id = d.state_id
|
|
||||||
WHERE s.medical_legal = TRUE
|
WHERE s.medical_legal = TRUE
|
||||||
AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
|
AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
|
||||||
AND sp.price_rec IS NOT NULL
|
AND sp.price_rec IS NOT NULL
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
AND sp.category_raw IS NOT NULL
|
AND sp.category IS NOT NULL
|
||||||
${categoryFilter}
|
${categoryFilter}
|
||||||
GROUP BY sp.category_raw
|
GROUP BY sp.category
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
COALESCE(r.category, m.category) AS category,
|
COALESCE(r.category, m.category) AS category,
|
||||||
@@ -362,7 +357,7 @@ export class StateAnalyticsService {
|
|||||||
`, params);
|
`, params);
|
||||||
|
|
||||||
return result.rows.map((row: any) => ({
|
return result.rows.map((row: any) => ({
|
||||||
category: row.category_raw,
|
category: row.category,
|
||||||
recreational: {
|
recreational: {
|
||||||
state_count: parseInt(row.rec_state_count) || 0,
|
state_count: parseInt(row.rec_state_count) || 0,
|
||||||
product_count: parseInt(row.rec_product_count) || 0,
|
product_count: parseInt(row.rec_product_count) || 0,
|
||||||
@@ -400,12 +395,12 @@ export class StateAnalyticsService {
|
|||||||
COALESCE(s.medical_legal, FALSE) AS medical_legal,
|
COALESCE(s.medical_legal, FALSE) AS medical_legal,
|
||||||
COUNT(DISTINCT d.id) AS dispensary_count,
|
COUNT(DISTINCT d.id) AS dispensary_count,
|
||||||
COUNT(DISTINCT sp.id) AS product_count,
|
COUNT(DISTINCT sp.id) AS product_count,
|
||||||
COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL) AS brand_count,
|
COUNT(DISTINCT sp.brand_name) FILTER (WHERE sp.brand_name IS NOT NULL) AS brand_count,
|
||||||
MAX(sps.captured_at) AS last_crawl_at
|
MAX(sps.captured_at) AS last_crawl_at
|
||||||
FROM states s
|
FROM states s
|
||||||
LEFT JOIN dispensaries d ON d.state_id = s.id
|
LEFT JOIN dispensaries d ON d.state_id = s.id
|
||||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
|
LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
|
||||||
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
|
LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
|
||||||
GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
|
GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
|
||||||
ORDER BY dispensary_count DESC, s.name
|
ORDER BY dispensary_count DESC, s.name
|
||||||
`);
|
`);
|
||||||
@@ -456,8 +451,8 @@ export class StateAnalyticsService {
|
|||||||
END AS gap_reason
|
END AS gap_reason
|
||||||
FROM states s
|
FROM states s
|
||||||
LEFT JOIN dispensaries d ON d.state_id = s.id
|
LEFT JOIN dispensaries d ON d.state_id = s.id
|
||||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
|
LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
|
||||||
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
|
LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
|
||||||
WHERE s.recreational_legal = TRUE OR s.medical_legal = TRUE
|
WHERE s.recreational_legal = TRUE OR s.medical_legal = TRUE
|
||||||
GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
|
GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
|
||||||
HAVING COUNT(DISTINCT d.id) = 0
|
HAVING COUNT(DISTINCT d.id) = 0
|
||||||
@@ -504,8 +499,7 @@ export class StateAnalyticsService {
|
|||||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price,
|
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price,
|
||||||
COUNT(*) AS product_count
|
COUNT(*) AS product_count
|
||||||
FROM states s
|
FROM states s
|
||||||
JOIN dispensaries d ON d.state_id = s.id
|
JOIN store_products sp ON sp.state_id = s.id
|
||||||
JOIN store_products sp ON sp.dispensary_id = d.id
|
|
||||||
WHERE sp.price_rec IS NOT NULL
|
WHERE sp.price_rec IS NOT NULL
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
|
AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
|
||||||
|
|||||||
@@ -89,22 +89,22 @@ export class StoreAnalyticsService {
|
|||||||
// Get brands added/dropped
|
// Get brands added/dropped
|
||||||
const brandsResult = await this.pool.query(`
|
const brandsResult = await this.pool.query(`
|
||||||
WITH start_brands AS (
|
WITH start_brands AS (
|
||||||
SELECT DISTINCT brand_name_raw
|
SELECT DISTINCT brand_name
|
||||||
FROM store_product_snapshots
|
FROM store_product_snapshots
|
||||||
WHERE dispensary_id = $1
|
WHERE dispensary_id = $1
|
||||||
AND captured_at >= $2::timestamp AND captured_at < $2::timestamp + INTERVAL '1 day'
|
AND captured_at >= $2 AND captured_at < $2 + INTERVAL '1 day'
|
||||||
AND brand_name_raw IS NOT NULL
|
AND brand_name IS NOT NULL
|
||||||
),
|
),
|
||||||
end_brands AS (
|
end_brands AS (
|
||||||
SELECT DISTINCT brand_name_raw
|
SELECT DISTINCT brand_name
|
||||||
FROM store_product_snapshots
|
FROM store_product_snapshots
|
||||||
WHERE dispensary_id = $1
|
WHERE dispensary_id = $1
|
||||||
AND captured_at >= $3::timestamp - INTERVAL '1 day' AND captured_at <= $3::timestamp
|
AND captured_at >= $3 - INTERVAL '1 day' AND captured_at <= $3
|
||||||
AND brand_name_raw IS NOT NULL
|
AND brand_name IS NOT NULL
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
ARRAY(SELECT brand_name_raw FROM end_brands EXCEPT SELECT brand_name_raw FROM start_brands) AS added,
|
ARRAY(SELECT brand_name FROM end_brands EXCEPT SELECT brand_name FROM start_brands) AS added,
|
||||||
ARRAY(SELECT brand_name_raw FROM start_brands EXCEPT SELECT brand_name_raw FROM end_brands) AS dropped
|
ARRAY(SELECT brand_name FROM start_brands EXCEPT SELECT brand_name FROM end_brands) AS dropped
|
||||||
`, [dispensaryId, start, end]);
|
`, [dispensaryId, start, end]);
|
||||||
|
|
||||||
const brands = brandsResult.rows[0] || { added: [], dropped: [] };
|
const brands = brandsResult.rows[0] || { added: [], dropped: [] };
|
||||||
@@ -184,9 +184,9 @@ export class StoreAnalyticsService {
|
|||||||
-- Products added
|
-- Products added
|
||||||
SELECT
|
SELECT
|
||||||
sp.id AS store_product_id,
|
sp.id AS store_product_id,
|
||||||
sp.name_raw AS product_name,
|
sp.name AS product_name,
|
||||||
sp.brand_name_raw,
|
sp.brand_name,
|
||||||
sp.category_raw,
|
sp.category,
|
||||||
'added' AS event_type,
|
'added' AS event_type,
|
||||||
sp.first_seen_at AS event_date,
|
sp.first_seen_at AS event_date,
|
||||||
NULL::TEXT AS old_value,
|
NULL::TEXT AS old_value,
|
||||||
@@ -201,9 +201,9 @@ export class StoreAnalyticsService {
|
|||||||
-- Stock in/out from snapshots
|
-- Stock in/out from snapshots
|
||||||
SELECT
|
SELECT
|
||||||
sps.store_product_id,
|
sps.store_product_id,
|
||||||
sp.name_raw AS product_name,
|
sp.name AS product_name,
|
||||||
sp.brand_name_raw,
|
sp.brand_name,
|
||||||
sp.category_raw,
|
sp.category,
|
||||||
CASE
|
CASE
|
||||||
WHEN sps.is_in_stock = TRUE AND LAG(sps.is_in_stock) OVER w = FALSE THEN 'stock_in'
|
WHEN sps.is_in_stock = TRUE AND LAG(sps.is_in_stock) OVER w = FALSE THEN 'stock_in'
|
||||||
WHEN sps.is_in_stock = FALSE AND LAG(sps.is_in_stock) OVER w = TRUE THEN 'stock_out'
|
WHEN sps.is_in_stock = FALSE AND LAG(sps.is_in_stock) OVER w = TRUE THEN 'stock_out'
|
||||||
@@ -224,9 +224,9 @@ export class StoreAnalyticsService {
|
|||||||
-- Price changes from snapshots
|
-- Price changes from snapshots
|
||||||
SELECT
|
SELECT
|
||||||
sps.store_product_id,
|
sps.store_product_id,
|
||||||
sp.name_raw AS product_name,
|
sp.name AS product_name,
|
||||||
sp.brand_name_raw,
|
sp.brand_name,
|
||||||
sp.category_raw,
|
sp.category,
|
||||||
'price_change' AS event_type,
|
'price_change' AS event_type,
|
||||||
sps.captured_at AS event_date,
|
sps.captured_at AS event_date,
|
||||||
LAG(sps.price_rec::TEXT) OVER w AS old_value,
|
LAG(sps.price_rec::TEXT) OVER w AS old_value,
|
||||||
@@ -250,8 +250,8 @@ export class StoreAnalyticsService {
|
|||||||
return result.rows.map((row: any) => ({
|
return result.rows.map((row: any) => ({
|
||||||
store_product_id: row.store_product_id,
|
store_product_id: row.store_product_id,
|
||||||
product_name: row.product_name,
|
product_name: row.product_name,
|
||||||
brand_name: row.brand_name_raw,
|
brand_name: row.brand_name,
|
||||||
category: row.category_raw,
|
category: row.category,
|
||||||
event_type: row.event_type,
|
event_type: row.event_type,
|
||||||
event_date: row.event_date ? row.event_date.toISOString() : null,
|
event_date: row.event_date ? row.event_date.toISOString() : null,
|
||||||
old_value: row.old_value,
|
old_value: row.old_value,
|
||||||
@@ -259,122 +259,6 @@ export class StoreAnalyticsService {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Get quantity changes for a store (increases/decreases)
|
|
||||||
* Useful for estimating sales (decreases) or restocks (increases)
|
|
||||||
*
|
|
||||||
* @param direction - 'decrease' for likely sales, 'increase' for restocks, 'all' for both
|
|
||||||
*/
|
|
||||||
async getQuantityChanges(
|
|
||||||
dispensaryId: number,
|
|
||||||
options: {
|
|
||||||
window?: TimeWindow;
|
|
||||||
customRange?: DateRange;
|
|
||||||
direction?: 'increase' | 'decrease' | 'all';
|
|
||||||
limit?: number;
|
|
||||||
} = {}
|
|
||||||
): Promise<{
|
|
||||||
dispensary_id: number;
|
|
||||||
window: TimeWindow;
|
|
||||||
direction: string;
|
|
||||||
total_changes: number;
|
|
||||||
total_units_decreased: number;
|
|
||||||
total_units_increased: number;
|
|
||||||
changes: Array<{
|
|
||||||
store_product_id: number;
|
|
||||||
product_name: string;
|
|
||||||
brand_name: string | null;
|
|
||||||
category: string | null;
|
|
||||||
old_quantity: number;
|
|
||||||
new_quantity: number;
|
|
||||||
quantity_delta: number;
|
|
||||||
direction: 'increase' | 'decrease';
|
|
||||||
captured_at: string;
|
|
||||||
}>;
|
|
||||||
}> {
|
|
||||||
const { window = '7d', customRange, direction = 'all', limit = 100 } = options;
|
|
||||||
const { start, end } = getDateRangeFromWindow(window, customRange);
|
|
||||||
|
|
||||||
// Build direction filter
|
|
||||||
let directionFilter = '';
|
|
||||||
if (direction === 'decrease') {
|
|
||||||
directionFilter = 'AND qty_delta < 0';
|
|
||||||
} else if (direction === 'increase') {
|
|
||||||
directionFilter = 'AND qty_delta > 0';
|
|
||||||
}
|
|
||||||
|
|
||||||
const result = await this.pool.query(`
|
|
||||||
WITH qty_changes AS (
|
|
||||||
SELECT
|
|
||||||
sps.store_product_id,
|
|
||||||
sp.name_raw AS product_name,
|
|
||||||
sp.brand_name_raw AS brand_name,
|
|
||||||
sp.category_raw AS category,
|
|
||||||
LAG(sps.stock_quantity) OVER w AS old_quantity,
|
|
||||||
sps.stock_quantity AS new_quantity,
|
|
||||||
sps.stock_quantity - LAG(sps.stock_quantity) OVER w AS qty_delta,
|
|
||||||
sps.captured_at
|
|
||||||
FROM store_product_snapshots sps
|
|
||||||
JOIN store_products sp ON sp.id = sps.store_product_id
|
|
||||||
WHERE sps.dispensary_id = $1
|
|
||||||
AND sps.captured_at >= $2
|
|
||||||
AND sps.captured_at <= $3
|
|
||||||
AND sps.stock_quantity IS NOT NULL
|
|
||||||
WINDOW w AS (PARTITION BY sps.store_product_id ORDER BY sps.captured_at)
|
|
||||||
)
|
|
||||||
SELECT *
|
|
||||||
FROM qty_changes
|
|
||||||
WHERE old_quantity IS NOT NULL
|
|
||||||
AND qty_delta != 0
|
|
||||||
${directionFilter}
|
|
||||||
ORDER BY captured_at DESC
|
|
||||||
LIMIT $4
|
|
||||||
`, [dispensaryId, start, end, limit]);
|
|
||||||
|
|
||||||
// Calculate totals
|
|
||||||
const totalsResult = await this.pool.query(`
|
|
||||||
WITH qty_changes AS (
|
|
||||||
SELECT
|
|
||||||
sps.stock_quantity - LAG(sps.stock_quantity) OVER w AS qty_delta
|
|
||||||
FROM store_product_snapshots sps
|
|
||||||
WHERE sps.dispensary_id = $1
|
|
||||||
AND sps.captured_at >= $2
|
|
||||||
AND sps.captured_at <= $3
|
|
||||||
AND sps.stock_quantity IS NOT NULL
|
|
||||||
AND sps.store_product_id IS NOT NULL
|
|
||||||
WINDOW w AS (PARTITION BY sps.store_product_id ORDER BY sps.captured_at)
|
|
||||||
)
|
|
||||||
SELECT
|
|
||||||
COUNT(*) FILTER (WHERE qty_delta != 0) AS total_changes,
|
|
||||||
COALESCE(SUM(ABS(qty_delta)) FILTER (WHERE qty_delta < 0), 0) AS units_decreased,
|
|
||||||
COALESCE(SUM(qty_delta) FILTER (WHERE qty_delta > 0), 0) AS units_increased
|
|
||||||
FROM qty_changes
|
|
||||||
WHERE qty_delta IS NOT NULL
|
|
||||||
`, [dispensaryId, start, end]);
|
|
||||||
|
|
||||||
const totals = totalsResult.rows[0] || {};
|
|
||||||
|
|
||||||
return {
|
|
||||||
dispensary_id: dispensaryId,
|
|
||||||
window,
|
|
||||||
direction,
|
|
||||||
total_changes: parseInt(totals.total_changes) || 0,
|
|
||||||
total_units_decreased: parseInt(totals.units_decreased) || 0,
|
|
||||||
total_units_increased: parseInt(totals.units_increased) || 0,
|
|
||||||
changes: result.rows.map((row: any) => ({
|
|
||||||
store_product_id: row.store_product_id,
|
|
||||||
product_name: row.product_name,
|
|
||||||
brand_name: row.brand_name_raw,
|
|
||||||
category: row.category_raw,
|
|
||||||
old_quantity: row.old_quantity,
|
|
||||||
new_quantity: row.new_quantity,
|
|
||||||
quantity_delta: row.qty_delta,
|
|
||||||
direction: row.qty_delta > 0 ? 'increase' : 'decrease',
|
|
||||||
captured_at: row.captured_at?.toISOString() || null,
|
|
||||||
})),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get store inventory composition (categories and brands breakdown)
|
* Get store inventory composition (categories and brands breakdown)
|
||||||
*/
|
*/
|
||||||
@@ -415,14 +299,14 @@ export class StoreAnalyticsService {
|
|||||||
// Get top brands
|
// Get top brands
|
||||||
const brandsResult = await this.pool.query(`
|
const brandsResult = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
brand_name_raw AS brand,
|
brand_name AS brand,
|
||||||
COUNT(*) AS count,
|
COUNT(*) AS count,
|
||||||
ROUND(COUNT(*)::NUMERIC * 100 / NULLIF($2, 0), 2) AS percent
|
ROUND(COUNT(*)::NUMERIC * 100 / NULLIF($2, 0), 2) AS percent
|
||||||
FROM store_products
|
FROM store_products
|
||||||
WHERE dispensary_id = $1
|
WHERE dispensary_id = $1
|
||||||
AND brand_name_raw IS NOT NULL
|
AND brand_name IS NOT NULL
|
||||||
AND is_in_stock = TRUE
|
AND is_in_stock = TRUE
|
||||||
GROUP BY brand_name_raw
|
GROUP BY brand_name
|
||||||
ORDER BY count DESC
|
ORDER BY count DESC
|
||||||
LIMIT 20
|
LIMIT 20
|
||||||
`, [dispensaryId, totalProducts]);
|
`, [dispensaryId, totalProducts]);
|
||||||
@@ -432,7 +316,7 @@ export class StoreAnalyticsService {
|
|||||||
in_stock_count: parseInt(totals.in_stock) || 0,
|
in_stock_count: parseInt(totals.in_stock) || 0,
|
||||||
out_of_stock_count: parseInt(totals.out_of_stock) || 0,
|
out_of_stock_count: parseInt(totals.out_of_stock) || 0,
|
||||||
categories: categoriesResult.rows.map((row: any) => ({
|
categories: categoriesResult.rows.map((row: any) => ({
|
||||||
category: row.category_raw,
|
category: row.category,
|
||||||
count: parseInt(row.count),
|
count: parseInt(row.count),
|
||||||
percent: parseFloat(row.percent) || 0,
|
percent: parseFloat(row.percent) || 0,
|
||||||
})),
|
})),
|
||||||
@@ -574,24 +458,23 @@ export class StoreAnalyticsService {
|
|||||||
),
|
),
|
||||||
market_prices AS (
|
market_prices AS (
|
||||||
SELECT
|
SELECT
|
||||||
sp.category_raw,
|
sp.category,
|
||||||
AVG(sp.price_rec) AS market_avg
|
AVG(sp.price_rec) AS market_avg
|
||||||
FROM store_products sp
|
FROM store_products sp
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
WHERE sp.state_id = $2
|
||||||
WHERE d.state_id = $2
|
|
||||||
AND sp.price_rec IS NOT NULL
|
AND sp.price_rec IS NOT NULL
|
||||||
AND sp.is_in_stock = TRUE
|
AND sp.is_in_stock = TRUE
|
||||||
AND sp.category_raw IS NOT NULL
|
AND sp.category IS NOT NULL
|
||||||
GROUP BY sp.category_raw
|
GROUP BY sp.category
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
sp.category_raw,
|
sp.category,
|
||||||
sp.store_avg AS store_avg_price,
|
sp.store_avg AS store_avg_price,
|
||||||
mp.market_avg AS market_avg_price,
|
mp.market_avg AS market_avg_price,
|
||||||
ROUND(((sp.store_avg - mp.market_avg) / NULLIF(mp.market_avg, 0) * 100)::NUMERIC, 2) AS price_vs_market_percent,
|
ROUND(((sp.store_avg - mp.market_avg) / NULLIF(mp.market_avg, 0) * 100)::NUMERIC, 2) AS price_vs_market_percent,
|
||||||
sp.product_count
|
sp.product_count
|
||||||
FROM store_prices sp
|
FROM store_prices sp
|
||||||
LEFT JOIN market_prices mp ON mp.category = sp.category_raw
|
LEFT JOIN market_prices mp ON mp.category = sp.category
|
||||||
ORDER BY sp.product_count DESC
|
ORDER BY sp.product_count DESC
|
||||||
`, [dispensaryId, dispensary.state_id]);
|
`, [dispensaryId, dispensary.state_id]);
|
||||||
|
|
||||||
@@ -603,10 +486,9 @@ export class StoreAnalyticsService {
|
|||||||
WHERE dispensary_id = $1 AND price_rec IS NOT NULL AND is_in_stock = TRUE
|
WHERE dispensary_id = $1 AND price_rec IS NOT NULL AND is_in_stock = TRUE
|
||||||
),
|
),
|
||||||
market_avg AS (
|
market_avg AS (
|
||||||
SELECT AVG(sp.price_rec) AS avg
|
SELECT AVG(price_rec) AS avg
|
||||||
FROM store_products sp
|
FROM store_products
|
||||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
WHERE state_id = $2 AND price_rec IS NOT NULL AND is_in_stock = TRUE
|
||||||
WHERE d.state_id = $2 AND sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE
|
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
ROUND(((sa.avg - ma.avg) / NULLIF(ma.avg, 0) * 100)::NUMERIC, 2) AS price_vs_market
|
ROUND(((sa.avg - ma.avg) / NULLIF(ma.avg, 0) * 100)::NUMERIC, 2) AS price_vs_market
|
||||||
@@ -617,7 +499,7 @@ export class StoreAnalyticsService {
|
|||||||
dispensary_id: dispensaryId,
|
dispensary_id: dispensaryId,
|
||||||
dispensary_name: dispensary.name,
|
dispensary_name: dispensary.name,
|
||||||
categories: result.rows.map((row: any) => ({
|
categories: result.rows.map((row: any) => ({
|
||||||
category: row.category_raw,
|
category: row.category,
|
||||||
store_avg_price: parseFloat(row.store_avg_price),
|
store_avg_price: parseFloat(row.store_avg_price),
|
||||||
market_avg_price: row.market_avg_price ? parseFloat(row.market_avg_price) : 0,
|
market_avg_price: row.market_avg_price ? parseFloat(row.market_avg_price) : 0,
|
||||||
price_vs_market_percent: row.price_vs_market_percent ? parseFloat(row.price_vs_market_percent) : 0,
|
price_vs_market_percent: row.price_vs_market_percent ? parseFloat(row.price_vs_market_percent) : 0,
|
||||||
|
|||||||
@@ -11,4 +11,3 @@ export { BrandPenetrationService } from './BrandPenetrationService';
|
|||||||
export { CategoryAnalyticsService } from './CategoryAnalyticsService';
|
export { CategoryAnalyticsService } from './CategoryAnalyticsService';
|
||||||
export { StoreAnalyticsService } from './StoreAnalyticsService';
|
export { StoreAnalyticsService } from './StoreAnalyticsService';
|
||||||
export { StateAnalyticsService } from './StateAnalyticsService';
|
export { StateAnalyticsService } from './StateAnalyticsService';
|
||||||
export { BrandIntelligenceService } from './BrandIntelligenceService';
|
|
||||||
|
|||||||
@@ -322,48 +322,3 @@ export interface RecVsMedPriceComparison {
|
|||||||
};
|
};
|
||||||
price_diff_percent: number | null;
|
price_diff_percent: number | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// BRAND PROMOTIONAL ANALYTICS TYPES
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
export interface BrandPromotionalEvent {
|
|
||||||
product_name: string;
|
|
||||||
store_product_id: number;
|
|
||||||
dispensary_id: number;
|
|
||||||
dispensary_name: string;
|
|
||||||
state_code: string;
|
|
||||||
category: string | null;
|
|
||||||
special_start: string; // ISO date when special started
|
|
||||||
special_end: string | null; // ISO date when special ended (null if ongoing)
|
|
||||||
duration_days: number | null;
|
|
||||||
regular_price: number;
|
|
||||||
special_price: number;
|
|
||||||
discount_percent: number;
|
|
||||||
quantity_at_start: number | null;
|
|
||||||
quantity_at_end: number | null;
|
|
||||||
quantity_sold_estimate: number | null; // quantity_at_start - quantity_at_end
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface BrandPromotionalSummary {
|
|
||||||
brand_name: string;
|
|
||||||
window: TimeWindow;
|
|
||||||
total_promotional_events: number;
|
|
||||||
total_products_on_special: number;
|
|
||||||
total_dispensaries_with_specials: number;
|
|
||||||
states_with_specials: string[];
|
|
||||||
avg_discount_percent: number;
|
|
||||||
avg_duration_days: number | null;
|
|
||||||
total_quantity_sold_estimate: number | null;
|
|
||||||
promotional_frequency: {
|
|
||||||
weekly_avg: number;
|
|
||||||
monthly_avg: number;
|
|
||||||
};
|
|
||||||
by_category: Array<{
|
|
||||||
category: string;
|
|
||||||
event_count: number;
|
|
||||||
avg_discount_percent: number;
|
|
||||||
quantity_sold_estimate: number | null;
|
|
||||||
}>;
|
|
||||||
events: BrandPromotionalEvent[];
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,53 +1,49 @@
|
|||||||
/**
|
/**
|
||||||
* Crawl Rotator - Proxy & User Agent Rotation for Crawlers
|
* Crawl Rotator - Proxy & User Agent Rotation for Crawlers
|
||||||
*
|
*
|
||||||
* Updated: 2025-12-10 per workflow-12102025.md
|
* Manages rotation of proxies and user agents to avoid blocks.
|
||||||
*
|
* Used by platform-specific crawlers (Dutchie, Jane, etc.)
|
||||||
* KEY BEHAVIORS (per workflow-12102025.md):
|
|
||||||
* 1. Task determines WHAT work to do, proxy determines SESSION IDENTITY
|
|
||||||
* 2. Proxy location (timezone) sets Accept-Language headers (always English)
|
|
||||||
* 3. On 403: immediately get new IP, new fingerprint, retry
|
|
||||||
* 4. After 3 consecutive 403s on same proxy with different fingerprints → disable proxy
|
|
||||||
*
|
|
||||||
* USER-AGENT GENERATION (per workflow-12102025.md):
|
|
||||||
* - Device distribution: Mobile 62%, Desktop 36%, Tablet 2%
|
|
||||||
* - Browser whitelist: Chrome, Safari, Edge, Firefox only
|
|
||||||
* - UA sticks until IP rotates
|
|
||||||
* - Failure = alert admin + stop crawl (no fallback)
|
|
||||||
*
|
|
||||||
* Uses intoli/user-agents for realistic UA generation with daily-updated data.
|
|
||||||
*
|
*
|
||||||
* Canonical location: src/services/crawl-rotator.ts
|
* Canonical location: src/services/crawl-rotator.ts
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { Pool } from 'pg';
|
import { Pool } from 'pg';
|
||||||
import UserAgent from 'user-agents';
|
|
||||||
import {
|
|
||||||
HTTPFingerprint,
|
|
||||||
generateHTTPFingerprint,
|
|
||||||
BrowserType,
|
|
||||||
} from './http-fingerprint';
|
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// UA CONSTANTS (per workflow-12102025.md)
|
// USER AGENT CONFIGURATION
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Per workflow-12102025.md: Device category distribution (hardcoded)
|
* Modern browser user agents (Chrome, Firefox, Safari, Edge on various platforms)
|
||||||
* Mobile: 62%, Desktop: 36%, Tablet: 2%
|
* Updated: 2024
|
||||||
*/
|
*/
|
||||||
const DEVICE_WEIGHTS = {
|
export const USER_AGENTS = [
|
||||||
mobile: 62,
|
// Chrome on Windows
|
||||||
desktop: 36,
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||||
tablet: 2,
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
|
||||||
} as const;
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
|
||||||
|
|
||||||
/**
|
// Chrome on macOS
|
||||||
* Per workflow-12102025.md: Browser whitelist
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||||
* Only Chrome (67%), Safari (20%), Edge (6%), Firefox (3%)
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
|
||||||
* Samsung Internet, Opera, and other niche browsers are filtered out
|
|
||||||
*/
|
// Firefox on Windows
|
||||||
const ALLOWED_BROWSERS = ['Chrome', 'Safari', 'Edge', 'Firefox'] as const;
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
|
||||||
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0',
|
||||||
|
|
||||||
|
// Firefox on macOS
|
||||||
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0',
|
||||||
|
|
||||||
|
// Safari on macOS
|
||||||
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
|
||||||
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15',
|
||||||
|
|
||||||
|
// Edge on Windows
|
||||||
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
|
||||||
|
|
||||||
|
// Chrome on Linux
|
||||||
|
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||||
|
];
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// PROXY TYPES
|
// PROXY TYPES
|
||||||
@@ -65,13 +61,8 @@ export interface Proxy {
|
|||||||
failureCount: number;
|
failureCount: number;
|
||||||
successCount: number;
|
successCount: number;
|
||||||
avgResponseTimeMs: number | null;
|
avgResponseTimeMs: number | null;
|
||||||
maxConnections: number;
|
maxConnections: number; // Number of concurrent connections allowed (for rotating proxies)
|
||||||
/**
|
// Location info (if known)
|
||||||
* Per workflow-12102025.md: Track consecutive 403s with different fingerprints.
|
|
||||||
* After 3 consecutive 403s → disable proxy (it's burned).
|
|
||||||
*/
|
|
||||||
consecutive403Count: number;
|
|
||||||
// Location info - determines session headers per workflow-12102025.md
|
|
||||||
city?: string;
|
city?: string;
|
||||||
state?: string;
|
state?: string;
|
||||||
country?: string;
|
country?: string;
|
||||||
@@ -86,40 +77,6 @@ export interface ProxyStats {
|
|||||||
avgSuccessRate: number;
|
avgSuccessRate: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// FINGERPRINT TYPE
|
|
||||||
// Per workflow-12102025.md: Full browser fingerprint from user-agents
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
export interface BrowserFingerprint {
|
|
||||||
userAgent: string;
|
|
||||||
platform: string;
|
|
||||||
screenWidth: number;
|
|
||||||
screenHeight: number;
|
|
||||||
viewportWidth: number;
|
|
||||||
viewportHeight: number;
|
|
||||||
deviceCategory: string;
|
|
||||||
browserName: string; // Per workflow-12102025.md: for session logging
|
|
||||||
// Derived headers for anti-detect
|
|
||||||
acceptLanguage: string;
|
|
||||||
secChUa?: string;
|
|
||||||
secChUaPlatform?: string;
|
|
||||||
secChUaMobile?: string;
|
|
||||||
// Per workflow-12102025.md: HTTP Fingerprinting section
|
|
||||||
httpFingerprint: HTTPFingerprint;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Session log entry for debugging blocked sessions
|
|
||||||
*/
|
|
||||||
export interface UASessionLog {
|
|
||||||
deviceCategory: string;
|
|
||||||
browserName: string;
|
|
||||||
userAgent: string;
|
|
||||||
proxyIp: string | null;
|
|
||||||
sessionStartedAt: Date;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// PROXY ROTATOR CLASS
|
// PROXY ROTATOR CLASS
|
||||||
// ============================================================
|
// ============================================================
|
||||||
@@ -134,6 +91,9 @@ export class ProxyRotator {
|
|||||||
this.pool = pool || null;
|
this.pool = pool || null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize with database pool
|
||||||
|
*/
|
||||||
setPool(pool: Pool): void {
|
setPool(pool: Pool): void {
|
||||||
this.pool = pool;
|
this.pool = pool;
|
||||||
}
|
}
|
||||||
@@ -162,7 +122,6 @@ export class ProxyRotator {
|
|||||||
0 as "successCount",
|
0 as "successCount",
|
||||||
response_time_ms as "avgResponseTimeMs",
|
response_time_ms as "avgResponseTimeMs",
|
||||||
COALESCE(max_connections, 1) as "maxConnections",
|
COALESCE(max_connections, 1) as "maxConnections",
|
||||||
COALESCE(consecutive_403_count, 0) as "consecutive403Count",
|
|
||||||
city,
|
city,
|
||||||
state,
|
state,
|
||||||
country,
|
country,
|
||||||
@@ -175,9 +134,11 @@ export class ProxyRotator {
|
|||||||
|
|
||||||
this.proxies = result.rows;
|
this.proxies = result.rows;
|
||||||
|
|
||||||
|
// Calculate total concurrent capacity
|
||||||
const totalCapacity = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0);
|
const totalCapacity = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0);
|
||||||
console.log(`[ProxyRotator] Loaded ${this.proxies.length} active proxies (${totalCapacity} max concurrent connections)`);
|
console.log(`[ProxyRotator] Loaded ${this.proxies.length} active proxies (${totalCapacity} max concurrent connections)`);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
// Table might not exist - that's okay
|
||||||
console.warn(`[ProxyRotator] Could not load proxies: ${error}`);
|
console.warn(`[ProxyRotator] Could not load proxies: ${error}`);
|
||||||
this.proxies = [];
|
this.proxies = [];
|
||||||
}
|
}
|
||||||
@@ -189,6 +150,7 @@ export class ProxyRotator {
|
|||||||
getNext(): Proxy | null {
|
getNext(): Proxy | null {
|
||||||
if (this.proxies.length === 0) return null;
|
if (this.proxies.length === 0) return null;
|
||||||
|
|
||||||
|
// Round-robin rotation
|
||||||
this.currentIndex = (this.currentIndex + 1) % this.proxies.length;
|
this.currentIndex = (this.currentIndex + 1) % this.proxies.length;
|
||||||
this.lastRotation = new Date();
|
this.lastRotation = new Date();
|
||||||
|
|
||||||
@@ -223,68 +185,23 @@ export class ProxyRotator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Mark proxy as blocked (403 received)
|
* Mark proxy as failed (temporarily remove from rotation)
|
||||||
* Per workflow-12102025.md:
|
|
||||||
* - Increment consecutive_403_count
|
|
||||||
* - After 3 consecutive 403s with different fingerprints → disable proxy
|
|
||||||
* - This is separate from general failures (timeouts, etc.)
|
|
||||||
*/
|
|
||||||
async markBlocked(proxyId: number): Promise<boolean> {
|
|
||||||
const proxy = this.proxies.find(p => p.id === proxyId);
|
|
||||||
let shouldDisable = false;
|
|
||||||
|
|
||||||
if (proxy) {
|
|
||||||
proxy.consecutive403Count++;
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: 3 consecutive 403s → proxy is burned
|
|
||||||
if (proxy.consecutive403Count >= 3) {
|
|
||||||
proxy.isActive = false;
|
|
||||||
this.proxies = this.proxies.filter(p => p.id !== proxyId);
|
|
||||||
console.log(`[ProxyRotator] Proxy ${proxyId} DISABLED after ${proxy.consecutive403Count} consecutive 403s (burned)`);
|
|
||||||
shouldDisable = true;
|
|
||||||
} else {
|
|
||||||
console.log(`[ProxyRotator] Proxy ${proxyId} blocked (403 #${proxy.consecutive403Count}/3)`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update database
|
|
||||||
if (this.pool) {
|
|
||||||
try {
|
|
||||||
await this.pool.query(`
|
|
||||||
UPDATE proxies
|
|
||||||
SET
|
|
||||||
consecutive_403_count = COALESCE(consecutive_403_count, 0) + 1,
|
|
||||||
last_failure_at = NOW(),
|
|
||||||
test_result = '403 Forbidden',
|
|
||||||
active = CASE WHEN COALESCE(consecutive_403_count, 0) >= 2 THEN false ELSE active END,
|
|
||||||
updated_at = NOW()
|
|
||||||
WHERE id = $1
|
|
||||||
`, [proxyId]);
|
|
||||||
} catch (err) {
|
|
||||||
console.error(`[ProxyRotator] Failed to update proxy ${proxyId}:`, err);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return shouldDisable;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Mark proxy as failed (general error - timeout, connection error, etc.)
|
|
||||||
* Separate from 403 blocking per workflow-12102025.md
|
|
||||||
*/
|
*/
|
||||||
async markFailed(proxyId: number, error?: string): Promise<void> {
|
async markFailed(proxyId: number, error?: string): Promise<void> {
|
||||||
|
// Update in-memory
|
||||||
const proxy = this.proxies.find(p => p.id === proxyId);
|
const proxy = this.proxies.find(p => p.id === proxyId);
|
||||||
if (proxy) {
|
if (proxy) {
|
||||||
proxy.failureCount++;
|
proxy.failureCount++;
|
||||||
|
|
||||||
// Deactivate if too many general failures
|
// Deactivate if too many failures
|
||||||
if (proxy.failureCount >= 5) {
|
if (proxy.failureCount >= 5) {
|
||||||
proxy.isActive = false;
|
proxy.isActive = false;
|
||||||
this.proxies = this.proxies.filter(p => p.id !== proxyId);
|
this.proxies = this.proxies.filter(p => p.id !== proxyId);
|
||||||
console.log(`[ProxyRotator] Proxy ${proxyId} deactivated after ${proxy.failureCount} general failures`);
|
console.log(`[ProxyRotator] Proxy ${proxyId} deactivated after ${proxy.failureCount} failures`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update database
|
||||||
if (this.pool) {
|
if (this.pool) {
|
||||||
try {
|
try {
|
||||||
await this.pool.query(`
|
await this.pool.query(`
|
||||||
@@ -303,22 +220,23 @@ export class ProxyRotator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Mark proxy as successful - resets consecutive 403 count
|
* Mark proxy as successful
|
||||||
* Per workflow-12102025.md: successful request clears the 403 counter
|
|
||||||
*/
|
*/
|
||||||
async markSuccess(proxyId: number, responseTimeMs?: number): Promise<void> {
|
async markSuccess(proxyId: number, responseTimeMs?: number): Promise<void> {
|
||||||
|
// Update in-memory
|
||||||
const proxy = this.proxies.find(p => p.id === proxyId);
|
const proxy = this.proxies.find(p => p.id === proxyId);
|
||||||
if (proxy) {
|
if (proxy) {
|
||||||
proxy.successCount++;
|
proxy.successCount++;
|
||||||
proxy.consecutive403Count = 0; // Reset on success per workflow-12102025.md
|
|
||||||
proxy.lastUsedAt = new Date();
|
proxy.lastUsedAt = new Date();
|
||||||
if (responseTimeMs !== undefined) {
|
if (responseTimeMs !== undefined) {
|
||||||
|
// Rolling average
|
||||||
proxy.avgResponseTimeMs = proxy.avgResponseTimeMs
|
proxy.avgResponseTimeMs = proxy.avgResponseTimeMs
|
||||||
? (proxy.avgResponseTimeMs * 0.8) + (responseTimeMs * 0.2)
|
? (proxy.avgResponseTimeMs * 0.8) + (responseTimeMs * 0.2)
|
||||||
: responseTimeMs;
|
: responseTimeMs;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update database
|
||||||
if (this.pool) {
|
if (this.pool) {
|
||||||
try {
|
try {
|
||||||
await this.pool.query(`
|
await this.pool.query(`
|
||||||
@@ -326,7 +244,6 @@ export class ProxyRotator {
|
|||||||
SET
|
SET
|
||||||
last_tested_at = NOW(),
|
last_tested_at = NOW(),
|
||||||
test_result = 'success',
|
test_result = 'success',
|
||||||
consecutive_403_count = 0,
|
|
||||||
response_time_ms = CASE
|
response_time_ms = CASE
|
||||||
WHEN response_time_ms IS NULL THEN $2
|
WHEN response_time_ms IS NULL THEN $2
|
||||||
ELSE (response_time_ms * 0.8 + $2 * 0.2)::integer
|
ELSE (response_time_ms * 0.8 + $2 * 0.2)::integer
|
||||||
@@ -355,8 +272,8 @@ export class ProxyRotator {
|
|||||||
*/
|
*/
|
||||||
getStats(): ProxyStats {
|
getStats(): ProxyStats {
|
||||||
const totalProxies = this.proxies.length;
|
const totalProxies = this.proxies.length;
|
||||||
const activeProxies = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0);
|
const activeProxies = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0); // Total concurrent capacity
|
||||||
const blockedProxies = this.proxies.filter(p => p.failureCount >= 5 || p.consecutive403Count >= 3).length;
|
const blockedProxies = this.proxies.filter(p => p.failureCount >= 5).length;
|
||||||
|
|
||||||
const successRates = this.proxies
|
const successRates = this.proxies
|
||||||
.filter(p => p.successCount + p.failureCount > 0)
|
.filter(p => p.successCount + p.failureCount > 0)
|
||||||
@@ -368,12 +285,15 @@ export class ProxyRotator {
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
totalProxies,
|
totalProxies,
|
||||||
activeProxies,
|
activeProxies, // Total concurrent capacity across all proxies
|
||||||
blockedProxies,
|
blockedProxies,
|
||||||
avgSuccessRate,
|
avgSuccessRate,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if proxy pool has available proxies
|
||||||
|
*/
|
||||||
hasAvailableProxies(): boolean {
|
hasAvailableProxies(): boolean {
|
||||||
return this.proxies.length > 0;
|
return this.proxies.length > 0;
|
||||||
}
|
}
|
||||||
@@ -381,194 +301,53 @@ export class ProxyRotator {
|
|||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// USER AGENT ROTATOR CLASS
|
// USER AGENT ROTATOR CLASS
|
||||||
// Per workflow-12102025.md: Uses intoli/user-agents for realistic fingerprints
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
export class UserAgentRotator {
|
export class UserAgentRotator {
|
||||||
private currentFingerprint: BrowserFingerprint | null = null;
|
private userAgents: string[];
|
||||||
private sessionLog: UASessionLog | null = null;
|
private currentIndex: number = 0;
|
||||||
|
private lastRotation: Date = new Date();
|
||||||
|
|
||||||
constructor() {
|
constructor(userAgents: string[] = USER_AGENTS) {
|
||||||
// Per workflow-12102025.md: Initialize with first fingerprint
|
this.userAgents = userAgents;
|
||||||
this.rotate();
|
// Start at random index to avoid patterns
|
||||||
|
this.currentIndex = Math.floor(Math.random() * userAgents.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Per workflow-12102025.md: Roll device category based on distribution
|
* Get next user agent in rotation
|
||||||
* Mobile: 62%, Desktop: 36%, Tablet: 2%
|
|
||||||
*/
|
*/
|
||||||
private rollDeviceCategory(): 'mobile' | 'desktop' | 'tablet' {
|
getNext(): string {
|
||||||
const roll = Math.random() * 100;
|
this.currentIndex = (this.currentIndex + 1) % this.userAgents.length;
|
||||||
if (roll < DEVICE_WEIGHTS.mobile) {
|
this.lastRotation = new Date();
|
||||||
return 'mobile';
|
return this.userAgents[this.currentIndex];
|
||||||
} else if (roll < DEVICE_WEIGHTS.mobile + DEVICE_WEIGHTS.desktop) {
|
|
||||||
return 'desktop';
|
|
||||||
} else {
|
|
||||||
return 'tablet';
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Per workflow-12102025.md: Extract browser name from UA string
|
* Get current user agent without rotating
|
||||||
*/
|
*/
|
||||||
private extractBrowserName(userAgent: string): string {
|
getCurrent(): string {
|
||||||
if (userAgent.includes('Edg/')) return 'Edge';
|
return this.userAgents[this.currentIndex];
|
||||||
if (userAgent.includes('Firefox/')) return 'Firefox';
|
|
||||||
if (userAgent.includes('Safari/') && !userAgent.includes('Chrome/')) return 'Safari';
|
|
||||||
if (userAgent.includes('Chrome/')) return 'Chrome';
|
|
||||||
return 'Unknown';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Per workflow-12102025.md: Check if browser is in whitelist
|
* Get a random user agent
|
||||||
*/
|
*/
|
||||||
private isAllowedBrowser(userAgent: string): boolean {
|
getRandom(): string {
|
||||||
const browserName = this.extractBrowserName(userAgent);
|
const index = Math.floor(Math.random() * this.userAgents.length);
|
||||||
return ALLOWED_BROWSERS.includes(browserName as typeof ALLOWED_BROWSERS[number]);
|
return this.userAgents[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate a new random fingerprint
|
* Get total available user agents
|
||||||
* Per workflow-12102025.md:
|
|
||||||
* - Roll device category (62/36/2)
|
|
||||||
* - Filter to top 4 browsers only
|
|
||||||
* - Failure = alert admin + stop (no fallback)
|
|
||||||
*/
|
*/
|
||||||
rotate(proxyIp?: string): BrowserFingerprint {
|
|
||||||
// Per workflow-12102025.md: Roll device category
|
|
||||||
const deviceCategory = this.rollDeviceCategory();
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: Generate UA filtered to device category
|
|
||||||
const generator = new UserAgent({ deviceCategory });
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: Try to get an allowed browser (max 50 attempts)
|
|
||||||
let ua: ReturnType<typeof generator>;
|
|
||||||
let attempts = 0;
|
|
||||||
const maxAttempts = 50;
|
|
||||||
|
|
||||||
do {
|
|
||||||
ua = generator();
|
|
||||||
attempts++;
|
|
||||||
} while (!this.isAllowedBrowser(ua.data.userAgent) && attempts < maxAttempts);
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: If we can't get allowed browser, this is a failure
|
|
||||||
if (!this.isAllowedBrowser(ua.data.userAgent)) {
|
|
||||||
const errorMsg = `[UserAgentRotator] CRITICAL: Failed to generate allowed browser after ${maxAttempts} attempts. Device: ${deviceCategory}. Last UA: ${ua.data.userAgent}`;
|
|
||||||
console.error(errorMsg);
|
|
||||||
// Per workflow-12102025.md: Alert admin + stop crawl
|
|
||||||
// TODO: Post alert to admin dashboard
|
|
||||||
throw new Error(errorMsg);
|
|
||||||
}
|
|
||||||
|
|
||||||
const data = ua.data;
|
|
||||||
const browserName = this.extractBrowserName(data.userAgent);
|
|
||||||
|
|
||||||
// Build sec-ch-ua headers from user agent string
|
|
||||||
const secChUa = this.buildSecChUa(data.userAgent, deviceCategory);
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: HTTP Fingerprinting - generate full HTTP fingerprint
|
|
||||||
const httpFingerprint = generateHTTPFingerprint(browserName as BrowserType);
|
|
||||||
|
|
||||||
this.currentFingerprint = {
|
|
||||||
userAgent: data.userAgent,
|
|
||||||
platform: data.platform,
|
|
||||||
screenWidth: data.screenWidth,
|
|
||||||
screenHeight: data.screenHeight,
|
|
||||||
viewportWidth: data.viewportWidth,
|
|
||||||
viewportHeight: data.viewportHeight,
|
|
||||||
deviceCategory: data.deviceCategory,
|
|
||||||
browserName, // Per workflow-12102025.md: for session logging
|
|
||||||
// Per workflow-12102025.md: always English
|
|
||||||
acceptLanguage: 'en-US,en;q=0.9',
|
|
||||||
...secChUa,
|
|
||||||
// Per workflow-12102025.md: HTTP Fingerprinting section
|
|
||||||
httpFingerprint,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: Log session data
|
|
||||||
this.sessionLog = {
|
|
||||||
deviceCategory,
|
|
||||||
browserName,
|
|
||||||
userAgent: data.userAgent,
|
|
||||||
proxyIp: proxyIp || null,
|
|
||||||
sessionStartedAt: new Date(),
|
|
||||||
};
|
|
||||||
|
|
||||||
console.log(`[UserAgentRotator] New fingerprint: device=${deviceCategory}, browser=${browserName}, UA=${data.userAgent.slice(0, 50)}...`);
|
|
||||||
return this.currentFingerprint;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get current fingerprint without rotating
|
|
||||||
*/
|
|
||||||
getCurrent(): BrowserFingerprint {
|
|
||||||
if (!this.currentFingerprint) {
|
|
||||||
return this.rotate();
|
|
||||||
}
|
|
||||||
return this.currentFingerprint;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get a random fingerprint (rotates and returns)
|
|
||||||
*/
|
|
||||||
getRandom(proxyIp?: string): BrowserFingerprint {
|
|
||||||
return this.rotate(proxyIp);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Get session log for debugging
|
|
||||||
*/
|
|
||||||
getSessionLog(): UASessionLog | null {
|
|
||||||
return this.sessionLog;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Build sec-ch-ua headers from user agent string
|
|
||||||
* Per workflow-12102025.md: Include mobile indicator based on device category
|
|
||||||
*/
|
|
||||||
private buildSecChUa(userAgent: string, deviceCategory: string): { secChUa?: string; secChUaPlatform?: string; secChUaMobile?: string } {
|
|
||||||
const isMobile = deviceCategory === 'mobile' || deviceCategory === 'tablet';
|
|
||||||
|
|
||||||
// Extract Chrome version if present
|
|
||||||
const chromeMatch = userAgent.match(/Chrome\/(\d+)/);
|
|
||||||
const edgeMatch = userAgent.match(/Edg\/(\d+)/);
|
|
||||||
|
|
||||||
if (edgeMatch) {
|
|
||||||
const version = edgeMatch[1];
|
|
||||||
return {
|
|
||||||
secChUa: `"Microsoft Edge";v="${version}", "Chromium";v="${version}", "Not_A Brand";v="24"`,
|
|
||||||
secChUaPlatform: userAgent.includes('Windows') ? '"Windows"' : userAgent.includes('Android') ? '"Android"' : '"macOS"',
|
|
||||||
secChUaMobile: isMobile ? '?1' : '?0',
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
if (chromeMatch) {
|
|
||||||
const version = chromeMatch[1];
|
|
||||||
let platform = '"Linux"';
|
|
||||||
if (userAgent.includes('Windows')) platform = '"Windows"';
|
|
||||||
else if (userAgent.includes('Mac')) platform = '"macOS"';
|
|
||||||
else if (userAgent.includes('Android')) platform = '"Android"';
|
|
||||||
else if (userAgent.includes('iPhone') || userAgent.includes('iPad')) platform = '"iOS"';
|
|
||||||
|
|
||||||
return {
|
|
||||||
secChUa: `"Google Chrome";v="${version}", "Chromium";v="${version}", "Not_A Brand";v="24"`,
|
|
||||||
secChUaPlatform: platform,
|
|
||||||
secChUaMobile: isMobile ? '?1' : '?0',
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Firefox/Safari don't send sec-ch-ua
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
getCount(): number {
|
getCount(): number {
|
||||||
return 1; // user-agents generates dynamically
|
return this.userAgents.length;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// COMBINED ROTATOR
|
// COMBINED ROTATOR (for convenience)
|
||||||
// Per workflow-12102025.md: Coordinates proxy + fingerprint rotation
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
export class CrawlRotator {
|
export class CrawlRotator {
|
||||||
@@ -580,51 +359,49 @@ export class CrawlRotator {
|
|||||||
this.userAgent = new UserAgentRotator();
|
this.userAgent = new UserAgentRotator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize rotator (load proxies from DB)
|
||||||
|
*/
|
||||||
async initialize(): Promise<void> {
|
async initialize(): Promise<void> {
|
||||||
await this.proxy.loadProxies();
|
await this.proxy.loadProxies();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rotate proxy only (get new IP)
|
* Rotate proxy only
|
||||||
*/
|
*/
|
||||||
rotateProxy(): Proxy | null {
|
rotateProxy(): Proxy | null {
|
||||||
return this.proxy.getNext();
|
return this.proxy.getNext();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rotate fingerprint only (new UA, screen size, etc.)
|
* Rotate user agent only
|
||||||
*/
|
*/
|
||||||
rotateFingerprint(): BrowserFingerprint {
|
rotateUserAgent(): string {
|
||||||
return this.userAgent.rotate();
|
return this.userAgent.getNext();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rotate both proxy and fingerprint
|
* Rotate both proxy and user agent
|
||||||
* Per workflow-12102025.md: called on 403 for fresh identity
|
|
||||||
* Passes proxy IP to UA rotation for session logging
|
|
||||||
*/
|
*/
|
||||||
rotateBoth(): { proxy: Proxy | null; fingerprint: BrowserFingerprint } {
|
rotateBoth(): { proxy: Proxy | null; userAgent: string } {
|
||||||
const proxy = this.proxy.getNext();
|
|
||||||
const proxyIp = proxy ? proxy.host : undefined;
|
|
||||||
return {
|
return {
|
||||||
proxy,
|
proxy: this.proxy.getNext(),
|
||||||
fingerprint: this.userAgent.rotate(proxyIp),
|
userAgent: this.userAgent.getNext(),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get current proxy and fingerprint without rotating
|
* Get current proxy and user agent without rotating
|
||||||
*/
|
*/
|
||||||
getCurrent(): { proxy: Proxy | null; fingerprint: BrowserFingerprint } {
|
getCurrent(): { proxy: Proxy | null; userAgent: string } {
|
||||||
return {
|
return {
|
||||||
proxy: this.proxy.getCurrent(),
|
proxy: this.proxy.getCurrent(),
|
||||||
fingerprint: this.userAgent.getCurrent(),
|
userAgent: this.userAgent.getCurrent(),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Record success for current proxy
|
* Record success for current proxy
|
||||||
* Per workflow-12102025.md: resets consecutive 403 count
|
|
||||||
*/
|
*/
|
||||||
async recordSuccess(responseTimeMs?: number): Promise<void> {
|
async recordSuccess(responseTimeMs?: number): Promise<void> {
|
||||||
const current = this.proxy.getCurrent();
|
const current = this.proxy.getCurrent();
|
||||||
@@ -634,20 +411,7 @@ export class CrawlRotator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Record 403 block for current proxy
|
* Record failure for current proxy
|
||||||
* Per workflow-12102025.md: increments consecutive_403_count, disables after 3
|
|
||||||
* Returns true if proxy was disabled
|
|
||||||
*/
|
|
||||||
async recordBlock(): Promise<boolean> {
|
|
||||||
const current = this.proxy.getCurrent();
|
|
||||||
if (current) {
|
|
||||||
return await this.proxy.markBlocked(current.id);
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Record general failure (not 403)
|
|
||||||
*/
|
*/
|
||||||
async recordFailure(error?: string): Promise<void> {
|
async recordFailure(error?: string): Promise<void> {
|
||||||
const current = this.proxy.getCurrent();
|
const current = this.proxy.getCurrent();
|
||||||
@@ -657,13 +421,14 @@ export class CrawlRotator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get current proxy location info
|
* Get current proxy location info (for reporting)
|
||||||
* Per workflow-12102025.md: proxy location determines session headers
|
* Note: For rotating proxies (like IPRoyal), the actual exit location varies per request
|
||||||
*/
|
*/
|
||||||
getProxyLocation(): { city?: string; state?: string; country?: string; timezone?: string; isRotating: boolean } | null {
|
getProxyLocation(): { city?: string; state?: string; country?: string; timezone?: string; isRotating: boolean } | null {
|
||||||
const current = this.proxy.getCurrent();
|
const current = this.proxy.getCurrent();
|
||||||
if (!current) return null;
|
if (!current) return null;
|
||||||
|
|
||||||
|
// Check if this is a rotating proxy (max_connections > 1 usually indicates rotating)
|
||||||
const isRotating = current.maxConnections > 1;
|
const isRotating = current.maxConnections > 1;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -674,127 +439,6 @@ export class CrawlRotator {
|
|||||||
isRotating
|
isRotating
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Get timezone from current proxy
|
|
||||||
* Per workflow-12102025.md: used for Accept-Language header
|
|
||||||
*/
|
|
||||||
getProxyTimezone(): string | undefined {
|
|
||||||
const current = this.proxy.getCurrent();
|
|
||||||
return current?.timezone;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Preflight check - verifies proxy and anti-detect are working
|
|
||||||
* MUST be called before any task execution to ensure anonymity.
|
|
||||||
*
|
|
||||||
* Tests:
|
|
||||||
* 1. Proxy available - a proxy must be loaded and active
|
|
||||||
* 2. Proxy connectivity - makes HTTP request through proxy to verify connection
|
|
||||||
* 3. Anti-detect headers - verifies fingerprint is set with required headers
|
|
||||||
*
|
|
||||||
* @returns Promise<PreflightResult> with pass/fail status and details
|
|
||||||
*/
|
|
||||||
async preflight(): Promise<PreflightResult> {
|
|
||||||
const result: PreflightResult = {
|
|
||||||
passed: false,
|
|
||||||
proxyAvailable: false,
|
|
||||||
proxyConnected: false,
|
|
||||||
antidetectReady: false,
|
|
||||||
proxyIp: null,
|
|
||||||
fingerprint: null,
|
|
||||||
error: null,
|
|
||||||
responseTimeMs: null,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Step 1: Check proxy is available
|
|
||||||
const currentProxy = this.proxy.getCurrent();
|
|
||||||
if (!currentProxy) {
|
|
||||||
result.error = 'No proxy available';
|
|
||||||
console.log('[Preflight] FAILED - No proxy available');
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
result.proxyAvailable = true;
|
|
||||||
result.proxyIp = currentProxy.host;
|
|
||||||
|
|
||||||
// Step 2: Check fingerprint/anti-detect is ready
|
|
||||||
const fingerprint = this.userAgent.getCurrent();
|
|
||||||
if (!fingerprint || !fingerprint.userAgent) {
|
|
||||||
result.error = 'Anti-detect fingerprint not initialized';
|
|
||||||
console.log('[Preflight] FAILED - No fingerprint');
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
result.antidetectReady = true;
|
|
||||||
result.fingerprint = {
|
|
||||||
userAgent: fingerprint.userAgent,
|
|
||||||
browserName: fingerprint.browserName,
|
|
||||||
deviceCategory: fingerprint.deviceCategory,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Step 3: Test proxy connectivity with an actual HTTP request
|
|
||||||
// Use httpbin.org/ip to verify request goes through proxy
|
|
||||||
const proxyUrl = this.proxy.getProxyUrl(currentProxy);
|
|
||||||
const testUrl = 'https://httpbin.org/ip';
|
|
||||||
|
|
||||||
try {
|
|
||||||
const { default: axios } = await import('axios');
|
|
||||||
const { HttpsProxyAgent } = await import('https-proxy-agent');
|
|
||||||
|
|
||||||
const agent = new HttpsProxyAgent(proxyUrl);
|
|
||||||
const startTime = Date.now();
|
|
||||||
|
|
||||||
const response = await axios.get(testUrl, {
|
|
||||||
httpsAgent: agent,
|
|
||||||
timeout: 15000, // 15 second timeout
|
|
||||||
headers: {
|
|
||||||
'User-Agent': fingerprint.userAgent,
|
|
||||||
'Accept-Language': fingerprint.acceptLanguage,
|
|
||||||
...(fingerprint.secChUa && { 'sec-ch-ua': fingerprint.secChUa }),
|
|
||||||
...(fingerprint.secChUaPlatform && { 'sec-ch-ua-platform': fingerprint.secChUaPlatform }),
|
|
||||||
...(fingerprint.secChUaMobile && { 'sec-ch-ua-mobile': fingerprint.secChUaMobile }),
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
result.responseTimeMs = Date.now() - startTime;
|
|
||||||
result.proxyConnected = true;
|
|
||||||
result.passed = true;
|
|
||||||
|
|
||||||
// Mark success on proxy stats
|
|
||||||
await this.proxy.markSuccess(currentProxy.id, result.responseTimeMs);
|
|
||||||
|
|
||||||
console.log(`[Preflight] PASSED - Proxy ${currentProxy.host} connected (${result.responseTimeMs}ms), UA: ${fingerprint.browserName}/${fingerprint.deviceCategory}`);
|
|
||||||
} catch (err: any) {
|
|
||||||
result.error = `Proxy connection failed: ${err.message || 'Unknown error'}`;
|
|
||||||
console.log(`[Preflight] FAILED - Proxy connection error: ${err.message}`);
|
|
||||||
|
|
||||||
// Mark failure on proxy stats
|
|
||||||
await this.proxy.markFailed(currentProxy.id, err.message);
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Result from preflight check
|
|
||||||
*/
|
|
||||||
export interface PreflightResult {
|
|
||||||
/** Overall pass/fail */
|
|
||||||
passed: boolean;
|
|
||||||
/** Step 1: Is a proxy loaded? */
|
|
||||||
proxyAvailable: boolean;
|
|
||||||
/** Step 2: Did HTTP request through proxy succeed? */
|
|
||||||
proxyConnected: boolean;
|
|
||||||
/** Step 3: Is fingerprint/anti-detect ready? */
|
|
||||||
antidetectReady: boolean;
|
|
||||||
/** Current proxy IP */
|
|
||||||
proxyIp: string | null;
|
|
||||||
/** Fingerprint summary */
|
|
||||||
fingerprint: { userAgent: string; browserName: string; deviceCategory: string } | null;
|
|
||||||
/** Error message if failed */
|
|
||||||
error: string | null;
|
|
||||||
/** Proxy response time in ms */
|
|
||||||
responseTimeMs: number | null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|||||||
@@ -1,100 +0,0 @@
|
|||||||
/**
|
|
||||||
* Curl Preflight - Verify curl/axios transport works through proxy
|
|
||||||
*
|
|
||||||
* Tests:
|
|
||||||
* 1. Proxy is available and active
|
|
||||||
* 2. HTTP request through proxy succeeds
|
|
||||||
* 3. Anti-detect headers are properly set
|
|
||||||
*
|
|
||||||
* Use case: Fast, simple API requests that don't need browser fingerprint
|
|
||||||
*/
|
|
||||||
|
|
||||||
import axios from 'axios';
|
|
||||||
import { HttpsProxyAgent } from 'https-proxy-agent';
|
|
||||||
import { CrawlRotator, PreflightResult } from './crawl-rotator';
|
|
||||||
|
|
||||||
export interface CurlPreflightResult extends PreflightResult {
|
|
||||||
method: 'curl';
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Run curl preflight check
|
|
||||||
* Tests proxy connectivity using axios/curl through the proxy
|
|
||||||
*/
|
|
||||||
export async function runCurlPreflight(
|
|
||||||
crawlRotator: CrawlRotator
|
|
||||||
): Promise<CurlPreflightResult> {
|
|
||||||
const result: CurlPreflightResult = {
|
|
||||||
method: 'curl',
|
|
||||||
passed: false,
|
|
||||||
proxyAvailable: false,
|
|
||||||
proxyConnected: false,
|
|
||||||
antidetectReady: false,
|
|
||||||
proxyIp: null,
|
|
||||||
fingerprint: null,
|
|
||||||
error: null,
|
|
||||||
responseTimeMs: null,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Step 1: Check proxy is available
|
|
||||||
const currentProxy = crawlRotator.proxy.getCurrent();
|
|
||||||
if (!currentProxy) {
|
|
||||||
result.error = 'No proxy available';
|
|
||||||
console.log('[CurlPreflight] FAILED - No proxy available');
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
result.proxyAvailable = true;
|
|
||||||
result.proxyIp = currentProxy.host;
|
|
||||||
|
|
||||||
// Step 2: Check fingerprint/anti-detect is ready
|
|
||||||
const fingerprint = crawlRotator.userAgent.getCurrent();
|
|
||||||
if (!fingerprint || !fingerprint.userAgent) {
|
|
||||||
result.error = 'Anti-detect fingerprint not initialized';
|
|
||||||
console.log('[CurlPreflight] FAILED - No fingerprint');
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
result.antidetectReady = true;
|
|
||||||
result.fingerprint = {
|
|
||||||
userAgent: fingerprint.userAgent,
|
|
||||||
browserName: fingerprint.browserName,
|
|
||||||
deviceCategory: fingerprint.deviceCategory,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Step 3: Test proxy connectivity with an actual HTTP request
|
|
||||||
const proxyUrl = crawlRotator.proxy.getProxyUrl(currentProxy);
|
|
||||||
const testUrl = 'https://httpbin.org/ip';
|
|
||||||
|
|
||||||
try {
|
|
||||||
const agent = new HttpsProxyAgent(proxyUrl);
|
|
||||||
const startTime = Date.now();
|
|
||||||
|
|
||||||
const response = await axios.get(testUrl, {
|
|
||||||
httpsAgent: agent,
|
|
||||||
timeout: 15000, // 15 second timeout
|
|
||||||
headers: {
|
|
||||||
'User-Agent': fingerprint.userAgent,
|
|
||||||
'Accept-Language': fingerprint.acceptLanguage,
|
|
||||||
...(fingerprint.secChUa && { 'sec-ch-ua': fingerprint.secChUa }),
|
|
||||||
...(fingerprint.secChUaPlatform && { 'sec-ch-ua-platform': fingerprint.secChUaPlatform }),
|
|
||||||
...(fingerprint.secChUaMobile && { 'sec-ch-ua-mobile': fingerprint.secChUaMobile }),
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
result.responseTimeMs = Date.now() - startTime;
|
|
||||||
result.proxyConnected = true;
|
|
||||||
result.passed = true;
|
|
||||||
|
|
||||||
// Mark success on proxy stats
|
|
||||||
await crawlRotator.proxy.markSuccess(currentProxy.id, result.responseTimeMs);
|
|
||||||
|
|
||||||
console.log(`[CurlPreflight] PASSED - Proxy ${currentProxy.host} connected (${result.responseTimeMs}ms), UA: ${fingerprint.browserName}/${fingerprint.deviceCategory}`);
|
|
||||||
} catch (err: any) {
|
|
||||||
result.error = `Proxy connection failed: ${err.message || 'Unknown error'}`;
|
|
||||||
console.log(`[CurlPreflight] FAILED - Proxy connection error: ${err.message}`);
|
|
||||||
|
|
||||||
// Mark failure on proxy stats
|
|
||||||
await crawlRotator.proxy.markFailed(currentProxy.id, err.message);
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
@@ -1,315 +0,0 @@
|
|||||||
/**
|
|
||||||
* HTTP Fingerprinting Service
|
|
||||||
*
|
|
||||||
* Per workflow-12102025.md - HTTP Fingerprinting section:
|
|
||||||
* - Full header set per browser type
|
|
||||||
* - Browser-specific header ordering
|
|
||||||
* - Natural randomization (DNT, Accept quality)
|
|
||||||
* - Dynamic Referer per dispensary
|
|
||||||
*
|
|
||||||
* Canonical location: src/services/http-fingerprint.ts
|
|
||||||
*/
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// TYPES
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
export type BrowserType = 'Chrome' | 'Firefox' | 'Safari' | 'Edge';
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Full HTTP fingerprint for a session
|
|
||||||
*/
|
|
||||||
export interface HTTPFingerprint {
|
|
||||||
browserType: BrowserType;
|
|
||||||
headers: Record<string, string>;
|
|
||||||
headerOrder: string[];
|
|
||||||
curlImpersonateBinary: string;
|
|
||||||
hasDNT: boolean;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Context for building headers
|
|
||||||
*/
|
|
||||||
export interface HeaderContext {
|
|
||||||
userAgent: string;
|
|
||||||
secChUa?: string;
|
|
||||||
secChUaPlatform?: string;
|
|
||||||
secChUaMobile?: string;
|
|
||||||
referer: string;
|
|
||||||
isPost: boolean;
|
|
||||||
contentLength?: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// CONSTANTS (per workflow-12102025.md)
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: DNT header distribution (~30% of users)
|
|
||||||
*/
|
|
||||||
const DNT_PROBABILITY = 0.30;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Accept header variations for natural traffic
|
|
||||||
*/
|
|
||||||
const ACCEPT_VARIATIONS = [
|
|
||||||
'application/json, text/plain, */*',
|
|
||||||
'application/json,text/plain,*/*',
|
|
||||||
'*/*',
|
|
||||||
];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Accept-Language variations
|
|
||||||
*/
|
|
||||||
const ACCEPT_LANGUAGE_VARIATIONS = [
|
|
||||||
'en-US,en;q=0.9',
|
|
||||||
'en-US,en;q=0.8',
|
|
||||||
'en-US;q=0.9,en;q=0.8',
|
|
||||||
];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: curl-impersonate binaries per browser
|
|
||||||
*/
|
|
||||||
const CURL_IMPERSONATE_BINARIES: Record<BrowserType, string> = {
|
|
||||||
Chrome: 'curl_chrome131',
|
|
||||||
Edge: 'curl_chrome131', // Edge uses Chromium
|
|
||||||
Firefox: 'curl_ff133',
|
|
||||||
Safari: 'curl_safari17',
|
|
||||||
};
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// HEADER ORDERING (per workflow-12102025.md)
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Chrome header order for GraphQL requests
|
|
||||||
*/
|
|
||||||
const CHROME_HEADER_ORDER = [
|
|
||||||
'Host',
|
|
||||||
'Connection',
|
|
||||||
'Content-Length',
|
|
||||||
'sec-ch-ua',
|
|
||||||
'DNT',
|
|
||||||
'sec-ch-ua-mobile',
|
|
||||||
'User-Agent',
|
|
||||||
'sec-ch-ua-platform',
|
|
||||||
'Content-Type',
|
|
||||||
'Accept',
|
|
||||||
'Origin',
|
|
||||||
'sec-fetch-site',
|
|
||||||
'sec-fetch-mode',
|
|
||||||
'sec-fetch-dest',
|
|
||||||
'Referer',
|
|
||||||
'Accept-Encoding',
|
|
||||||
'Accept-Language',
|
|
||||||
];
|
|
||||||
|
|
||||||
/**
 * Per workflow-12102025.md: Firefox header order for GraphQL requests
 *
 * Firefox orders headers differently from Chromium (User-Agent and the
 * Accept-* family come first) and sends no sec-ch-ua-* entries.
 */
const FIREFOX_HEADER_ORDER = [
  'Host',
  'User-Agent',
  'Accept',
  'Accept-Language',
  'Accept-Encoding',
  'Content-Type',
  'Content-Length',
  'Origin',
  'DNT',
  'Connection',
  'Referer',
  'sec-fetch-dest',
  'sec-fetch-mode',
  'sec-fetch-site',
];
|
|
||||||
|
|
||||||
/**
 * Per workflow-12102025.md: Safari header order for GraphQL requests
 *
 * Safari sends neither sec-ch-ua-* nor sec-fetch-* headers, so the list is
 * shorter than the Chromium/Firefox ones.
 */
const SAFARI_HEADER_ORDER = [
  'Host',
  'Connection',
  'Content-Length',
  'Accept',
  'User-Agent',
  'Content-Type',
  'Origin',
  'Referer',
  'Accept-Encoding',
  'Accept-Language',
];
|
|
||||||
|
|
||||||
/**
 * Per workflow-12102025.md: Edge uses Chrome order (Chromium-based)
 *
 * Lookup table from BrowserType to its header-ordering list; consumed by
 * generateHTTPFingerprint when building a session fingerprint.
 */
const HEADER_ORDERS: Record<BrowserType, string[]> = {
  Chrome: CHROME_HEADER_ORDER,
  Edge: CHROME_HEADER_ORDER,
  Firefox: FIREFOX_HEADER_ORDER,
  Safari: SAFARI_HEADER_ORDER,
};
|
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// FINGERPRINT GENERATION
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Generate HTTP fingerprint for a session
|
|
||||||
* Randomization is done once per session for consistency
|
|
||||||
*/
|
|
||||||
export function generateHTTPFingerprint(browserType: BrowserType): HTTPFingerprint {
|
|
||||||
// Per workflow-12102025.md: DNT randomized per session (~30%)
|
|
||||||
const hasDNT = Math.random() < DNT_PROBABILITY;
|
|
||||||
|
|
||||||
return {
|
|
||||||
browserType,
|
|
||||||
headers: {}, // Built dynamically per request
|
|
||||||
headerOrder: HEADER_ORDERS[browserType],
|
|
||||||
curlImpersonateBinary: CURL_IMPERSONATE_BINARIES[browserType],
|
|
||||||
hasDNT,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Build complete headers for a request
|
|
||||||
* Returns headers in browser-specific order
|
|
||||||
*/
|
|
||||||
export function buildOrderedHeaders(
|
|
||||||
fingerprint: HTTPFingerprint,
|
|
||||||
context: HeaderContext
|
|
||||||
): { headers: Record<string, string>; orderedHeaders: string[] } {
|
|
||||||
const { browserType, hasDNT, headerOrder } = fingerprint;
|
|
||||||
const { userAgent, secChUa, secChUaPlatform, secChUaMobile, referer, isPost, contentLength } = context;
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: Natural randomization for Accept
|
|
||||||
const accept = ACCEPT_VARIATIONS[Math.floor(Math.random() * ACCEPT_VARIATIONS.length)];
|
|
||||||
const acceptLanguage = ACCEPT_LANGUAGE_VARIATIONS[Math.floor(Math.random() * ACCEPT_LANGUAGE_VARIATIONS.length)];
|
|
||||||
|
|
||||||
// Build all possible headers
|
|
||||||
const allHeaders: Record<string, string> = {
|
|
||||||
'Connection': 'keep-alive',
|
|
||||||
'User-Agent': userAgent,
|
|
||||||
'Accept': accept,
|
|
||||||
'Accept-Language': acceptLanguage,
|
|
||||||
'Accept-Encoding': 'gzip, deflate, br',
|
|
||||||
};
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: POST-only headers
|
|
||||||
if (isPost) {
|
|
||||||
allHeaders['Content-Type'] = 'application/json';
|
|
||||||
allHeaders['Origin'] = 'https://dutchie.com';
|
|
||||||
if (contentLength !== undefined) {
|
|
||||||
allHeaders['Content-Length'] = String(contentLength);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: Dynamic Referer per dispensary
|
|
||||||
allHeaders['Referer'] = referer;
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: DNT randomized per session
|
|
||||||
if (hasDNT) {
|
|
||||||
allHeaders['DNT'] = '1';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: Chromium-only headers (Chrome, Edge)
|
|
||||||
if (browserType === 'Chrome' || browserType === 'Edge') {
|
|
||||||
if (secChUa) allHeaders['sec-ch-ua'] = secChUa;
|
|
||||||
if (secChUaMobile) allHeaders['sec-ch-ua-mobile'] = secChUaMobile;
|
|
||||||
if (secChUaPlatform) allHeaders['sec-ch-ua-platform'] = secChUaPlatform;
|
|
||||||
allHeaders['sec-fetch-site'] = 'same-origin';
|
|
||||||
allHeaders['sec-fetch-mode'] = 'cors';
|
|
||||||
allHeaders['sec-fetch-dest'] = 'empty';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: Firefox has sec-fetch but no sec-ch
|
|
||||||
if (browserType === 'Firefox') {
|
|
||||||
allHeaders['sec-fetch-site'] = 'same-origin';
|
|
||||||
allHeaders['sec-fetch-mode'] = 'cors';
|
|
||||||
allHeaders['sec-fetch-dest'] = 'empty';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Per workflow-12102025.md: Safari has no sec-* headers
|
|
||||||
|
|
||||||
// Filter to only headers that exist and order them
|
|
||||||
const orderedHeaders: string[] = [];
|
|
||||||
const headers: Record<string, string> = {};
|
|
||||||
|
|
||||||
for (const headerName of headerOrder) {
|
|
||||||
if (allHeaders[headerName]) {
|
|
||||||
orderedHeaders.push(headerName);
|
|
||||||
headers[headerName] = allHeaders[headerName];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return { headers, orderedHeaders };
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Build curl command arguments for headers
|
|
||||||
* Headers are added in browser-specific order
|
|
||||||
*/
|
|
||||||
export function buildCurlHeaderArgs(
|
|
||||||
fingerprint: HTTPFingerprint,
|
|
||||||
context: HeaderContext
|
|
||||||
): string[] {
|
|
||||||
const { headers, orderedHeaders } = buildOrderedHeaders(fingerprint, context);
|
|
||||||
|
|
||||||
const args: string[] = [];
|
|
||||||
for (const headerName of orderedHeaders) {
|
|
||||||
// Skip Host and Content-Length - curl handles these
|
|
||||||
if (headerName === 'Host' || headerName === 'Content-Length') continue;
|
|
||||||
args.push('-H', `${headerName}: ${headers[headerName]}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
return args;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Extract Referer from dispensary menu_url
|
|
||||||
*/
|
|
||||||
export function buildRefererFromMenuUrl(menuUrl: string | null | undefined): string {
|
|
||||||
if (!menuUrl) {
|
|
||||||
return 'https://dutchie.com/';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract slug from menu_url
|
|
||||||
// Formats: /embedded-menu/<slug> or /dispensary/<slug> or full URL
|
|
||||||
let slug: string | null = null;
|
|
||||||
|
|
||||||
const embeddedMatch = menuUrl.match(/\/embedded-menu\/([^/?]+)/);
|
|
||||||
const dispensaryMatch = menuUrl.match(/\/dispensary\/([^/?]+)/);
|
|
||||||
|
|
||||||
if (embeddedMatch) {
|
|
||||||
slug = embeddedMatch[1];
|
|
||||||
} else if (dispensaryMatch) {
|
|
||||||
slug = dispensaryMatch[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (slug) {
|
|
||||||
return `https://dutchie.com/dispensary/${slug}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 'https://dutchie.com/';
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Get curl-impersonate binary for browser
|
|
||||||
*/
|
|
||||||
export function getCurlBinary(browserType: BrowserType): string {
|
|
||||||
return CURL_IMPERSONATE_BINARIES[browserType];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Per workflow-12102025.md: Check if curl-impersonate is available
|
|
||||||
*/
|
|
||||||
export function isCurlImpersonateAvailable(browserType: BrowserType): boolean {
|
|
||||||
const binary = CURL_IMPERSONATE_BINARIES[browserType];
|
|
||||||
try {
|
|
||||||
const { execSync } = require('child_process');
|
|
||||||
execSync(`which ${binary}`, { stdio: 'ignore' });
|
|
||||||
return true;
|
|
||||||
} catch {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user